emk · emk · Oct 29, 2016 · Oct 26, 2016 · Oct 26, 2016 · Oct 26, 2016
diff --git a/.gitmodules b/.gitmodules
@@ -1,3 +1,3 @@
 [submodule "uchardet-sys/uchardet"]
 	path = uchardet-sys/uchardet
-	url = https://github.com/BYVoid/uchardet
+	url = https://anongit.freedesktop.org/git/uchardet/uchardet.git
diff --git a/.travis.yml b/.travis.yml
@@ -3,7 +3,7 @@ sudo: required
 rust:
 - nightly
 - beta
-- 1.0.0
+- stable
 before_script:
 - |
   pip install 'travis-cargo<0.2' --user &&

diff --git a/Cargo.toml b/Cargo.toml
@@ -20,6 +20,7 @@ unstable = []
 
 [dependencies]
 libc = "*"
+error-chain = "0.5"
 
 [dependencies.uchardet-sys]
 path = "uchardet-sys"

diff --git a/src/lib.rs b/src/lib.rs
@@ -1,80 +1,91 @@
-//! A wrapper around the uchardet library.  Detects character encodings.
+//! A wrapper around the uchardet library. Detects character encodings.
 //!
 //! Note that the underlying implemention is written in C and C++, and I'm
 //! not aware of any security audits which have been performed against it.
 //!
 //! ```
 //! use uchardet::detect_encoding_name;
 //!
-//! assert_eq!(Some("windows-1252".to_string()),
-//!            detect_encoding_name(&[0x66u8, 0x72, 0x61, 0x6e, 0xe7,
-//!                                 0x61, 0x69, 0x73]).unwrap());
+//! assert_eq!("WINDOWS-1252",
+//!            detect_encoding_name(&[0x46, 0x93, 0x72, 0x61, 0x6e, 0xe7, 0x6f,
+//!                0x69, 0x73, 0xe9, 0x94]).unwrap());
 //! ```
 //!
 //! For more information, see [this project on
 //! GitHub](https://github.com/emk/rust-uchardet).
 
+// Increase the compiler's recursion limit for the `error_chain` crate.
+#![recursion_limit = "1024"]
 #![deny(missing_docs)]
 
+#[macro_use]
+extern crate error_chain;
 extern crate libc;
 extern crate uchardet_sys as ffi;
 
 use libc::size_t;
-use std::error::Error;
-use std::fmt;
-use std::result::Result;
 use std::ffi::CStr;
 use std::str::from_utf8;
 
-/// An error occurred while trying to detect the character encoding.
-#[derive(Debug)]
-pub struct EncodingDetectorError {
-    message: String
-}
+pub use errors::*;
 
-impl Error for EncodingDetectorError {
-    fn description(&self) -> &str { "encoding detector error" }
-    fn cause(&self) -> Option<&Error> { None }
-}
+#[allow(missing_docs)]
+mod errors {
+    error_chain! {
+        errors {
+            UnrecognizableCharset {
+                description("unrecognizable charset")
+                display("uchardet was unable to recognize a charset")
+            }
+            OutOfMemory {
+                description("out of memory error")
+                display("uchardet ran out of memory")
+            }
+            Other(int: i32) {
+                description("unknown error")
+                display("uchardet returned unknown error {}", int)
+            }
+        }
+    }
+
+    impl ErrorKind {
+        pub fn from_nsresult(nsresult: ::ffi::nsresult) -> ErrorKind {
+            assert!(nsresult != 0);
+            match nsresult {
+                1 => ErrorKind::OutOfMemory,
+                int => ErrorKind::Other(int),
+            }
+        }
 
-impl fmt::Display for EncodingDetectorError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{}", &self.message)
     }
 }
 
-/// Either a return value, or an encoding detection error.
-pub type EncodingDetectorResult<T> = Result<T, EncodingDetectorError>;
-
 /// Detects the encoding of text using the uchardet library.
 ///
 /// EXPERIMENTAL: This may be replaced by a better API soon.
 struct EncodingDetector {
     ptr: ffi::uchardet_t
 }
 
-/// Return the name of the charset used in `data`, or `None` if the
-/// charset is ASCII or if the encoding can't be detected.  This is
-/// the value returned by the underlying `uchardet` library, with
-/// the empty string mapped to `None`.
+/// Return the name of the charset used in `data` or an error if uchardet
+/// was unable to detect a charset.
 ///
 /// ```
 /// use uchardet::detect_encoding_name;
 ///
-/// assert_eq!(None, detect_encoding_name("ascii".as_bytes()).unwrap());
-/// assert_eq!(Some("UTF-8".to_string()),
-///            detect_encoding_name("français".as_bytes()).unwrap());
-/// assert_eq!(Some("windows-1252".to_string()),
-///            detect_encoding_name(&[0x66u8, 0x72, 0x61, 0x6e, 0xe7,
-///                                 0x61, 0x69, 0x73]).unwrap());
+/// assert_eq!("ASCII",
+///            detect_encoding_name("ascii".as_bytes()).unwrap());
+/// assert_eq!("UTF-8",
+///            detect_encoding_name("©français".as_bytes()).unwrap());
+/// assert_eq!("WINDOWS-1252",
+///            detect_encoding_name(&[0x46, 0x93, 0x72, 0x61, 0x6e, 0xe7, 0x6f,
+///                0x69, 0x73, 0xe9, 0x94]).unwrap());
 /// ```
-pub fn detect_encoding_name(data: &[u8]) ->
-    EncodingDetectorResult<Option<String>>
-{
+pub fn detect_encoding_name(data: &[u8]) -> Result<String> {
     let mut detector = EncodingDetector::new();
     try!(detector.handle_data(data));
     detector.data_end();
-    Ok(detector.charset())
+    detector.charset()
 }
 
 impl EncodingDetector {
@@ -85,49 +96,49 @@ impl EncodingDetector {
         EncodingDetector{ptr: ptr}
     }
 
-    /// Pass a chunk of raw bytes to the detector.  This is a no-op if a
+    /// Pass a chunk of raw bytes to the detector. This is a no-op if a
     /// charset has been detected.
-    fn handle_data(&mut self, data: &[u8]) -> EncodingDetectorResult<()> {
-        let result = unsafe {
+    fn handle_data(&mut self, data: &[u8]) -> Result<()> {
+        let nsresult = unsafe {
             ffi::uchardet_handle_data(self.ptr, data.as_ptr() as *const i8,
                                       data.len() as size_t)
         };
-        match result {
+        match nsresult {
             0 => Ok(()),
-            _ => {
-                let msg =  "Error handling data".to_string();
-                Err(EncodingDetectorError{message: msg})
+            int => {
+                Err(ErrorKind::from_nsresult(int).into())
             }
         }
     }
 
     /// Notify the detector that we're done calling `handle_data`, and that
-    /// we want it to make a guess as to our encoding.  This is a no-op if
+    /// we want it to make a guess as to our encoding. This is a no-op if
     /// no data has been passed yet, or if an encoding has been detected
-    /// for certain.  From reading the code, it appears that you can safely
+    /// for certain. From reading the code, it appears that you can safely
     /// call `handle_data` after calling this, but I'm not certain.
     fn data_end(&mut self) {
         unsafe { ffi::uchardet_data_end(self.ptr); }
     }
 
     /// Reset the detector's internal state.
-    //fn reset(&mut self) {
+    // fn reset(&mut self) {
     //    unsafe { ffi::uchardet_reset(self.ptr); }
-    //}
+    // }
 
-    /// Get the decoder's current best guess as to the encoding.  Returns
-    /// `None` on error, or if the data appears to be ASCII.
-    fn charset(&self) -> Option<String> {
+    /// Get the decoder's current best guess as to the encoding. May return
+    /// an error if uchardet was unable to detect an encoding.
+    fn charset(&self) -> Result<String> {
         unsafe {
             let internal_str = ffi::uchardet_get_charset(self.ptr);
             assert!(!internal_str.is_null());
             let bytes = CStr::from_ptr(internal_str).to_bytes();
             let charset = from_utf8(bytes);
             match charset {
                 Err(_) =>
-                    panic!("uchardet_get_charset returned invalid value"),
-                Ok("") => None,
-                Ok(encoding) => Some(encoding.to_string())
+                    panic!("uchardet_get_charset returned a charset name \
+                            containing invalid characters"),
+                Ok("") => Err(ErrorKind::UnrecognizableCharset.into()),
+                Ok(encoding) => Ok(encoding.to_string())
             }
         }
     }

diff --git a/uchardet-sys/Cargo.toml b/uchardet-sys/Cargo.toml
@@ -18,3 +18,4 @@ libc = "*"
 
 [build-dependencies]
 pkg-config = '*'
+cmake = "*"
diff --git a/uchardet-sys/build.rs b/uchardet-sys/build.rs
@@ -5,71 +5,40 @@
 // Patches are welcome to help make it work on other operating systems!
 
 extern crate pkg_config;
+extern crate cmake;
 
-use std::env;
-use std::fs::create_dir;
-use std::path::Path;
-use std::process::{Command, Stdio};
+use cmake::Config;
 
 fn main() {
     // Do nothing if this package is already provided by the system.
     if pkg_config::find_library("uchardet").is_ok() { return; }
 
-    // Get our configuration from our environment.
-    let mut cxxflags = env::var("CXXFLAGS").unwrap_or(String::new());
-    let target = env::var("TARGET").unwrap();
-    let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
-    let src = Path::new(&manifest_dir);
-    let out_dir = env::var("OUT_DIR").unwrap();
-    let dst = Path::new(&out_dir);
+    // Build uchardet ourselves
+    let mut config = Config::new("uchardet");
 
-    // Fix up our build flags.
-    if target.contains("i686") {
-        cxxflags.push_str(" -m32");
-    } else if target.contains("x86_64") {
-        cxxflags.push_str(" -m64");
-    }
-    if !target.contains("i686") {
-        cxxflags.push_str(" -fPIC");
-    }
+    // Mustn't build the binaries as they aren't compatible with Windows
+    // and cause a compiler error
+    config.define("BUILD_BINARY", "OFF");
 
-    // Make a build directory.
-    let build = dst.join("build");
-    // This isn't ideal but until is_dir() is stable just always try to create
-    // the build directory. If it exists and error is returned, which is
-    // ignored.
-    create_dir(&build).unwrap_or(());
-
-    // Set up our CMake command.
-    run(Command::new("cmake")
-            .current_dir(&dst.join("build"))
-            .arg(&src.join("uchardet"))
-            .arg("-DCMAKE_BUILD_TYPE=Release")
-            .arg(&format!("-DCMAKE_INSTALL_PREFIX={}", dst.display()))
-            .arg(&format!("-DCMAKE_CXX_FLAGS={}", cxxflags)));
-
-    // Run our make command.
-    run(Command::new("make")
-            .current_dir(&dst.join("build"))
-            .arg("install"));
+    if cfg!(target_os = "windows") && cfg!(target_env = "gnu") {
+        // Disable sized deallocation as we're unable to link when it's enabled
+        config.cxxflag("-fno-sized-deallocation");
+    }
 
-    // Decide how to link our C++ runtime.  Feel free to submit patches
-    // to make this work on your platform.  Other likely options are "c++"
-    // and "c++abi" depending on OS and compiler.
-    let cxx_abi = "stdc++";
+    let dst = config.build();
 
     // Print out link instructions for Cargo.
-    println!("cargo:rustc-flags=-L {} -l static=uchardet -l {}",
-             dst.join("lib").display(), cxx_abi);
-    println!("cargo:root={}", dst.display());
-}
-
-// Run an external build command.
-fn run(cmd: &mut Command) {
-    println!("running: {:?}", cmd);
-    assert!(cmd.stdout(Stdio::inherit())
-               .stderr(Stdio::inherit())
-               .status()
-               .unwrap()
-               .success());
+    println!("cargo:rustc-link-search=native={}/lib", dst.display());
+    println!("cargo:rustc-link-search=native={}/lib64", dst.display());
+    println!("cargo:rustc-link-lib=static=uchardet");
+
+    if !(cfg!(target_os = "windows") && cfg!(target_env = "msvc")) {
+        // Not needed on windows-msvc
+
+        // Decide how to link our C++ runtime.  Feel free to submit patches
+        // to make this work on your platform.  Other likely options are "c++"
+        // and "c++abi" depending on OS and compiler.
+        let cxx_abi = "stdc++";
+        println!("cargo:rustc-flags=-l {}", cxx_abi);
+    }
 }
diff --git a/uchardet-sys/src/lib.rs b/uchardet-sys/src/lib.rs
@@ -8,11 +8,14 @@ use libc::{c_char, c_int, c_void, size_t};
 #[allow(non_camel_case_types)]
 pub type uchardet_t = *mut c_void;
 
+#[allow(non_camel_case_types)]
+pub type nsresult = c_int;
+
 extern {
     pub fn uchardet_new() -> uchardet_t;
     pub fn uchardet_delete(ud: uchardet_t);
     pub fn uchardet_handle_data(ud: uchardet_t, data: *const c_char,
-                                len: size_t) -> c_int;
+                                len: size_t) -> nsresult;
     pub fn uchardet_data_end(ud: uchardet_t);
     pub fn uchardet_reset(ud: uchardet_t);
     pub fn uchardet_get_charset(ud: uchardet_t) -> *const c_char;

diff --git a/uchardet-sys/uchardet b/uchardet-sys/uchardet
-Original file line number
+Diff line change
@@ Expand Up / @@ -3,7 +3,7 @@ sudo: required @@
     rust:
     - nightly
     - beta
-    - 1.0.0
+    - stable
     before_script:
     - |
       pip install 'travis-cargo<0.2' --user &&
@@ Expand Down @@
Original file line number	Diff line number	Diff line change
Expand Up		@@ -18,3 +18,4 @@ libc = "*"

		[build-dependencies]
		pkg-config = '*'
		cmake = "*"
Copy link Owner emk Oct 27, 2016 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. I'm quite happy to see this dependency, especially if it helps us build on Windows and stay in sync with upstream.