Skip to content

Add Cargo feature query_encoding_2 to use encoding_rs crate #445

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,24 @@ jobs:
# getopts is only used in tests. Its versions 0.2.16+ don’t build on 1.17.0
- cargo update -p getopts --precise 0.2.15
# data-url uses pub(crate) which is unstable in 1.17
script: cargo test --all-features -p url -p idna -p percent-encoding -p url_serde
script:
- cargo test --features "heap_size query_encoding_2" -p url -p idna -p percent-encoding -p url_serde
- cargo test --features "heap_size query_encoding" -p url -p idna -p percent-encoding -p url_serde

- rust: stable
script: cargo test --all-features --all
script:
- cargo test --features "heap_size query_encoding_2" --all
- cargo test --features "heap_size query_encoding" --all

- rust: beta
script: cargo test --all-features --all
script:
- cargo test --features "heap_size query_encoding_2" --all
- cargo test --features "heap_size query_encoding" --all

- rust: nightly
script: cargo test --all-features --all
script:
- cargo test --features "heap_size query_encoding_2" --all
- cargo test --features "heap_size query_encoding" --all

- rust: nightly
env: TARGET=WASM32 # For job list UI
Expand Down
4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,13 @@ rustc-serialize = "0.3"
serde_json = ">=0.6.1, <0.9"

[features]
query_encoding_2 = ["encoding_rs"]
query_encoding = ["encoding"]
heap_size = ["heapsize"]

[dependencies]
encoding = {version = "0.2", optional = true}
encoding_rs = {version = "0.7", optional = true}
heapsize = {version = ">=0.4.1, <0.5", optional = true}
idna = { version = "0.1.0", path = "./idna" }
matches = "0.1"
Expand All @@ -49,4 +51,4 @@ rustc-serialize = {version = "0.3", optional = true}
serde = {version = ">=0.6.1, <0.9", optional = true}

[package.metadata.docs.rs]
features = ["query_encoding"]
features = ["query_encoding_2", "query_encoding"]
126 changes: 120 additions & 6 deletions src/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,132 @@
// except according to those terms.


//! Abstraction that conditionally compiles either to rust-encoding,
//! or to only support UTF-8.
//! Abstraction that conditionally compiles either to encoding_rs,
//! or rust-encoding (legacy), or to only support UTF-8.

#[cfg(feature = "query_encoding_2")] extern crate encoding_rs;
#[cfg(feature = "query_encoding")] extern crate encoding;

use std::borrow::Cow;
#[cfg(feature = "query_encoding")] use std::fmt::{self, Debug, Formatter};
#[cfg(any(feature = "query_encoding", feature = "query_encoding_2"))] use std::fmt::{self, Debug, Formatter};

#[cfg(feature = "query_encoding_2")] pub use self::encoding_rs::Encoding;

#[cfg(feature = "query_encoding")] use self::encoding::types::{DecoderTrap, EncoderTrap};
#[cfg(feature = "query_encoding")] use self::encoding::label::encoding_from_whatwg_label;
#[cfg(feature = "query_encoding")] pub use self::encoding::types::EncodingRef;



#[cfg(feature = "query_encoding_2")]
#[derive(Copy, Clone)]
/// Character-encoding override used when the `query_encoding_2` feature
/// (backed by the `encoding_rs` crate) is enabled.
///
/// The value is a cheap `Copy` handle: it holds at most a `&'static` reference
/// to an `encoding_rs` encoding.
pub struct EncodingOverride {
/// `None` means UTF-8.
encoding: Option<&'static Encoding>
}

#[cfg(feature = "query_encoding_2")]
impl EncodingOverride {
    /// Builds an override from an optional encoding; `None` selects UTF-8.
    pub fn from_opt_encoding(encoding: Option<&'static Encoding>) -> Self {
        encoding.map(Self::from_encoding).unwrap_or_else(Self::utf8)
    }

    /// Builds an override for `encoding`.
    ///
    /// UTF-8 is normalized to the internal `None` representation so that
    /// `is_utf8()` stays a plain `is_none()` check.
    pub fn from_encoding(encoding: &'static Encoding) -> Self {
        EncodingOverride {
            encoding: if encoding.name() == "UTF-8" { None } else { Some(encoding) }
        }
    }

    /// The UTF-8 (i.e. "no override") value.
    #[inline]
    pub fn utf8() -> Self {
        EncodingOverride { encoding: None }
    }

    /// Looks up an encoding by label, returning `None` for an unknown label.
    pub fn lookup(label: &[u8]) -> Option<Self> {
        // The label is handed to encoding_rs as raw bytes,
        // so no intermediate (lossy) UTF-8 conversion is needed.
        // https://encoding.spec.whatwg.org/#names-and-labels
        Encoding::for_label(label).map(Self::from_encoding)
    }

    /// https://encoding.spec.whatwg.org/#get-an-output-encoding
    ///
    /// UTF-16LE, UTF-16BE and the `replacement` encoding all map to UTF-8;
    /// every other encoding is returned unchanged.
    pub fn to_output_encoding(self) -> Self {
        if let Some(encoding) = self.encoding {
            if matches!(encoding.name(), "UTF-16LE" | "UTF-16BE" | "replacement") {
                return Self::utf8()
            }
        }
        self
    }

    /// Whether this override is UTF-8.
    pub fn is_utf8(&self) -> bool {
        self.encoding.is_none()
    }

    /// The name of the selected encoding (`"UTF-8"` when there is no override).
    pub fn name(&self) -> &'static str {
        match self.encoding {
            Some(encoding) => encoding.name(),
            None => "UTF-8",
        }
    }

    /// Decodes `input` with the selected encoding, without BOM handling.
    ///
    /// The second element of the tuple returned by encoding_rs is discarded.
    /// An owned input buffer is reused as the output whenever encoding_rs
    /// reports that no transcoding was necessary.
    pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
        let encoding = match self.encoding {
            Some(encoding) => encoding,
            None => return decode_utf8_lossy(input),
        };
        match input {
            Cow::Borrowed(bytes) => {
                let (text, _) = encoding.decode_without_bom_handling(bytes);
                text
            },
            Cow::Owned(bytes) => {
                // Keep the borrow of `bytes` confined to this inner block
                // so that the buffer can be moved afterwards.
                let transcoded = {
                    let (text, _) = encoding.decode_without_bom_handling(&bytes[..]);
                    match text {
                        Cow::Owned(s) => Some(s),
                        Cow::Borrowed(_) => None,
                    }
                };
                match transcoded {
                    // Free the old heap buffer and return the new one.
                    Some(s) => Cow::Owned(s),
                    // Reuse the old heap buffer.
                    //
                    // SAFETY: this relies on encoding_rs returning
                    // `Cow::Borrowed` only when the decoded text is the
                    // input bytes themselves viewed as `&str`, in which case
                    // `bytes` is known to be valid UTF-8.
                    None => Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) }),
                }
            },
        }
    }

    /// Encodes `input` with the selected encoding.
    ///
    /// The extra tuple elements returned by encoding_rs are discarded.
    /// An owned input buffer is reused as the output whenever encoding_rs
    /// reports that no transcoding was necessary.
    pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
        let encoding = match self.encoding {
            Some(encoding) => encoding,
            None => return encode_utf8(input),
        };
        match input {
            Cow::Borrowed(text) => {
                let (bytes, _, _) = encoding.encode(text);
                bytes
            },
            Cow::Owned(text) => {
                // Keep the borrow of `text` confined to this inner block
                // so that the buffer can be moved afterwards.
                let transcoded = {
                    let (bytes, _, _) = encoding.encode(&text[..]);
                    match bytes {
                        Cow::Owned(v) => Some(v),
                        Cow::Borrowed(_) => None,
                    }
                };
                match transcoded {
                    // Free the old heap buffer and return the new one.
                    Some(v) => Cow::Owned(v),
                    // Reuse the old heap buffer.
                    None => Cow::Owned(text.into_bytes()),
                }
            },
        }
    }
}

#[cfg(feature = "query_encoding")]
#[derive(Copy, Clone)]
pub struct EncodingOverride {
Expand Down Expand Up @@ -90,7 +204,7 @@ impl EncodingOverride {
}
}

#[cfg(feature = "query_encoding")]
#[cfg(any(feature = "query_encoding", feature = "query_encoding_2"))]
impl Debug for EncodingOverride {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "EncodingOverride {{ encoding: ")?;
Expand All @@ -101,11 +215,11 @@ impl Debug for EncodingOverride {
}
}

#[cfg(not(feature = "query_encoding"))]
#[cfg(all(not(feature = "query_encoding"), not(feature = "query_encoding_2")))]
#[derive(Copy, Clone, Debug)]
pub struct EncodingOverride;

#[cfg(not(feature = "query_encoding"))]
#[cfg(all(not(feature = "query_encoding"), not(feature = "query_encoding_2")))]
impl EncodingOverride {
#[inline]
pub fn utf8() -> Self {
Expand Down
42 changes: 40 additions & 2 deletions src/form_urlencoded.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,29 @@ pub fn parse(input: &[u8]) -> Parse {
}


/// Parse a byte string in the `application/x-www-form-urlencoded` syntax
/// into an iterator of (name, value) pairs, using an optional encoding override.
///
/// To parse a `&str`, pass its bytes (`input.as_bytes()`).
///
/// This function is only available if the `query_encoding_2`
/// [feature](http://doc.crates.io/manifest.html#the-features-section) is enabled.
///
/// Arguments:
///
/// * `encoding_override`: The character encoding each name and value is decoded as
///   after percent-decoding. `None` means UTF-8.
///   `Encoding` is defined in [encoding_rs](https://github.com/hsivonen/encoding_rs).
/// * `use_charset`: The *use _charset_ flag*. If in doubt, set to `false`.
#[cfg(feature = "query_encoding_2")]
pub fn parse_with_encoding<'a>(input: &'a [u8],
                               encoding_override: Option<&'static ::encoding::Encoding>,
                               use_charset: bool)
                               -> Result<Parse<'a>, ()> {
    // Normalize the optional encoding, then defer to the shared implementation.
    do_parse_with_encoding(input,
                           EncodingOverride::from_opt_encoding(encoding_override),
                           use_charset)
}

/// Convert a byte string in the `application/x-www-form-urlencoded` syntax
/// into an iterator of (name, value) pairs.
///
Expand All @@ -55,9 +78,17 @@ pub fn parse_with_encoding<'a>(input: &'a [u8],
encoding_override: Option<::encoding::EncodingRef>,
use_charset: bool)
-> Result<Parse<'a>, ()> {
let encoding = EncodingOverride::from_opt_encoding(encoding_override);
do_parse_with_encoding(input, encoding, use_charset)
}

#[cfg(any(feature = "query_encoding", feature = "query_encoding_2"))]
fn do_parse_with_encoding<'a>(input: &'a [u8],
mut encoding: EncodingOverride,
use_charset: bool)
-> Result<Parse<'a>, ()> {
use std::ascii::AsciiExt;

let mut encoding = EncodingOverride::from_opt_encoding(encoding_override);
if !(encoding.is_utf8() || input.is_ascii()) {
return Err(())
}
Expand Down Expand Up @@ -294,6 +325,13 @@ impl<T: Target> Serializer<T> {
self
}

/// Choose the character encoding applied to names and values before percent-encoding.
///
/// The requested encoding is converted to its output encoding before being stored.
#[cfg(feature = "query_encoding_2")]
pub fn encoding_override(&mut self, new: Option<&'static ::encoding::Encoding>) -> &mut Self {
    let requested = EncodingOverride::from_opt_encoding(new);
    self.encoding = requested.to_output_encoding();
    self
}

/// Set the character encoding to be used for names and values before percent-encoding.
#[cfg(feature = "query_encoding")]
pub fn encoding_override(&mut self, new: Option<::encoding::EncodingRef>) -> &mut Self {
Expand Down Expand Up @@ -343,7 +381,7 @@ impl<T: Target> Serializer<T> {
/// (See the `encoding_override()` method.)
///
/// Panics if called after `.finish()`.
#[cfg(feature = "query_encoding")]
#[cfg(any(feature = "query_encoding", feature = "query_encoding_2"))]
pub fn append_charset(&mut self) -> &mut Self {
assert!(self.custom_encoding.is_none(),
"Cannot use both custom_encoding_override() and append_charset()");
Expand Down
13 changes: 13 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,19 @@ impl<'a> ParseOptions<'a> {
self
}

/// Override the character encoding used for query strings.
/// This is a legacy concept that only matters for HTML.
///
/// `Encoding` comes from [encoding_rs](https://github.com/hsivonen/encoding_rs).
/// The requested encoding is converted to its output encoding before being stored.
///
/// This method is only available if the `query_encoding_2`
/// [feature](http://doc.crates.io/manifest.html#the-features-section) is enabled.
#[cfg(feature = "query_encoding_2")]
pub fn encoding_override(mut self, new: Option<&'static encoding::Encoding>) -> Self {
    let output = EncodingOverride::from_opt_encoding(new).to_output_encoding();
    self.encoding_override = output;
    self
}

/// Override the character encoding of query strings.
/// This is a legacy concept only relevant for HTML.
///
Expand Down