summary refs log tree commit diff
path: root/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib.rs')
-rw-r--r-- src/lib.rs 117
1 files changed, 103 insertions, 14 deletions
diff --git a/src/lib.rs b/src/lib.rs
index e1cf571..765ee16 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -13,19 +13,13 @@
//! Converts between a string (such as an URL’s query string)
//! and a sequence of (name, value) pairs.
-extern crate percent_encoding;
#[macro_use]
extern crate matches;
use percent_encoding::{percent_decode, percent_encode_byte};
-use query_encoding::decode_utf8_lossy;
use std::borrow::{Borrow, Cow};
use std::str;
-mod query_encoding;
-
-pub use query_encoding::EncodingOverride;
-
/// Convert a byte string in the `application/x-www-form-urlencoded` syntax
/// into an iterator of (name, value) pairs.
///
@@ -34,7 +28,7 @@ pub use query_encoding::EncodingOverride;
/// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be
/// converted to `[("#first", "%try%")]`.
#[inline]
-pub fn parse(input: &[u8]) -> Parse {
+pub fn parse(input: &[u8]) -> Parse<'_> {
Parse { input }
}
/// The return type of `parse()`.
@@ -65,7 +59,7 @@ impl<'a> Iterator for Parse<'a> {
}
}
-fn decode(input: &[u8]) -> Cow<str> {
+fn decode(input: &[u8]) -> Cow<'_, str> {
let replaced = replace_plus(input);
decode_utf8_lossy(match percent_decode(&replaced).into() {
Cow::Owned(vec) => Cow::Owned(vec),
@@ -74,7 +68,7 @@ fn decode(input: &[u8]) -> Cow<str> {
}
/// Replace b'+' with b' '
-fn replace_plus(input: &[u8]) -> Cow<[u8]> {
+fn replace_plus(input: &[u8]) -> Cow<'_, [u8]> {
match input.iter().position(|&b| b == b'+') {
None => Cow::Borrowed(input),
Some(first_position) => {
@@ -116,7 +110,7 @@ impl<'a> Iterator for ParseIntoOwned<'a> {
/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
///
/// Return an iterator of `&str` slices.
-pub fn byte_serialize(input: &[u8]) -> ByteSerialize {
+pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> {
ByteSerialize { bytes: input }
}
@@ -150,6 +144,10 @@ impl<'a> Iterator for ByteSerialize<'a> {
None => (self.bytes, &[][..]),
};
self.bytes = remaining;
+ // SAFETY: every byte of `unchanged_slice` has already been checked by
+ // byte_serialized_unchanged, which accepts only a subset of UTF-8.
+ // So we know these bytes are valid UTF-8, and doing another UTF-8
+ // check would be wasteful.
Some(unsafe { str::from_utf8_unchecked(unchanged_slice) })
} else {
None
@@ -214,7 +212,14 @@ impl<'a, T: Target> Serializer<'a, T> {
/// If that suffix is non-empty,
/// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
pub fn for_suffix(mut target: T, start_position: usize) -> Self {
- &target.as_mut_string()[start_position..]; // Panic if out of bounds
+ if target.as_mut_string().len() < start_position {
+ panic!(
+ "invalid length {} for target of length {}",
+ start_position,
+ target.as_mut_string().len()
+ );
+ }
+
Serializer {
target: Some(target),
start_position,
@@ -250,6 +255,19 @@ impl<'a, T: Target> Serializer<'a, T> {
self
}
+    /// Serialize a parameter name that carries no value and append it to the target.
+    ///
+    /// Returns `self` so that calls can be chained.
+    ///
+    /// Panics if called after `.finish()`.
+    pub fn append_key_only(&mut self, name: &str) -> &mut Self {
+        // Read the plain fields first so the call below stays on one line.
+        let (start, encoding) = (self.start_position, self.encoding);
+        let out = string(&mut self.target);
+        append_key_only(out, start, encoding, name);
+        self
+    }
+
/// Serialize and append a number of name/value pairs.
///
/// This simply calls `append_pair` repeatedly.
@@ -280,6 +298,29 @@ impl<'a, T: Target> Serializer<'a, T> {
self
}
+    /// Serialize and append every name produced by `iter`, each one without a value.
+    ///
+    /// Equivalent to calling `append_key_only` once per item.
+    /// Offered as a convenience so the user doesn’t need to introduce a block
+    /// to limit the scope of `Serializer`’s borrow of its string.
+    ///
+    /// Panics if called after `.finish()`.
+    pub fn extend_keys_only<I, K>(&mut self, iter: I) -> &mut Self
+    where
+        I: IntoIterator,
+        I::Item: Borrow<K>,
+        K: AsRef<str>,
+    {
+        let (start, encoding) = (self.start_position, self.encoding);
+        // Resolve the target string once, before any item is consumed.
+        let out = string(&mut self.target);
+        iter.into_iter().for_each(|item| {
+            let name = item.borrow().as_ref();
+            append_key_only(out, start, encoding, name);
+        });
+        self
+    }
+
/// If this serializer was constructed with a string, take and return that string.
///
/// ```rust
@@ -316,7 +357,7 @@ fn string<T: Target>(target: &mut Option<T>) -> &mut String {
fn append_pair(
string: &mut String,
start_position: usize,
- encoding: EncodingOverride,
+ encoding: EncodingOverride<'_>,
name: &str,
value: &str,
) {
@@ -326,6 +367,54 @@ fn append_pair(
append_encoded(value, string, encoding);
}
-fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride) {
- string.extend(byte_serialize(&query_encoding::encode(encoding, s.into())))
+/// Append `name` to `string` as a key without any value.
+///
+/// Calls `append_separator_if_needed(string, start_position)` first, then
+/// appends `name` through `append_encoded`, using `encoding` as the optional
+/// encoding override.
+fn append_key_only(
+    string: &mut String,
+    start_position: usize,
+    // Written with an explicit `'_` to match `append_pair`, `append_encoded`
+    // and `encode`, which all spell the alias's lifetime parameter out.
+    encoding: EncodingOverride<'_>,
+    name: &str,
+) {
+    append_separator_if_needed(string, start_position);
+    append_encoded(name, string, encoding);
}
+
+/// Encode `s` (honouring `encoding`, if any), byte-serialize the result,
+/// and push every serialized piece onto the end of `string`.
+fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride<'_>) {
+    let encoded = encode(encoding, s);
+    for piece in byte_serialize(&encoded) {
+        string.push_str(piece);
+    }
+}
+
+/// Turn `input` into the bytes that should be serialized.
+///
+/// When an override is supplied it is called with `input` and its result is
+/// returned; otherwise the UTF-8 bytes of `input` are borrowed as-is.
+pub(crate) fn encode<'a>(encoding_override: EncodingOverride<'_>, input: &'a str) -> Cow<'a, [u8]> {
+    match encoding_override {
+        Some(custom) => custom(input),
+        None => Cow::Borrowed(input.as_bytes()),
+    }
+}
+
+pub(crate) fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
+ // Lossily decodes `input` as UTF-8, reusing an owned buffer when it is already valid. Note: this function is duplicated in `percent_encoding/lib.rs`.
+ match input {
+ Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
+ Cow::Owned(bytes) => {
+ match String::from_utf8_lossy(&bytes) {
+ Cow::Borrowed(utf8) => {
+ // `from_utf8_lossy` returns `Cow::Borrowed` only when `bytes` is
+ // already valid UTF-8: for invalid input it has to allocate a
+ // new owned `String` so that it can substitute a placeholder
+ // for the invalid bytes, and would return `Cow::Owned` instead.
+ // The borrowed `utf8` therefore refers to `bytes` itself.
+
+ // In debug builds, confirm that `utf8` really aliases `bytes` (same pointer and length).
+ let raw_utf8: *const [u8];
+ raw_utf8 = utf8.as_bytes();
+ debug_assert!(raw_utf8 == &*bytes as *const [u8]);
+
+ // SAFETY: `bytes` has just been validated as UTF-8 by
+ // `from_utf8_lossy` (see above) and we own it, so it can become
+ // a `String` without a second validation pass or a copy.
+ Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
+ }
+ Cow::Owned(s) => Cow::Owned(s),
+ }
+ }
+ }
+}
+
+pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<'_, [u8]>>; // Optional borrowed callback mapping a `&str` to bytes; `encode` borrows the input's UTF-8 bytes when it is `None`.