about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/host.rs 86
-rw-r--r-- src/lib.rs 82
-rw-r--r-- src/origin.rs 3
-rw-r--r-- src/parser.rs 37
-rw-r--r-- src/quirks.rs 59
5 files changed, 172 insertions, 95 deletions
diff --git a/src/host.rs b/src/host.rs
index 9537436..f1921c6 100644
--- a/src/host.rs
+++ b/src/host.rs
@@ -82,7 +82,9 @@ impl Host<String> {
return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
}
let domain = percent_decode(input.as_bytes()).decode_utf8_lossy();
- let domain = idna::domain_to_ascii(&domain)?;
+
+ let domain = Self::domain_to_ascii(&domain)?;
+
if domain.is_empty() {
return Err(ParseError::EmptyHost);
}
@@ -90,9 +92,7 @@ impl Host<String> {
let is_invalid_domain_char = |c| {
matches!(
c,
- '\0' | '\t'
- | '\n'
- | '\r'
+ '\0'..='\u{001F}'
| ' '
| '#'
| '%'
@@ -106,12 +106,15 @@ impl Host<String> {
| '\\'
| ']'
| '^'
+ | '\u{007F}'
+ | '|'
)
};
if domain.find(is_invalid_domain_char).is_some() {
Err(ParseError::InvalidDomainCharacter)
- } else if let Some(address) = parse_ipv4addr(&domain)? {
+ } else if ends_in_a_number(&domain) {
+ let address = parse_ipv4addr(&domain)?;
Ok(Host::Ipv4(address))
} else {
Ok(Host::Domain(domain))
@@ -145,6 +148,7 @@ impl Host<String> {
| '\\'
| ']'
| '^'
+ | '|'
)
};
@@ -156,6 +160,11 @@ impl Host<String> {
))
}
}
+
+ /// Converts `domain` to its ASCII form with `idna::domain_to_ascii`, turning an IDNA failure into a `ParseError`.
+ fn domain_to_ascii(domain: &str) -> Result<String, ParseError> {
+ idna::domain_to_ascii(domain).map_err(Into::into)
+ }
}
impl<S: AsRef<str>> fmt::Display for Host<S> {
@@ -247,8 +256,33 @@ fn longest_zero_sequence(pieces: &[u16; 8]) -> (isize, isize) {
}
}
+/// <https://url.spec.whatwg.org/#ends-in-a-number-checker>
+fn ends_in_a_number(input: &str) -> bool {
+ let mut parts = input.rsplit('.');
+ let last = parts.next().unwrap();
+ let last = if last.is_empty() {
+ if let Some(last) = parts.next() {
+ last
+ } else {
+ return false;
+ }
+ } else {
+ last
+ };
+ if !last.is_empty() && last.chars().all(|c| ('0'..='9').contains(&c)) {
+ return true;
+ }
+
+ parse_ipv4number(last).is_ok()
+}
+
/// <https://url.spec.whatwg.org/#ipv4-number-parser>
+/// Ok(None) means the input is a valid number, but it overflows a `u32`.
fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
+ if input.is_empty() {
+ return Err(());
+ }
+
let mut r = 10;
if input.starts_with("0x") || input.starts_with("0X") {
input = &input[2..];
@@ -258,10 +292,10 @@ fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
r = 8;
}
- // At the moment we can't know the reason why from_str_radix fails
- // https://github.com/rust-lang/rust/issues/22639
- // So instead we check if the input looks like a real number and only return
- // an error when it's an overflow.
+ if input.is_empty() {
+ return Ok(Some(0));
+ }
+
let valid_number = match r {
8 => input.chars().all(|c| ('0'..='7').contains(&c)),
10 => input.chars().all(|c| ('0'..='9').contains(&c)),
@@ -270,50 +304,34 @@ fn parse_ipv4number(mut input: &str) -> Result<Option<u32>, ()> {
}),
_ => false,
};
-
if !valid_number {
- return Ok(None);
+ return Err(());
}
- if input.is_empty() {
- return Ok(Some(0));
- }
- if input.starts_with('+') {
- return Ok(None);
- }
match u32::from_str_radix(input, r) {
- Ok(number) => Ok(Some(number)),
- Err(_) => Err(()),
+ Ok(num) => Ok(Some(num)),
+ Err(_) => Ok(None), // The only possible error kind here is an integer overflow.
+ // The validity of the chars in the input is checked above.
}
}
/// <https://url.spec.whatwg.org/#concept-ipv4-parser>
-fn parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>> {
- if input.is_empty() {
- return Ok(None);
- }
+fn parse_ipv4addr(input: &str) -> ParseResult<Ipv4Addr> {
let mut parts: Vec<&str> = input.split('.').collect();
if parts.last() == Some(&"") {
parts.pop();
}
if parts.len() > 4 {
- return Ok(None);
+ return Err(ParseError::InvalidIpv4Address);
}
let mut numbers: Vec<u32> = Vec::new();
- let mut overflow = false;
for part in parts {
- if part.is_empty() {
- return Ok(None);
- }
match parse_ipv4number(part) {
Ok(Some(n)) => numbers.push(n),
- Ok(None) => return Ok(None),
- Err(()) => overflow = true,
+ Ok(None) => return Err(ParseError::InvalidIpv4Address), // u32 overflow
+ Err(()) => return Err(ParseError::InvalidIpv4Address),
};
}
- if overflow {
- return Err(ParseError::InvalidIpv4Address);
- }
let mut ipv4 = numbers.pop().expect("a non-empty list of numbers");
// Equivalent to: ipv4 >= 256 ** (4 − numbers.len())
if ipv4 > u32::max_value() >> (8 * numbers.len() as u32) {
@@ -325,7 +343,7 @@ fn parse_ipv4addr(input: &str) -> ParseResult<Option<Ipv4Addr>> {
for (counter, n) in numbers.iter().enumerate() {
ipv4 += n << (8 * (3 - counter as u32))
}
- Ok(Some(Ipv4Addr::from(ipv4)))
+ Ok(Ipv4Addr::from(ipv4))
}
/// <https://url.spec.whatwg.org/#concept-ipv6-parser>
diff --git a/src/lib.rs b/src/lib.rs
index 42793cf..6dc09d1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -118,12 +118,16 @@ See [serde documentation](https://serde.rs) for more information.
```toml
url = { version = "2", features = ["serde"] }
```
+
*/
-#![doc(html_root_url = "https://docs.rs/url/2.2.2")]
+#![doc(html_root_url = "https://docs.rs/url/2.3.1")]
+#![cfg_attr(
+ feature = "debugger_visualizer",
+ feature(debugger_visualizer),
+ debugger_visualizer(natvis_file = "../../debug_metadata/url.natvis")
+)]
-#[macro_use]
-extern crate matches;
pub use form_urlencoded;
#[cfg(feature = "serde")]
@@ -460,7 +464,7 @@ impl Url {
}
// Add the filename if they are not the same
- if base_filename != url_filename {
+ if !relative.is_empty() || base_filename != url_filename {
// If the URIs filename is empty this means that it was a directory
// so we'll have to append a '/'.
//
@@ -1234,14 +1238,9 @@ impl Url {
/// # }
/// # run().unwrap();
/// ```
- #[allow(clippy::manual_strip)] // introduced in 1.45, MSRV is 1.36
pub fn path_segments(&self) -> Option<str::Split<'_, char>> {
let path = self.path();
- if path.starts_with('/') {
- Some(path[1..].split('/'))
- } else {
- None
- }
+ path.strip_prefix('/').map(|remainder| remainder.split('/'))
}
/// Return this URL’s query string, if any, as a percent-encoded ASCII string.
@@ -1304,7 +1303,7 @@ impl Url {
/// # Ok(())
/// # }
/// # run().unwrap();
- ///
+ /// ```
#[inline]
pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> {
@@ -1351,7 +1350,7 @@ impl Url {
}
fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R {
- let mut parser = Parser::for_setter(mem::replace(&mut self.serialization, String::new()));
+ let mut parser = Parser::for_setter(mem::take(&mut self.serialization));
let result = f(&mut parser);
self.serialization = parser.serialization;
result
@@ -1541,6 +1540,19 @@ impl Url {
/// url.set_path("data/report.csv");
/// assert_eq!(url.as_str(), "https://example.com/data/report.csv");
/// assert_eq!(url.path(), "/data/report.csv");
+ ///
+ /// // `set_path` percent-encodes the given string if it's not already percent-encoded.
+ /// let mut url = Url::parse("https://example.com")?;
+ /// url.set_path("api/some comments");
+ /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
+ /// assert_eq!(url.path(), "/api/some%20comments");
+ ///
+ /// // `set_path` will not double percent-encode the string if it's already percent-encoded.
+ /// let mut url = Url::parse("https://example.com")?;
+ /// url.set_path("api/some%20comments");
+ /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
+ /// assert_eq!(url.path(), "/api/some%20comments");
+ ///
/// # Ok(())
/// # }
/// # run().unwrap();
@@ -1792,8 +1804,10 @@ impl Url {
return Err(ParseError::SetHostOnCannotBeABaseUrl);
}
+ let scheme_type = SchemeType::from(self.scheme());
+
if let Some(host) = host {
- if host.is_empty() && SchemeType::from(self.scheme()).is_special() {
+ if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() {
return Err(ParseError::EmptyHost);
}
let mut host_substr = host;
@@ -1817,15 +1831,20 @@ impl Url {
self.set_host_internal(Host::parse_opaque(host_substr)?, None);
}
} else if self.has_host() {
- let scheme_type = SchemeType::from(self.scheme());
- if scheme_type.is_special() {
+ if scheme_type.is_special() && !scheme_type.is_file() {
return Err(ParseError::EmptyHost);
} else if self.serialization.len() == self.path_start as usize {
self.serialization.push('/');
}
debug_assert!(self.byte_at(self.scheme_end) == b':');
debug_assert!(self.byte_at(self.path_start) == b'/');
- let new_path_start = self.scheme_end + 1;
+
+ let new_path_start = if scheme_type.is_file() {
+ self.scheme_end + 3
+ } else {
+ self.scheme_end + 1
+ };
+
self.serialization
.drain(new_path_start as usize..self.path_start as usize);
let offset = self.path_start - new_path_start;
@@ -2127,7 +2146,7 @@ impl Url {
///
/// # Examples
///
- /// Change the URL’s scheme from `https` to `foo`:
+ /// Change the URL’s scheme from `https` to `http`:
///
/// ```
/// use url::Url;
@@ -2298,7 +2317,7 @@ impl Url {
/// # run().unwrap();
/// # }
/// ```
- #[cfg(any(unix, windows, target_os = "redox"))]
+ #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
#[allow(clippy::result_unit_err)]
pub fn from_file_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
let mut serialization = "file://".to_owned();
@@ -2335,7 +2354,7 @@ impl Url {
///
/// Note that `std::path` does not consider trailing slashes significant
/// and usually does not include them (e.g. in `Path::parent()`).
- #[cfg(any(unix, windows, target_os = "redox"))]
+ #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
#[allow(clippy::result_unit_err)]
pub fn from_directory_path<P: AsRef<Path>>(path: P) -> Result<Url, ()> {
let mut url = Url::from_file_path(path)?;
@@ -2452,7 +2471,7 @@ impl Url {
/// (That is, if the percent-decoded path contains a NUL byte or,
/// for a Windows path, is not UTF-8.)
#[inline]
- #[cfg(any(unix, windows, target_os = "redox"))]
+ #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))]
#[allow(clippy::result_unit_err)]
pub fn to_file_path(&self) -> Result<PathBuf, ()> {
if let Some(segments) = self.path_segments() {
@@ -2511,7 +2530,7 @@ impl fmt::Display for Url {
}
}
-/// String converstion.
+/// String conversion.
impl From<Url> for String {
fn from(value: Url) -> String {
value.serialization
@@ -2656,12 +2675,15 @@ impl<'de> serde::Deserialize<'de> for Url {
}
}
-#[cfg(any(unix, target_os = "redox"))]
+#[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
fn path_to_file_url_segments(
path: &Path,
serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
+ #[cfg(any(unix, target_os = "redox"))]
use std::os::unix::prelude::OsStrExt;
+ #[cfg(target_os = "wasi")]
+ use std::os::wasi::prelude::OsStrExt;
if !path.is_absolute() {
return Err(());
}
@@ -2706,6 +2728,7 @@ fn path_to_file_url_segments_windows(
let host_start = serialization.len() + 1;
let host_end;
let host_internal;
+
match components.next() {
Some(Component::Prefix(ref p)) => match p.kind() {
Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => {
@@ -2726,7 +2749,6 @@ fn path_to_file_url_segments_windows(
}
_ => return Err(()),
},
-
_ => return Err(()),
}
@@ -2735,12 +2757,15 @@ fn path_to_file_url_segments_windows(
if component == Component::RootDir {
continue;
}
+
path_only_has_prefix = false;
// FIXME: somehow work with non-unicode?
let component = component.as_os_str().to_str().ok_or(())?;
+
serialization.push('/');
serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT));
}
+
// A windows drive letter must end with a slash.
if serialization.len() > host_start
&& parser::is_windows_drive_letter(&serialization[host_start..])
@@ -2748,16 +2773,20 @@ fn path_to_file_url_segments_windows(
{
serialization.push('/');
}
+
Ok((host_end, host_internal))
}
-#[cfg(any(unix, target_os = "redox"))]
+#[cfg(any(unix, target_os = "redox", target_os = "wasi"))]
fn file_url_segments_to_pathbuf(
host: Option<&str>,
segments: str::Split<'_, char>,
) -> Result<PathBuf, ()> {
use std::ffi::OsStr;
+ #[cfg(any(unix, target_os = "redox"))]
use std::os::unix::prelude::OsStrExt;
+ #[cfg(target_os = "wasi")]
+ use std::os::wasi::prelude::OsStrExt;
if host.is_some() {
return Err(());
@@ -2768,10 +2797,12 @@ fn file_url_segments_to_pathbuf(
} else {
Vec::new()
};
+
for segment in segments {
bytes.push(b'/');
bytes.extend(percent_decode(segment.as_bytes()));
}
+
// A windows drive letter must end with a slash.
if bytes.len() > 2
&& matches!(bytes[bytes.len() - 2], b'a'..=b'z' | b'A'..=b'Z')
@@ -2779,12 +2810,15 @@ fn file_url_segments_to_pathbuf(
{
bytes.push(b'/');
}
+
let os_str = OsStr::from_bytes(&bytes);
let path = PathBuf::from(os_str);
+
debug_assert!(
path.is_absolute(),
"to_file_path() failed to produce an absolute Path"
);
+
Ok(path)
}
diff --git a/src/origin.rs b/src/origin.rs
index be2d948..81193f5 100644
--- a/src/origin.rs
+++ b/src/origin.rs
@@ -9,7 +9,6 @@
use crate::host::Host;
use crate::parser::default_port;
use crate::Url;
-use idna::domain_to_unicode;
use std::sync::atomic::{AtomicUsize, Ordering};
pub fn url_origin(url: &Url) -> Origin {
@@ -93,7 +92,7 @@ impl Origin {
Origin::Tuple(ref scheme, ref host, port) => {
let host = match *host {
Host::Domain(ref domain) => {
- let (domain, _errors) = domain_to_unicode(domain);
+ let (domain, _errors) = idna::domain_to_unicode(domain);
Host::Domain(domain)
}
_ => host.clone(),
diff --git a/src/parser.rs b/src/parser.rs
index 57be110..f5438c5 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -52,15 +52,12 @@ macro_rules! simple_enum_error {
///
/// This may be extended in the future so exhaustive matching is
/// discouraged with an unused variant.
- #[allow(clippy::manual_non_exhaustive)] // introduced in 1.40, MSRV is 1.36
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
+ #[non_exhaustive]
pub enum ParseError {
$(
$name,
)+
- /// Unused variant enable non-exhaustive matching
- #[doc(hidden)]
- __FutureProof,
}
impl fmt::Display for ParseError {
@@ -69,9 +66,6 @@ macro_rules! simple_enum_error {
$(
ParseError::$name => fmt.write_str($description),
)+
- ParseError::__FutureProof => {
- unreachable!("Don't abuse the FutureProof!");
- }
}
}
}
@@ -105,15 +99,12 @@ macro_rules! syntax_violation_enum {
///
/// This may be extended in the future so exhaustive matching is
/// discouraged with an unused variant.
- #[allow(clippy::manual_non_exhaustive)] // introduced in 1.40, MSRV is 1.36
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
+ #[non_exhaustive]
pub enum SyntaxViolation {
$(
$name,
)+
- /// Unused variant enable non-exhaustive matching
- #[doc(hidden)]
- __FutureProof,
}
impl SyntaxViolation {
@@ -122,9 +113,6 @@ macro_rules! syntax_violation_enum {
$(
SyntaxViolation::$name => $description,
)+
- SyntaxViolation::__FutureProof => {
- unreachable!("Don't abuse the FutureProof!");
- }
}
}
}
@@ -154,7 +142,7 @@ impl fmt::Display for SyntaxViolation {
}
}
-#[derive(Copy, Clone, PartialEq)]
+#[derive(Copy, Clone, PartialEq, Eq)]
pub enum SchemeType {
File,
SpecialNotFile,
@@ -1227,13 +1215,11 @@ impl<'a> Parser<'a> {
}
}
}
- // Going from &str to String to &str to please the 1.33.0 borrow checker
- let before_slash_string = if ends_with_slash {
- self.serialization[segment_start..self.serialization.len() - 1].to_owned()
+ let segment_before_slash = if ends_with_slash {
+ &self.serialization[segment_start..self.serialization.len() - 1]
} else {
- self.serialization[segment_start..self.serialization.len()].to_owned()
+ &self.serialization[segment_start..self.serialization.len()]
};
- let segment_before_slash: &str = &before_slash_string;
match segment_before_slash {
// If buffer is a double-dot path segment, shorten url’s path,
".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e"
@@ -1292,7 +1278,7 @@ impl<'a> Parser<'a> {
//FIXME: log violation
let path = self.serialization.split_off(path_start);
self.serialization.push('/');
- self.serialization.push_str(&path.trim_start_matches('/'));
+ self.serialization.push_str(path.trim_start_matches('/'));
}
input
@@ -1423,7 +1409,8 @@ impl<'a> Parser<'a> {
scheme_end: u32,
mut input: Input<'i>,
) -> Option<Input<'i>> {
- let mut query = String::new(); // FIXME: use a streaming decoder instead
+ let len = input.chars.as_str().len();
+ let mut query = String::with_capacity(len); // FIXME: use a streaming decoder instead
let mut remaining = None;
while let Some(c) = input.next() {
if c == '#' && self.context == Context::UrlParser {
@@ -1563,17 +1550,17 @@ fn is_normalized_windows_drive_letter(segment: &str) -> bool {
is_windows_drive_letter(segment) && segment.as_bytes()[1] == b':'
}
-/// Wether the scheme is file:, the path has a single segment, and that segment
+/// Whether the scheme is file:, the path has a single segment, and that segment
/// is a Windows drive letter
#[inline]
pub fn is_windows_drive_letter(segment: &str) -> bool {
segment.len() == 2 && starts_with_windows_drive_letter(segment)
}
-/// Wether path starts with a root slash
+/// Whether path starts with a root slash
/// and a windows drive letter eg: "/c:" or "/a:/"
fn path_starts_with_windows_drive_letter(s: &str) -> bool {
- if let Some(c) = s.as_bytes().get(0) {
+ if let Some(c) = s.as_bytes().first() {
matches!(c, b'/' | b'\\' | b'?' | b'#') && starts_with_windows_drive_letter(&s[1..])
} else {
false
diff --git a/src/quirks.rs b/src/quirks.rs
index 0dbc6eb..0674ebb 100644
--- a/src/quirks.rs
+++ b/src/quirks.rs
@@ -14,6 +14,49 @@
use crate::parser::{default_port, Context, Input, Parser, SchemeType};
use crate::{Host, ParseError, Position, Url};
+/// Internal components / offsets of a URL.
+///
+/// https://user:pass@example.com:1234/foo/bar?baz#quux
+///      |      |    |          | ^^^^|       |   |
+///      |      |    |          | |   |       |   `----- fragment_start
+///      |      |    |          | |   |       `--------- query_start
+///      |      |    |          | |   `----------------- path_start
+///      |      |    |          | `--------------------- port
+///      |      |    |          `----------------------- host_end
+///      |      |    `---------------------------------- host_start
+///      |      `--------------------------------------- username_end
+///      `---------------------------------------------- scheme_end
+#[derive(Copy, Clone)]
+#[cfg(feature = "expose_internals")]
+pub struct InternalComponents {
+ pub scheme_end: u32,
+ pub username_end: u32,
+ pub host_start: u32,
+ pub host_end: u32,
+ pub port: Option<u16>,
+ pub path_start: u32,
+ pub query_start: Option<u32>,
+ pub fragment_start: Option<u32>,
+}
+
+/// Internal component / parsed offsets of the URL.
+///
+/// This can be useful for implementing efficient serialization
+/// for the URL.
+#[cfg(feature = "expose_internals")]
+pub fn internal_components(url: &Url) -> InternalComponents {
+ InternalComponents {
+ scheme_end: url.scheme_end,
+ username_end: url.username_end,
+ host_start: url.host_start,
+ host_end: url.host_end,
+ port: url.port,
+ path_start: url.path_start,
+ query_start: url.query_start,
+ fragment_start: url.fragment_start,
+ }
+}
+
/// https://url.spec.whatwg.org/#dom-url-domaintoascii
pub fn domain_to_ascii(domain: &str) -> String {
match Host::parse(domain) {
@@ -138,14 +181,10 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
}
}
// Make sure we won't set an empty host to a url with a username or a port
- if host == Host::Domain("".to_string()) {
- if !username(&url).is_empty() {
- return Err(());
- } else if let Some(Some(_)) = opt_port {
- return Err(());
- } else if url.port().is_some() {
- return Err(());
- }
+ if host == Host::Domain("".to_string())
+ && (!username(url).is_empty() || matches!(opt_port, Some(Some(_))) || url.port().is_some())
+ {
+ return Err(());
}
url.set_host_internal(host, opt_port);
Ok(())
@@ -177,10 +216,10 @@ pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> {
// Empty host on special not file url
if SchemeType::from(url.scheme()) == SchemeType::SpecialNotFile
// Port with an empty host
- ||!port(&url).is_empty()
+ ||!port(url).is_empty()
// Empty host that includes credentials
|| !url.username().is_empty()
- || !url.password().unwrap_or(&"").is_empty()
+ || !url.password().unwrap_or("").is_empty()
{
return Err(());
}