diff options
author | Jeff Vander Stoep <jeffv@google.com> | 2022-12-13 09:19:21 +0100 |
---|---|---|
committer | Jeff Vander Stoep <jeffv@google.com> | 2022-12-13 09:19:21 +0100 |
commit | f983a87abb31060fd161399e657d20fa3940a57d (patch) | |
tree | e74967a2ccaad34b918be498fe7f0d7b64c90527 | |
parent | 5d2fd3007cb69905e0eb3397527471c25859c651 (diff) | |
download | os_str_bytes-f983a87abb31060fd161399e657d20fa3940a57d.tar.gz |
Update os_str_bytes to 6.4.1
Test: Treehugger
Change-Id: Ia699ac963537a1a53c8c18ad0451db025ea1fada
-rw-r--r-- | Android.bp | 145 | ||||
-rw-r--r-- | Cargo.toml | 81 | ||||
-rw-r--r-- | Cargo.toml.orig | 34 | ||||
l--------- | LICENSE | 2 | ||||
-rw-r--r-- | METADATA | 21 | ||||
-rw-r--r-- | MODULE_LICENSE_APACHE2 (renamed from MODULE_LICENSE_MIT) | 0 | ||||
-rw-r--r-- | README.md | 9 | ||||
-rw-r--r-- | cargo2android.json | 5 | ||||
-rw-r--r-- | rustfmt.toml | 1 | ||||
-rw-r--r-- | src/common/mod.rs | 2 | ||||
-rw-r--r-- | src/common/raw.rs | 7 | ||||
-rw-r--r-- | src/iter.rs | 58 | ||||
-rw-r--r-- | src/lib.rs | 349 | ||||
-rw-r--r-- | src/pattern.rs | 8 | ||||
-rw-r--r-- | src/raw_str.rs | 785 | ||||
-rw-r--r-- | src/util.rs | 1 | ||||
-rw-r--r-- | src/wasm/mod.rs (renamed from src/wasm32/mod.rs) | 16 | ||||
-rw-r--r-- | src/wasm/raw.rs (renamed from src/wasm32/raw.rs) | 35 | ||||
-rw-r--r-- | src/windows/mod.rs | 101 | ||||
-rw-r--r-- | src/windows/raw.rs | 12 | ||||
-rw-r--r-- | src/windows/wtf8/code_points.rs | 40 | ||||
-rw-r--r-- | src/windows/wtf8/convert.rs | 47 | ||||
-rw-r--r-- | src/windows/wtf8/string.rs | 42 | ||||
-rw-r--r-- | tests/common.rs | 94 | ||||
-rw-r--r-- | tests/debug.rs | 34 | ||||
-rw-r--r-- | tests/edge_cases.rs | 7 | ||||
-rw-r--r-- | tests/index.rs | 86 | ||||
-rw-r--r-- | tests/integration.rs | 75 | ||||
-rw-r--r-- | tests/random.rs | 126 | ||||
-rw-r--r-- | tests/raw.rs | 108 |
30 files changed, 1168 insertions, 1163 deletions
@@ -1,51 +1,14 @@ -// This file is generated by cargo2android.py --run --device --tests. +// This file is generated by cargo2android.py --config cargo2android.json. // Do not modify this file as changes will be overridden on upgrade. -package { - default_applicable_licenses: ["external_rust_crates_os_str_bytes_license"], -} - -// Added automatically by a large-scale-change that took the approach of -// 'apply every license found to every target'. While this makes sure we respect -// every license restriction, it may not be entirely correct. -// -// e.g. GPL in an MIT project might only apply to the contrib/ directory. -// -// Please consider splitting the single license below into multiple licenses, -// taking care not to lose any license_kind information, and overriding the -// default license using the 'licenses: [...]' property on targets as needed. -// -// For unused files, consider creating a 'fileGroup' with "//visibility:private" -// to attach the license to, and including a comment whether the files may be -// used in the current project. -// -// large-scale-change included anything that looked like it might be a license -// text as a license_text. e.g. LICENSE, NOTICE, COPYING etc. -// -// Please consider removing redundant or irrelevant files from 'license_text:'. -// See: http://go/android-license-faq -license { - name: "external_rust_crates_os_str_bytes_license", - visibility: [":__subpackages__"], - license_kinds: [ - "SPDX-license-identifier-Apache-2.0", - "SPDX-license-identifier-MIT", - ], - license_text: [ - "COPYRIGHT", - "LICENSE-APACHE", - "LICENSE-MIT", - ], -} - rust_library { name: "libos_str_bytes", host_supported: true, crate_name: "os_str_bytes", cargo_env_compat: true, - cargo_pkg_version: "6.1.0", + cargo_pkg_version: "6.4.1", srcs: ["src/lib.rs"], edition: "2021", features: [ @@ -61,107 +24,3 @@ rust_library { "//apex_available:anyapex", ], } - -rust_test { - name: "os_str_bytes_test_src_lib", - host_supported: true, - crate_name: "os_str_bytes", - cargo_env_compat: true, - cargo_pkg_version: "6.1.0", - srcs: ["src/lib.rs"], - test_suites: ["general-tests"], - auto_gen_config: true, - test_options: { - unit_test: true, - }, - edition: "2021", - features: [ - "default", - "memchr", - "raw_os_str", - ], - rustlibs: [ - "libgetrandom", - "libmemchr", - ], -} - -rust_defaults { - name: "os_str_bytes_test_defaults", - crate_name: "os_str_bytes", - cargo_env_compat: true, - cargo_pkg_version: "6.1.0", - test_suites: ["general-tests"], - auto_gen_config: true, - edition: "2021", - features: [ - "default", - "memchr", - "raw_os_str", - ], - rustlibs: [ - "libgetrandom", - "libmemchr", - "libos_str_bytes", - ], -} - -rust_test { - name: "os_str_bytes_test_tests_debug", - defaults: ["os_str_bytes_test_defaults"], - host_supported: true, - srcs: ["tests/debug.rs"], - test_options: { - unit_test: true, - }, -} - -rust_test { - name: "os_str_bytes_test_tests_edge_cases", - defaults: ["os_str_bytes_test_defaults"], - host_supported: true, - srcs: ["tests/edge_cases.rs"], - test_options: { - unit_test: true, - }, -} - -rust_test { - name: "os_str_bytes_test_tests_index", - defaults: ["os_str_bytes_test_defaults"], - host_supported: true, - srcs: ["tests/index.rs"], - test_options: { - unit_test: true, - }, -} - -rust_test { - name: "os_str_bytes_test_tests_integration", - defaults: ["os_str_bytes_test_defaults"], - host_supported: true, - srcs: ["tests/integration.rs"], - test_options: { - unit_test: true, - }, -} - -rust_test { - name: "os_str_bytes_test_tests_random", - defaults: ["os_str_bytes_test_defaults"], - host_supported: true, - srcs: ["tests/random.rs"], - test_options: { - unit_test: true, - }, -} - -rust_test { - name: "os_str_bytes_test_tests_raw", - defaults: ["os_str_bytes_test_defaults"], - host_supported: true, - srcs: ["tests/raw.rs"], - test_options: { - unit_test: true, - }, -} @@ -1,33 +1,78 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + [package] -name = "os_str_bytes" -version = "6.1.0" -authors = ["dylni"] edition = "2021" rust-version = "1.57.0" +name = "os_str_bytes" +version = "6.4.1" +authors = ["dylni"] +exclude = [ + ".*", + "tests.rs", + "/rustfmt.toml", + "/src/bin", + "/tests", +] description = """ -Utilities for converting between byte sequences and platform-native strings +Convert between byte sequences and platform-native strings """ readme = "README.md" -repository = "https://github.com/dylni/os_str_bytes" +keywords = [ + "bytes", + "osstr", + "osstring", + "path", + "windows", +] +categories = [ + "command-line-interface", + "development-tools::ffi", + "encoding", + "os", + "rust-patterns", +] license = "MIT OR Apache-2.0" -keywords = ["bytes", "osstr", "osstring", "path", "windows"] -categories = ["command-line-interface", "development-tools::ffi", "encoding", "os", "rust-patterns"] -exclude = [".*", "/rustfmt.toml", "/tests"] +repository = "https://github.com/dylni/os_str_bytes" [package.metadata.docs.rs] all-features = true -rustc-args = ["--cfg", "os_str_bytes_docs_rs"] -rustdoc-args = ["--cfg", "os_str_bytes_docs_rs"] +rustc-args = [ + "--cfg", + "os_str_bytes_docs_rs", +] +rustdoc-args = [ + "--cfg", + "os_str_bytes_docs_rs", +] -[dependencies] -memchr = { version = "2.4", optional = true } -print_bytes = { version = "0.6", optional = true } -uniquote = { version = "3.0", optional = true } +[dependencies.memchr] +version = "2.4" +optional = true -[dev-dependencies] -getrandom = "0.2" +[dependencies.print_bytes] +version = "0.7" +optional = true -[features] -default = ["memchr", "raw_os_str"] +[dependencies.uniquote] +version = "3.0" +optional = true +[dev-dependencies.getrandom] +version = "0.2" + +[features] +checked_conversions = [] +default = [ + "memchr", + "raw_os_str", +] raw_os_str = [] diff --git a/Cargo.toml.orig b/Cargo.toml.orig new file mode 100644 index 0000000..dd388f2 --- /dev/null +++ b/Cargo.toml.orig @@ -0,0 +1,34 @@ +[package] +name = "os_str_bytes" +version = "6.4.1" +authors = ["dylni"] +edition = "2021" +rust-version = "1.57.0" +description = """ +Convert between byte sequences and platform-native strings +""" +readme = "README.md" +repository = "https://github.com/dylni/os_str_bytes" +license = "MIT OR Apache-2.0" +keywords = ["bytes", "osstr", "osstring", "path", "windows"] +categories = ["command-line-interface", "development-tools::ffi", "encoding", "os", "rust-patterns"] +exclude = [".*", "tests.rs", "/rustfmt.toml", "/src/bin", "/tests"] + +[package.metadata.docs.rs] +all-features = true +rustc-args = ["--cfg", "os_str_bytes_docs_rs"] +rustdoc-args = ["--cfg", "os_str_bytes_docs_rs"] + +[dependencies] +memchr = { version = "2.4", optional = true } +print_bytes = { version = "0.7", optional = true } +uniquote = { version = "3.0", optional = true } + +[dev-dependencies] +getrandom = "0.2" + +[features] +default = ["memchr", "raw_os_str"] + +checked_conversions = [] +raw_os_str = [] @@ -1 +1 @@ -LICENSE-MIT
\ No newline at end of file +LICENSE-APACHE
\ No newline at end of file @@ -1,13 +1,20 @@ name: "os_str_bytes" -description: - "This crate allows interacting with the data stored by OsStr and OsString, without resorting to panics or corruption for invalid UTF-8. Thus, methods can be used that are already defined on [u8] and Vec<u8>." - +description: "()" third_party { url { - type: GIT - value: "https://github.com/dylni/os_str_bytes" + type: HOMEPAGE + value: "https://crates.io/crates/os_str_bytes" } - version: "6.1.0" - last_upgrade_date { year: 2022 month: 6 day: 29 } + url { + type: ARCHIVE + value: "https://static.crates.io/crates/os_str_bytes/os_str_bytes-6.4.1.crate" + } + version: "6.4.1" + # Dual-licensed, using the least restrictive per go/thirdpartylicenses#same. license_type: NOTICE + last_upgrade_date { + year: 2022 + month: 12 + day: 13 + } } diff --git a/MODULE_LICENSE_MIT b/MODULE_LICENSE_APACHE2 index e69de29..e69de29 100644 --- a/MODULE_LICENSE_MIT +++ b/MODULE_LICENSE_APACHE2 @@ -19,7 +19,7 @@ Add the following lines to your "Cargo.toml" file: ```toml [dependencies] -os_str_bytes = "6.1" +os_str_bytes = "6.4" ``` See the [documentation] for available functionality and examples. @@ -69,6 +69,11 @@ The minimum supported Rust toolchain version depends on the platform: <td><code>*-*-windows-*</code></td> <td>1.57.0</td> </tr> + <tr> + <td>Xous</td> + <td><code>*-*-xous-*</code></td> + <td>unstable</td> + </tr> </table> Minor version updates may increase these version requirements. However, the @@ -78,7 +83,7 @@ crate's minor version: ```toml [dependencies] -os_str_bytes = "~6.1" +os_str_bytes = "~6.4" ``` ## License diff --git a/cargo2android.json b/cargo2android.json new file mode 100644 index 0000000..ff6df50 --- /dev/null +++ b/cargo2android.json @@ -0,0 +1,5 @@ +{ + "device": true, + "run": true, + "tests": true +} diff --git a/rustfmt.toml b/rustfmt.toml deleted file mode 100644 index a1ffd27..0000000 --- a/rustfmt.toml +++ /dev/null @@ -1 +0,0 @@ -max_width = 79 diff --git a/src/common/mod.rs b/src/common/mod.rs index dd49890..e28aba6 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -12,6 +12,8 @@ use std::os::solid as os; use std::os::unix as os; #[cfg(target_os = "wasi")] use std::os::wasi as os; +#[cfg(target_os = "xous")] +use std::os::xous as os; use os::ffi::OsStrExt; use os::ffi::OsStringExt; diff --git a/src/common/raw.rs b/src/common/raw.rs index 070a62c..97d0353 100644 --- a/src/common/raw.rs +++ b/src/common/raw.rs @@ -1,12 +1,19 @@ use std::fmt; use std::fmt::Formatter; +use super::Result; + #[inline(always)] pub(crate) const fn is_continuation(_: u8) -> bool { false } #[inline(always)] +pub(crate) fn validate_bytes(_: &[u8]) -> Result<()> { + Ok(()) +} + +#[inline(always)] pub(crate) fn decode_code_point(_: &[u8]) -> u32 { unreachable!(); } diff --git a/src/iter.rs b/src/iter.rs index 5cb7299..03ff982 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -2,11 +2,11 @@ #![cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))] +use std::convert; use std::fmt; use std::fmt::Debug; use std::fmt::Formatter; use std::iter::FusedIterator; -use std::str; use super::pattern::Encoded; use super::Pattern; @@ -29,6 +29,7 @@ impl<'a, P> Split<'a, P> where P: Pattern, { + #[track_caller] pub(super) fn new(string: &'a RawOsStr, pat: P) -> Self { let pat = pat.__encode(); assert!( @@ -56,31 +57,6 @@ macro_rules! impl_next { }}; } -impl<P> DoubleEndedIterator for Split<'_, P> -where - P: Pattern, -{ - fn next_back(&mut self) -> Option<Self::Item> { - impl_next!(self, rsplit_once_raw, |(prefix, suffix)| (suffix, prefix)) - } -} - -impl<'a, P> Iterator for Split<'a, P> -where - P: Pattern, -{ - type Item = &'a RawOsStr; - - #[inline] - fn last(mut self) -> Option<Self::Item> { - self.next_back() - } - - fn next(&mut self) -> Option<Self::Item> { - impl_next!(self, split_once_raw, |x| x) - } -} - impl<P> Clone for Split<'_, P> where P: Pattern, @@ -102,12 +78,34 @@ where fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { f.debug_struct("Split") .field("string", &self.string) - .field( - "pat", - &str::from_utf8(self.pat.__get()).expect("invalid pattern"), - ) + .field("pat", &self.pat) .finish() } } +impl<P> DoubleEndedIterator for Split<'_, P> +where + P: Pattern, +{ + fn next_back(&mut self) -> Option<Self::Item> { + impl_next!(self, rsplit_once_raw, |(prefix, suffix)| (suffix, prefix)) + } +} + impl<P> FusedIterator for Split<'_, P> where P: Pattern {} + +impl<'a, P> Iterator for Split<'a, P> +where + P: Pattern, +{ + type Item = &'a RawOsStr; + + #[inline] + fn last(mut self) -> Option<Self::Item> { + self.next_back() + } + + fn next(&mut self) -> Option<Self::Item> { + impl_next!(self, split_once_raw, convert::identity) + } +} @@ -21,9 +21,9 @@ //! However, the following invariants will always be upheld: //! //! - The encoding will be compatible with UTF-8. In particular, splitting an -//! encoded byte sequence by a UTF-8–encoded character always produces other -//! valid byte sequences. They can be re-encoded without error using -//! [`OsStrBytes::from_raw_bytes`] and similar methods. +//! encoded byte sequence by a UTF-8–encoded character always produces +//! other valid byte sequences. They can be re-encoded without error using +//! [`RawOsString::into_os_string`] and similar methods. //! //! - All characters valid in platform strings are representable. [`OsStr`] and //! [`OsString`] can always be losslessly reconstructed from extracted bytes. @@ -73,10 +73,27 @@ //! For more information, see [`RawOsStr`][memchr complexity]. //! //! - **raw\_os\_str** - -//! Enables use of [`RawOsStr`] and [`RawOsString`]. +//! Provides: +//! - [`iter`] +//! - [`Pattern`] +//! - [`RawOsStr`] +//! - [`RawOsStrCow`] +//! - [`RawOsString`] //! //! ### Optional Features //! +//! - **checked\_conversions** - +//! Provides: +//! - [`EncodingError`] +//! - [`OsStrBytes::from_raw_bytes`] +//! - [`OsStringBytes::from_raw_vec`] +//! - [`RawOsStr::from_raw_bytes`] +//! - [`RawOsString::from_raw_vec`] +//! +//! Because this feature should not be used in libraries, the +//! "OS_STR_BYTES_CHECKED_CONVERSIONS" environment variable must be defined +//! during compilation. +//! //! - **print\_bytes** - //! Provides implementations of [`print_bytes::ToBytes`] for [`RawOsStr`] and //! [`RawOsString`]. @@ -98,21 +115,23 @@ //! //! # Complexity //! -//! The time complexities of trait methods will vary based on what -//! functionality is available for the platform. At worst, they will all be -//! linear, but some can take constant time. For example, -//! [`OsStringBytes::from_raw_vec`] might be able to reuse the allocation for -//! its argument. +//! Conversion method complexities will vary based on what functionality is +//! available for the platform. At worst, they will all be linear, but some can +//! take constant time. For example, [`RawOsString::into_os_string`] might be +//! able to reuse its allocation. //! //! # Examples //! //! ``` +//! # use std::io; +//! # +//! # #[cfg(feature = "raw_os_str")] +//! # { //! # #[cfg(any())] //! use std::env; //! use std::fs; -//! # use std::io; //! -//! use os_str_bytes::OsStrBytes; +//! use os_str_bytes::RawOsStr; //! //! # mod env { //! # use std::env; @@ -126,12 +145,13 @@ //! # } //! # //! for file in env::args_os().skip(1) { -//! if file.to_raw_bytes().first() != Some(&b'-') { +//! if !RawOsStr::new(&file).starts_with('-') { //! let string = "Hello, world!"; //! fs::write(&file, string)?; //! assert_eq!(string, fs::read_to_string(file)?); //! } //! } +//! # } //! # //! # Ok::<_, io::Error>(()) //! ``` @@ -146,6 +166,7 @@ //! [sealed]: https://rust-lang.github.io/api-guidelines/future-proofing.html#c-sealed //! [print\_bytes]: https://crates.io/crates/print_bytes +#![cfg_attr(not(feature = "checked_conversions"), allow(deprecated))] // Only require a nightly compiler when building documentation for docs.rs. // This is a private option that should not be used. // https://github.com/rust-lang/docs.rs/issues/147#issuecomment-389544407 @@ -170,6 +191,41 @@ use std::path::Path; use std::path::PathBuf; use std::result; +macro_rules! if_checked_conversions { + ( $($item:item)+ ) => { + $( + #[cfg(feature = "checked_conversions")] + $item + )+ + }; +} + +#[cfg(not(os_str_bytes_docs_rs))] +if_checked_conversions! { + const _: &str = env!( + "OS_STR_BYTES_CHECKED_CONVERSIONS", + "The 'OS_STR_BYTES_CHECKED_CONVERSIONS' environment variable must be \ + defined to use the 'checked_conversions' feature.", + ); +} + +#[rustfmt::skip] +macro_rules! deprecated_checked_conversion { + ( $message:expr , $item:item ) => { + #[cfg_attr( + not(feature = "checked_conversions"), + deprecated = $message + )] + $item + }; +} + +macro_rules! expect_encoded { + ( $result:expr ) => { + $result.expect("invalid raw bytes") + }; +} + macro_rules! if_raw_str { ( $($item:item)+ ) => { $( @@ -180,16 +236,24 @@ macro_rules! if_raw_str { } #[cfg_attr( - all(target_arch = "wasm32", target_os = "unknown"), - path = "wasm32/mod.rs" + all(target_family = "wasm", target_os = "unknown"), + path = "wasm/mod.rs" )] #[cfg_attr(windows, path = "windows/mod.rs")] #[cfg_attr( - not(any(all(target_arch = "wasm32", target_os = "unknown"), windows)), + not(any(all(target_family = "wasm", target_os = "unknown"), windows)), path = "common/mod.rs" )] mod imp; +#[cfg(any( + all( + feature = "raw_os_str", + target_family = "wasm", + target_os = "unknown", + ), + windows, +))] mod util; if_raw_str! { @@ -200,32 +264,43 @@ if_raw_str! { mod raw_str; pub use raw_str::RawOsStr; + pub use raw_str::RawOsStrCow; pub use raw_str::RawOsString; } -/// The error that occurs when a byte sequence is not representable in the -/// platform encoding. -/// -/// [`Result::unwrap`] should almost always be called on results containing -/// this error. It should be known whether or not byte sequences are properly -/// encoded for the platform, since [the module-level documentation][encoding] -/// discourages using encoded bytes in interchange. Results are returned -/// primarily to make panicking behavior explicit. -/// -/// On Unix, this error is never returned, but [`OsStrExt`] or [`OsStringExt`] -/// should be used instead if that needs to be guaranteed. -/// -/// [encoding]: self#encoding -/// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt -/// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt -/// [`Result::unwrap`]: ::std::result::Result::unwrap -#[derive(Debug, Eq, PartialEq)] -pub struct EncodingError(imp::EncodingError); +deprecated_checked_conversion! { + "use `OsStrBytes::assert_from_raw_bytes` or \ + `OsStringBytes::assert_from_raw_vec` instead, or enable the \ + 'checked_conversions' feature", + /// The error that occurs when a byte sequence is not representable in the + /// platform encoding. + /// + /// [`Result::unwrap`] should almost always be called on results containing + /// this error. It should be known whether or not byte sequences are + /// properly encoded for the platform, since [the module-level + /// documentation][encoding] discourages using encoded bytes in + /// interchange. Results are returned primarily to make panicking behavior + /// explicit. + /// + /// On Unix, this error is never returned, but [`OsStrExt`] or + /// [`OsStringExt`] should be used instead if that needs to be guaranteed. + /// + /// [encoding]: self#encoding + /// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt + /// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt + /// [`Result::unwrap`]: ::std::result::Result::unwrap + #[derive(Clone, Debug, Eq, PartialEq)] + #[cfg_attr( + os_str_bytes_docs_rs, + doc(cfg(feature = "checked_conversions")) + )] + pub struct EncodingError(imp::EncodingError); +} impl Display for EncodingError { #[inline] - fn fmt(&self, formatter: &mut Formatter<'_>) -> fmt::Result { - self.0.fmt(formatter) + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + self.0.fmt(f) } } @@ -233,6 +308,25 @@ impl Error for EncodingError {} type Result<T> = result::Result<T, EncodingError>; +fn from_raw_bytes<'a, S>( + string: S, +) -> result::Result<Cow<'a, OsStr>, imp::EncodingError> +where + S: Into<Cow<'a, [u8]>>, +{ + match string.into() { + Cow::Borrowed(string) => imp::os_str_from_bytes(string), + Cow::Owned(string) => imp::os_string_from_vec(string).map(Cow::Owned), + } +} + +fn cow_os_str_into_path(string: Cow<'_, OsStr>) -> Cow<'_, Path> { + match string { + Cow::Borrowed(string) => Cow::Borrowed(Path::new(string)), + Cow::Owned(string) => Cow::Owned(string.into()), + } +} + /// A platform agnostic variant of [`OsStrExt`]. /// /// For more information, see [the module-level documentation][module]. @@ -240,14 +334,12 @@ type Result<T> = result::Result<T, EncodingError>; /// [module]: self /// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt pub trait OsStrBytes: private::Sealed + ToOwned { - /// Converts a byte slice into an equivalent platform-native string. - /// - /// Provided byte strings should always be valid for the [unspecified - /// encoding] used by this crate. + /// Converts a byte string into an equivalent platform-native string. /// - /// # Errors + /// # Panics /// - /// See documentation for [`EncodingError`]. + /// Panics if the string is not valid for the [unspecified encoding] used + /// by this crate. /// /// # Examples /// @@ -260,32 +352,70 @@ pub trait OsStrBytes: private::Sealed + ToOwned { /// /// let os_string = env::current_exe()?; /// let os_bytes = os_string.to_raw_bytes(); - /// assert_eq!(os_string, OsStr::from_raw_bytes(os_bytes).unwrap()); + /// assert_eq!(os_string, OsStr::assert_from_raw_bytes(os_bytes)); /// # /// # Ok::<_, io::Error>(()) /// ``` /// /// [unspecified encoding]: self#encoding - fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>> + #[must_use = "method should not be used for validation"] + #[track_caller] + fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self> where S: Into<Cow<'a, [u8]>>; - /// Converts a platform-native string into an equivalent byte slice. + deprecated_checked_conversion! { + "use `assert_from_raw_bytes` instead, or enable the \ + 'checked_conversions' feature", + /// Converts a byte string into an equivalent platform-native string. + /// + /// [`assert_from_raw_bytes`] should almost always be used instead. For + /// more information, see [`EncodingError`]. + /// + /// # Errors + /// + /// See documentation for [`EncodingError`]. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// use std::ffi::OsStr; + /// # use std::io; + /// + /// use os_str_bytes::OsStrBytes; + /// + /// let os_string = env::current_exe()?; + /// let os_bytes = os_string.to_raw_bytes(); + /// assert_eq!(os_string, OsStr::from_raw_bytes(os_bytes).unwrap()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [`assert_from_raw_bytes`]: Self::assert_from_raw_bytes + #[cfg_attr( + os_str_bytes_docs_rs, + doc(cfg(feature = "checked_conversions")) + )] + fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>> + where + S: Into<Cow<'a, [u8]>>; + } + + /// Converts a platform-native string into an equivalent byte string. /// - /// The returned bytes string will use an [unspecified encoding]. + /// The returned string will use an [unspecified encoding]. /// /// # Examples /// /// ``` - /// use std::env; - /// # use std::io; + /// use std::ffi::OsStr; /// /// use os_str_bytes::OsStrBytes; /// - /// let os_string = env::current_exe()?; - /// println!("{:?}", os_string.to_raw_bytes()); - /// # - /// # Ok::<_, io::Error>(()) + /// let string = "foobar"; + /// let os_string = OsStr::new(string); + /// assert_eq!(string.as_bytes(), &*os_string.to_raw_bytes()); /// ``` /// /// [unspecified encoding]: self#encoding @@ -295,18 +425,19 @@ pub trait OsStrBytes: private::Sealed + ToOwned { impl OsStrBytes for OsStr { #[inline] + fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self> + where + S: Into<Cow<'a, [u8]>>, + { + expect_encoded!(from_raw_bytes(string)) + } + + #[inline] fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>> where S: Into<Cow<'a, [u8]>>, { - match string.into() { - Cow::Borrowed(string) => { - imp::os_str_from_bytes(string).map_err(EncodingError) - } - Cow::Owned(string) => { - OsStringBytes::from_raw_vec(string).map(Cow::Owned) - } - } + from_raw_bytes(string).map_err(EncodingError) } #[inline] @@ -317,14 +448,19 @@ impl OsStrBytes for OsStr { impl OsStrBytes for Path { #[inline] + fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self> + where + S: Into<Cow<'a, [u8]>>, + { + cow_os_str_into_path(OsStr::assert_from_raw_bytes(string)) + } + + #[inline] fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>> where S: Into<Cow<'a, [u8]>>, { - OsStr::from_raw_bytes(string).map(|os_string| match os_string { - Cow::Borrowed(os_string) => Cow::Borrowed(Self::new(os_string)), - Cow::Owned(os_string) => Cow::Owned(os_string.into()), - }) + OsStr::from_raw_bytes(string).map(cow_os_str_into_path) } #[inline] @@ -340,14 +476,12 @@ impl OsStrBytes for Path { /// [module]: self /// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt pub trait OsStringBytes: private::Sealed + Sized { - /// Converts a byte vector into an equivalent platform-native string. + /// Converts a byte string into an equivalent platform-native string. /// - /// Provided byte strings should always be valid for the [unspecified - /// encoding] used by this crate. + /// # Panics /// - /// # Errors - /// - /// See documentation for [`EncodingError`]. + /// Panics if the string is not valid for the [unspecified encoding] used + /// by this crate. /// /// # Examples /// @@ -360,30 +494,66 @@ pub trait OsStringBytes: private::Sealed + Sized { /// /// let os_string = env::current_exe()?; /// let os_bytes = os_string.clone().into_raw_vec(); - /// assert_eq!(os_string, OsString::from_raw_vec(os_bytes).unwrap()); + /// assert_eq!(os_string, OsString::assert_from_raw_vec(os_bytes)); /// # /// # Ok::<_, io::Error>(()) /// ``` /// /// [unspecified encoding]: self#encoding - fn from_raw_vec(string: Vec<u8>) -> Result<Self>; + #[must_use = "method should not be used for validation"] + #[track_caller] + fn assert_from_raw_vec(string: Vec<u8>) -> Self; - /// Converts a platform-native string into an equivalent byte vector. + deprecated_checked_conversion! { + "use `assert_from_raw_vec` instead, or enable the \ + 'checked_conversions' feature", + /// Converts a byte string into an equivalent platform-native string. + /// + /// [`assert_from_raw_vec`] should almost always be used instead. For + /// more information, see [`EncodingError`]. + /// + /// # Errors + /// + /// See documentation for [`EncodingError`]. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// use std::ffi::OsString; + /// # use std::io; + /// + /// use os_str_bytes::OsStringBytes; + /// + /// let os_string = env::current_exe()?; + /// let os_bytes = os_string.clone().into_raw_vec(); + /// assert_eq!(os_string, OsString::from_raw_vec(os_bytes).unwrap()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [`assert_from_raw_vec`]: Self::assert_from_raw_vec + #[cfg_attr( + os_str_bytes_docs_rs, + doc(cfg(feature = "checked_conversions")) + )] + fn from_raw_vec(string: Vec<u8>) -> Result<Self>; + } + + /// Converts a platform-native string into an equivalent byte string. /// - /// The returned byte string will use an [unspecified encoding]. + /// The returned string will use an [unspecified encoding]. /// /// # Examples /// /// ``` - /// use std::env; - /// # use std::io; + /// use std::ffi::OsString; /// /// use os_str_bytes::OsStringBytes; /// - /// let os_string = env::current_exe()?; - /// println!("{:?}", os_string.into_raw_vec()); - /// # - /// # Ok::<_, io::Error>(()) + /// let string = "foobar".to_owned(); + /// let os_string: OsString = string.clone().into(); + /// assert_eq!(string.into_bytes(), os_string.into_raw_vec()); /// ``` /// /// [unspecified encoding]: self#encoding @@ -393,6 +563,11 @@ pub trait OsStringBytes: private::Sealed + Sized { impl OsStringBytes for OsString { #[inline] + fn assert_from_raw_vec(string: Vec<u8>) -> Self { + expect_encoded!(imp::os_string_from_vec(string)) + } + + #[inline] fn from_raw_vec(string: Vec<u8>) -> Result<Self> { imp::os_string_from_vec(string).map_err(EncodingError) } @@ -405,6 +580,11 @@ impl OsStringBytes for OsString { impl OsStringBytes for PathBuf { #[inline] + fn assert_from_raw_vec(string: Vec<u8>) -> Self { + OsString::assert_from_raw_vec(string).into() + } + + #[inline] fn from_raw_vec(string: Vec<u8>) -> Result<Self> { OsString::from_raw_vec(string).map(Into::into) } @@ -421,7 +601,14 @@ mod private { use std::path::Path; use std::path::PathBuf; + if_raw_str! { + use std::borrow::Cow; + + use super::RawOsStr; + } + pub trait Sealed {} + impl Sealed for char {} impl Sealed for OsStr {} impl Sealed for OsString {} @@ -429,4 +616,8 @@ mod private { impl Sealed for PathBuf {} impl Sealed for &str {} impl Sealed for &String {} + + if_raw_str! { + impl Sealed for Cow<'_, RawOsStr> {} + } } diff --git a/src/pattern.rs b/src/pattern.rs index 267a679..11f86bf 100644 --- a/src/pattern.rs +++ b/src/pattern.rs @@ -1,24 +1,24 @@ +use std::fmt::Debug; + use super::private; pub trait Encoded { fn __get(&self) -> &[u8]; } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct EncodedChar { buffer: [u8; 4], length: usize, } impl Encoded for EncodedChar { - #[inline] fn __get(&self) -> &[u8] { &self.buffer[..self.length] } } impl Encoded for &str { - #[inline] fn __get(&self) -> &[u8] { self.as_bytes() } @@ -35,7 +35,7 @@ impl Encoded for &str { #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))] pub trait Pattern: private::Sealed { #[doc(hidden)] - type __Encoded: Clone + Encoded; + type __Encoded: Clone + Debug + Encoded; #[doc(hidden)] fn __encode(self) -> Self::__Encoded; diff --git a/src/raw_str.rs b/src/raw_str.rs index ccec858..659b34d 100644 --- a/src/raw_str.rs +++ b/src/raw_str.rs @@ -16,6 +16,7 @@ use std::ops::RangeFull; use std::ops::RangeInclusive; use std::ops::RangeTo; use std::ops::RangeToInclusive; +use std::result; use std::str; #[cfg(feature = "memchr")] @@ -23,73 +24,54 @@ use memchr::memmem::find; #[cfg(feature = "memchr")] use memchr::memmem::rfind; +use super::imp; use super::imp::raw; use super::iter::Split; use super::pattern::Encoded as EncodedPattern; -use super::OsStrBytes; -use super::OsStringBytes; +use super::private; use super::Pattern; +if_checked_conversions! { + use super::EncodingError; + use super::Result; +} + #[cfg(not(feature = "memchr"))] fn find(string: &[u8], pat: &[u8]) -> Option<usize> { - for i in 0..=string.len().checked_sub(pat.len())? { - if string[i..].starts_with(pat) { - return Some(i); - } - } - None + (0..=string.len().checked_sub(pat.len())?) + .find(|&x| string[x..].starts_with(pat)) } #[cfg(not(feature = "memchr"))] fn rfind(string: &[u8], pat: &[u8]) -> Option<usize> { - for i in (pat.len()..=string.len()).rev() { - if string[..i].ends_with(pat) { - return Some(i - pat.len()); - } - } - None + (pat.len()..=string.len()) + .rfind(|&x| string[..x].ends_with(pat)) + .map(|x| x - pat.len()) } -macro_rules! impl_trim_matches { - ( $self:ident , $pat:expr , $strip_method:ident ) => {{ - let pat = $pat.__encode(); - let pat = pat.__get(); - if pat.is_empty() { - return $self; - } - - let mut string = &$self.0; - while let Some(substring) = string.$strip_method(pat) { - string = substring; - } - Self::from_raw_bytes_unchecked(string) - }}; +#[allow(clippy::missing_safety_doc)] +unsafe trait TransmuteBox { + fn transmute_box<R>(self: Box<Self>) -> Box<R> + where + R: ?Sized + TransmuteBox, + { + let value = Box::into_raw(self); + // SAFETY: This trait is only implemented for types that can be + // transmuted. + unsafe { Box::from_raw(mem::transmute_copy(&value)) } + } } -macro_rules! impl_split_once_raw { - ( $self:ident , $pat:expr , $find_fn:expr ) => {{ - let pat = $pat.__get(); - - let index = $find_fn(&$self.0, pat)?; - let prefix = &$self.0[..index]; - let suffix = &$self.0[index + pat.len()..]; - Some(( - Self::from_raw_bytes_unchecked(prefix), - Self::from_raw_bytes_unchecked(suffix), - )) - }}; -} +// SAFETY: This struct has a layout that makes this operation safe. +unsafe impl TransmuteBox for RawOsStr {} +unsafe impl TransmuteBox for [u8] {} -/// A container for the byte strings converted by [`OsStrBytes`]. +/// A container for borrowed byte strings converted by this crate. /// /// This wrapper is intended to prevent violating the invariants of the /// [unspecified encoding] used by this crate and minimize encoding /// conversions. /// -/// Although this type is annotated with `#[repr(transparent)]`, the inner -/// representation is not stable. Transmuting between this type and any other -/// causes immediate undefined behavior. -/// /// # Indices /// /// Methods of this struct that accept indices require that the index lie on a @@ -110,6 +92,12 @@ macro_rules! impl_split_once_raw { /// these methods to instead run in linear time in the worst case (documented /// for [`memchr::memmem::find`][memchr complexity]). /// +/// # Safety +/// +/// Although this type is annotated with `#[repr(transparent)]`, the inner +/// representation is not stable. Transmuting between this type and any other +/// causes immediate undefined behavior. +/// /// [memchr complexity]: memchr::memmem::find#complexity /// [unspecified encoding]: super#encoding #[derive(Eq, Hash, Ord, PartialEq, PartialOrd)] @@ -118,7 +106,7 @@ macro_rules! impl_split_once_raw { pub struct RawOsStr([u8]); impl RawOsStr { - fn from_raw_bytes_unchecked(string: &[u8]) -> &Self { + const fn from_inner(string: &[u8]) -> &Self { // SAFETY: This struct has a layout that makes this operation safe. unsafe { mem::transmute(string) } } @@ -147,10 +135,8 @@ impl RawOsStr { #[inline] #[must_use] pub fn new(string: &OsStr) -> Cow<'_, Self> { - match string.to_raw_bytes() { - Cow::Borrowed(string) => { - Cow::Borrowed(Self::from_raw_bytes_unchecked(string)) - } + match imp::os_str_to_bytes(string) { + Cow::Borrowed(string) => Cow::Borrowed(Self::from_inner(string)), Cow::Owned(string) => Cow::Owned(RawOsString(string)), } } @@ -175,13 +161,15 @@ impl RawOsStr { #[inline] #[must_use] pub fn from_str(string: &str) -> &Self { - Self::from_raw_bytes_unchecked(string.as_bytes()) + Self::from_inner(string.as_bytes()) } - /// Returns the byte string stored by this container. + /// Wraps a byte string, without copying or encoding conversion. + /// + /// # Panics /// - /// The result will match what would be returned by - /// [`OsStrBytes::to_raw_bytes`] for the same string. + /// Panics if the string is not valid for the [unspecified encoding] used + /// by this crate. /// /// # Examples /// @@ -189,15 +177,116 @@ impl RawOsStr { /// use std::env; /// # use std::io; /// - /// use os_str_bytes::OsStrBytes; /// use os_str_bytes::RawOsStr; /// /// let os_string = env::current_exe()?.into_os_string(); /// let raw = RawOsStr::new(&os_string); - /// assert_eq!(os_string.to_raw_bytes(), raw.as_raw_bytes()); + /// let raw_bytes = raw.as_raw_bytes(); + /// assert_eq!(&*raw, RawOsStr::assert_from_raw_bytes(raw_bytes)); /// # /// # Ok::<_, io::Error>(()) /// ``` + /// + /// [unspecified encoding]: super#encoding + #[inline] + #[must_use = "method should not be used for validation"] + #[track_caller] + pub fn assert_from_raw_bytes(string: &[u8]) -> &Self { + expect_encoded!(raw::validate_bytes(string)); + + Self::from_inner(string) + } + + if_checked_conversions! { + /// Wraps a byte string, without copying or encoding conversion. + /// + /// [`assert_from_raw_bytes`] should almost always be used instead. For + /// more information, see [`EncodingError`]. + /// + /// # Errors + /// + /// See documentation for [`EncodingError`]. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::RawOsStr; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// let raw = RawOsStr::new(&os_string); + /// assert_eq!(Ok(&*raw), RawOsStr::from_raw_bytes(raw.as_raw_bytes())); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [`assert_from_raw_bytes`]: Self::assert_from_raw_bytes + #[cfg_attr( + os_str_bytes_docs_rs, + doc(cfg(feature = "checked_conversions")) + )] + #[inline] + pub fn from_raw_bytes(string: &[u8]) -> Result<&Self> { + raw::validate_bytes(string) + .map(|()| Self::from_inner(string)) + .map_err(EncodingError) + } + } + + /// Wraps a byte string, without copying or encoding conversion. + /// + /// # Safety + /// + /// The string must be valid for the [unspecified encoding] used by this + /// crate. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::RawOsStr; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// let raw = RawOsStr::new(&os_string); + /// let raw_bytes = raw.as_raw_bytes(); + /// assert_eq!(&*raw, unsafe { + /// RawOsStr::from_raw_bytes_unchecked(raw_bytes) + /// }); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [unspecified encoding]: super#encoding + #[inline] + #[must_use] + #[track_caller] + pub unsafe fn from_raw_bytes_unchecked(string: &[u8]) -> &Self { + if cfg!(debug_assertions) { + expect_encoded!(raw::validate_bytes(string)); + } + + Self::from_inner(string) + } + + /// Returns the byte string stored by this container. + /// + /// The returned string will use an [unspecified encoding]. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let string = "foobar"; + /// let raw = RawOsStr::from_str(string); + /// assert_eq!(string.as_bytes(), raw.as_raw_bytes()); + /// ``` + /// + /// [unspecified encoding]: super#encoding #[inline] #[must_use] pub fn as_raw_bytes(&self) -> &[u8] { @@ -206,10 +295,6 @@ impl RawOsStr { /// Equivalent to [`str::contains`]. /// - /// # Panics - /// - /// Panics if the pattern is a byte outside of the ASCII range. - /// /// # Examples /// /// ``` @@ -230,10 +315,6 @@ impl RawOsStr { /// Equivalent to [`str::ends_with`]. /// - /// # Panics - /// - /// Panics if the pattern is a byte outside of the ASCII range. - /// /// # Examples /// /// ``` @@ -257,10 +338,6 @@ impl RawOsStr { /// Equivalent to [`str::ends_with`] but accepts this type for the pattern. /// - /// # Panics - /// - /// Panics if the pattern is a byte outside of the ASCII range. - /// /// # Examples /// /// ``` @@ -278,10 +355,6 @@ impl RawOsStr { /// Equivalent to [`str::find`]. /// - /// # Panics - /// - /// Panics if the pattern is a byte outside of the ASCII range. - /// /// # Examples /// /// ``` @@ -347,10 +420,6 @@ impl RawOsStr { /// Equivalent to [`str::rfind`]. /// - /// # Panics - /// - /// Panics if the pattern is a byte outside of the ASCII range. - /// /// # Examples /// /// ``` @@ -372,19 +441,32 @@ impl RawOsStr { rfind(&self.0, pat) } + fn split_once_raw_with<P, F>( + &self, + pat: &P, + find_fn: F, + ) -> Option<(&Self, &Self)> + where + F: FnOnce(&[u8], &[u8]) -> Option<usize>, + P: EncodedPattern, + { + let pat = pat.__get(); + + let index = find_fn(&self.0, pat)?; + let prefix = &self.0[..index]; + let suffix = &self.0[index + pat.len()..]; + Some((Self::from_inner(prefix), Self::from_inner(suffix))) + } + pub(super) fn rsplit_once_raw<P>(&self, pat: &P) -> Option<(&Self, &Self)> where P: EncodedPattern, { - impl_split_once_raw!(self, pat, rfind) + self.split_once_raw_with(pat, rfind) } /// Equivalent to [`str::rsplit_once`]. /// - /// # Panics - /// - /// Panics if the pattern is a byte outside of the ASCII range. - /// /// # Examples /// /// ``` @@ -413,19 +495,18 @@ impl RawOsStr { fn index_boundary_error(&self, index: usize) -> ! { debug_assert!(raw::is_continuation(self.0[index])); - let start = self.0[..index] + let start = expect_encoded!(self.0[..index] .iter() - .rposition(|&x| !raw::is_continuation(x)) - .expect("invalid raw bytes"); + .rposition(|&x| !raw::is_continuation(x))); let mut end = index + 1; end += self.0[end..] .iter() - .position(|&x| !raw::is_continuation(x)) - .unwrap_or_else(|| self.raw_len() - end); + .take_while(|&&x| raw::is_continuation(x)) + .count(); let code_point = raw::decode_code_point(&self.0[start..end]); panic!( "byte index {} is not a valid boundary; it is inside U+{:04X} \ - (bytes {}..{})", + (bytes {}..{})", index, code_point, start, end, ); } @@ -443,7 +524,7 @@ impl RawOsStr { /// /// # Panics /// - /// Panics if the pattern is a byte outside of the ASCII range or empty. + /// Panics if the pattern is empty. /// /// # Examples /// @@ -455,6 +536,7 @@ impl RawOsStr { /// ``` #[inline] #[must_use] + #[track_caller] pub fn split<P>(&self, pat: P) -> Split<'_, P> where P: Pattern, @@ -483,29 +565,23 @@ impl RawOsStr { /// [valid boundary]: #indices #[inline] #[must_use] + #[track_caller] pub fn split_at(&self, mid: usize) -> (&Self, &Self) { self.check_bound(mid); let (prefix, suffix) = self.0.split_at(mid); - ( - Self::from_raw_bytes_unchecked(prefix), - Self::from_raw_bytes_unchecked(suffix), - ) + (Self::from_inner(prefix), Self::from_inner(suffix)) } pub(super) fn split_once_raw<P>(&self, pat: &P) -> Option<(&Self, &Self)> where P: EncodedPattern, { - impl_split_once_raw!(self, pat, find) + self.split_once_raw_with(pat, find) } /// Equivalent to [`str::split_once`]. /// - /// # Panics - /// - /// Panics if the pattern is a byte outside of the ASCII range. - /// /// # Examples /// /// ``` @@ -529,10 +605,6 @@ impl RawOsStr { /// Equivalent to [`str::starts_with`]. /// - /// # Panics - /// - /// Panics if the pattern is a byte outside of the ASCII range. - /// /// # Examples /// /// ``` @@ -557,10 +629,6 @@ impl RawOsStr { /// Equivalent to [`str::starts_with`] but accepts this type for the /// pattern. /// - /// # Panics - /// - /// Panics if the pattern is a byte outside of the ASCII range. - /// /// # Examples /// /// ``` @@ -578,10 +646,6 @@ impl RawOsStr { /// Equivalent to [`str::strip_prefix`]. /// - /// # Panics - /// - /// Panics if the pattern is a byte outside of the ASCII range. - /// /// # Examples /// /// ``` @@ -603,15 +667,11 @@ impl RawOsStr { let pat = pat.__encode(); let pat = pat.__get(); - self.0.strip_prefix(pat).map(Self::from_raw_bytes_unchecked) + self.0.strip_prefix(pat).map(Self::from_inner) } /// Equivalent to [`str::strip_suffix`]. /// - /// # Panics - /// - /// Panics if the pattern is a byte outside of the ASCII range. - /// /// # Examples /// /// ``` @@ -633,11 +693,14 @@ impl RawOsStr { let pat = pat.__encode(); let pat = pat.__get(); - self.0.strip_suffix(pat).map(Self::from_raw_bytes_unchecked) + self.0.strip_suffix(pat).map(Self::from_inner) } /// Converts this representation back to a platform-native string. /// + /// When possible, use [`RawOsStrCow::into_os_str`] for a more efficient + /// conversion on some platforms. + /// /// # Examples /// /// ``` @@ -655,7 +718,7 @@ impl RawOsStr { #[inline] #[must_use] pub fn to_os_str(&self) -> Cow<'_, OsStr> { - OsStr::from_raw_bytes(&self.0).expect("invalid raw bytes") + expect_encoded!(imp::os_str_from_bytes(&self.0)) } /// Equivalent to [`OsStr::to_str`]. @@ -704,12 +767,32 @@ impl RawOsStr { String::from_utf8_lossy(&self.0) } + fn trim_matches_raw_with<P, F>(&self, pat: &P, strip_fn: F) -> &Self + where + F: for<'a> Fn(&'a [u8], &[u8]) -> Option<&'a [u8]>, + P: EncodedPattern, + { + let pat = pat.__get(); + if pat.is_empty() { + return self; + } + + let mut string = &self.0; + while let Some(substring) = strip_fn(string, pat) { + string = substring; + } + Self::from_inner(string) + } + + fn trim_end_matches_raw<P>(&self, pat: &P) -> &Self + where + P: EncodedPattern, + { + self.trim_matches_raw_with(pat, <[_]>::strip_suffix) + } + /// Equivalent to [`str::trim_end_matches`]. /// - /// # Panics - /// - /// Panics if the pattern is a byte outside of the ASCII range. - /// /// # Examples /// /// ``` @@ -719,19 +802,44 @@ impl RawOsStr { /// assert_eq!("111foo1bar", raw.trim_end_matches("1")); /// assert_eq!("111foo1bar111", raw.trim_end_matches("o")); /// ``` + #[inline] #[must_use] pub fn trim_end_matches<P>(&self, pat: P) -> &Self where P: Pattern, { - impl_trim_matches!(self, pat, strip_suffix) + self.trim_end_matches_raw(&pat.__encode()) } - /// Equivalent to [`str::trim_start_matches`]. + /// Equivalent to [`str::trim_matches`]. /// - /// # Panics + /// # Examples /// - /// Panics if the pattern is a byte outside of the ASCII range. + /// ``` + /// use os_str_bytes::RawOsStr; + /// + /// let raw = RawOsStr::from_str("111foo1bar111"); + /// assert_eq!("foo1bar", raw.trim_matches("1")); + /// assert_eq!("111foo1bar111", raw.trim_matches("o")); + /// ``` + #[inline] + #[must_use] + pub fn trim_matches<P>(&self, pat: P) -> &Self + where + P: Pattern, + { + let pat = pat.__encode(); + self.trim_start_matches_raw(&pat).trim_end_matches_raw(&pat) + } + + fn trim_start_matches_raw<P>(&self, pat: &P) -> &Self + where + P: EncodedPattern, + { + self.trim_matches_raw_with(pat, <[_]>::strip_prefix) + } + + /// Equivalent to [`str::trim_start_matches`]. /// /// # Examples /// @@ -742,12 +850,13 @@ impl RawOsStr { /// assert_eq!("foo1bar111", raw.trim_start_matches("1")); /// assert_eq!("111foo1bar111", raw.trim_start_matches("o")); /// ``` + #[inline] #[must_use] pub fn trim_start_matches<P>(&self, pat: P) -> &Self where P: Pattern, { - impl_trim_matches!(self, pat, strip_prefix) + self.trim_start_matches_raw(&pat.__encode()) } } @@ -781,40 +890,17 @@ impl Default for &RawOsStr { impl<'a> From<&'a RawOsStr> for Cow<'a, RawOsStr> { #[inline] - fn from(other: &'a RawOsStr) -> Self { - Cow::Borrowed(other) + fn from(value: &'a RawOsStr) -> Self { + Cow::Borrowed(value) } } -macro_rules! r#impl { - ( - $index_type:ty - $(, $index_var:ident , $first_bound:expr $(, $second_bound:expr)?)? - ) => { - impl Index<$index_type> for RawOsStr { - type Output = Self; - - #[inline] - fn index(&self, idx: $index_type) -> &Self::Output { - $( - let $index_var = &idx; - self.check_bound($first_bound); - $(self.check_bound($second_bound);)? - )? - - Self::from_raw_bytes_unchecked(&self.0[idx]) - } - } - }; +impl From<Box<str>> for Box<RawOsStr> { + #[inline] + fn from(value: Box<str>) -> Self { + value.into_boxed_bytes().transmute_box() + } } -r#impl!(Range<usize>, x, x.start, x.end); -r#impl!(RangeFrom<usize>, x, x.start); -r#impl!(RangeFull); -// [usize::MAX] will always be a valid inclusive end index. -#[rustfmt::skip] -r#impl!(RangeInclusive<usize>, x, *x.start(), x.end().wrapping_add(1)); -r#impl!(RangeTo<usize>, x, x.end); -r#impl!(RangeToInclusive<usize>, x, x.end.wrapping_add(1)); impl ToOwned for RawOsStr { type Owned = RawOsString; @@ -825,11 +911,74 @@ impl ToOwned for RawOsStr { } } -/// A container for the byte strings converted by [`OsStringBytes`]. +/// Extensions to [`Cow<RawOsStr>`] for additional conversions. /// -/// For more information, see [`RawOsStr`]. +/// [`Cow<RawOsStr>`]: Cow +#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))] +pub trait RawOsStrCow<'a>: private::Sealed { + /// Converts this representation back to a platform-native string. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::RawOsStr; + /// use os_str_bytes::RawOsStrCow; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// let raw = RawOsStr::new(&os_string); + /// assert_eq!(os_string, raw.into_os_str()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + #[must_use] + fn into_os_str(self) -> Cow<'a, OsStr>; + + /// Returns the byte string stored by this container. + /// + /// The returned string will use an [unspecified encoding]. + /// + /// # Examples + /// + /// ``` + /// use std::borrow::Cow; + /// + /// use os_str_bytes::RawOsStr; + /// use os_str_bytes::RawOsStrCow; + /// + /// let string = "foobar"; + /// let raw = Cow::Borrowed(RawOsStr::from_str(string)); + /// assert_eq!(string.as_bytes(), &*raw.into_raw_bytes()); + /// ``` + /// + /// [unspecified encoding]: super#encoding + #[must_use] + fn into_raw_bytes(self) -> Cow<'a, [u8]>; +} + +impl<'a> RawOsStrCow<'a> for Cow<'a, RawOsStr> { + #[inline] + fn into_os_str(self) -> Cow<'a, OsStr> { + match self { + Cow::Borrowed(string) => string.to_os_str(), + Cow::Owned(string) => Cow::Owned(string.into_os_string()), + } + } + + #[inline] + fn into_raw_bytes(self) -> Cow<'a, [u8]> { + match self { + Cow::Borrowed(string) => Cow::Borrowed(&string.0), + Cow::Owned(string) => Cow::Owned(string.0), + } + } +} + +/// A container for owned byte strings converted by this crate. /// -/// [unspecified encoding]: super#encoding +/// For more information, see [`RawOsStr`]. #[derive(Clone, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] #[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))] pub struct RawOsString(Vec<u8>); @@ -856,7 +1005,7 @@ impl RawOsString { #[inline] #[must_use] pub fn new(string: OsString) -> Self { - Self(string.into_raw_vec()) + Self(imp::os_string_into_vec(string)) } /// Wraps a string, without copying or encoding conversion. @@ -881,7 +1030,12 @@ impl RawOsString { Self(string.into_bytes()) } - /// Converts this representation back to a platform-native string. + /// Wraps a byte string, without copying or encoding conversion. + /// + /// # Panics + /// + /// Panics if the string is not valid for the [unspecified encoding] used + /// by this crate. /// /// # Examples /// @@ -892,21 +1046,139 @@ impl RawOsString { /// use os_str_bytes::RawOsString; /// /// let os_string = env::current_exe()?.into_os_string(); - /// let raw = RawOsString::new(os_string.clone()); - /// assert_eq!(os_string, raw.into_os_string()); + /// let raw = RawOsString::new(os_string); + /// let raw_bytes = raw.clone().into_raw_vec(); + /// assert_eq!(raw, RawOsString::assert_from_raw_vec(raw_bytes)); /// # /// # Ok::<_, io::Error>(()) /// ``` + /// + /// [unspecified encoding]: super#encoding + #[inline] + #[must_use = "method should not be used for validation"] + #[track_caller] + pub fn assert_from_raw_vec(string: Vec<u8>) -> Self { + expect_encoded!(raw::validate_bytes(&string)); + + Self(string) + } + + if_checked_conversions! { + /// Wraps a byte string, without copying or encoding conversion. + /// + /// [`assert_from_raw_vec`] should almost always be used instead. For + /// more information, see [`EncodingError`]. + /// + /// # Errors + /// + /// See documentation for [`EncodingError`]. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::RawOsString; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// let raw = RawOsString::new(os_string); + /// let raw_clone = raw.clone(); + /// assert_eq!(Ok(raw), RawOsString::from_raw_vec(raw_clone.into_raw_vec())); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [`assert_from_raw_vec`]: Self::assert_from_raw_vec + #[cfg_attr( + os_str_bytes_docs_rs, + doc(cfg(feature = "checked_conversions")) + )] + #[inline] + pub fn from_raw_vec(string: Vec<u8>) -> Result<Self> { + raw::validate_bytes(&string) + .map(|()| Self(string)) + .map_err(EncodingError) + } + } + + /// Wraps a byte string, without copying or encoding conversion. + /// + /// # Safety + /// + /// The string must be valid for the [unspecified encoding] used by this + /// crate. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; + /// + /// use os_str_bytes::RawOsString; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// let raw = RawOsString::new(os_string); + /// let raw_bytes = raw.clone().into_raw_vec(); + /// assert_eq!(raw, unsafe { + /// RawOsString::from_raw_vec_unchecked(raw_bytes) + /// }); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + /// + /// [unspecified encoding]: super#encoding #[inline] #[must_use] - pub fn into_os_string(self) -> OsString { - OsString::from_raw_vec(self.0).expect("invalid raw bytes") + #[track_caller] + pub unsafe fn from_raw_vec_unchecked(string: Vec<u8>) -> Self { + if cfg!(debug_assertions) { + expect_encoded!(raw::validate_bytes(&string)); + } + + Self(string) } - /// Returns the byte string stored by this container. + /// Equivalent to [`String::clear`]. + /// + /// # Examples + /// + /// ``` + /// use std::env; + /// # use std::io; /// - /// The result will match what would be returned by - /// [`OsStringBytes::into_raw_vec`] for the same string. + /// use os_str_bytes::RawOsString; + /// + /// let os_string = env::current_exe()?.into_os_string(); + /// let mut raw = RawOsString::new(os_string); + /// raw.clear(); + /// assert!(raw.is_empty()); + /// # + /// # Ok::<_, io::Error>(()) + /// ``` + #[inline] + pub fn clear(&mut self) { + self.0.clear(); + } + + /// Equivalent to [`String::into_boxed_str`]. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsString; + /// + /// let string = "foobar".to_owned(); + /// let raw = RawOsString::from_string(string.clone()); + /// assert_eq!(string, *raw.into_box()); + /// ``` + #[inline] + #[must_use] + pub fn into_box(self) -> Box<RawOsStr> { + self.0.into_boxed_slice().transmute_box() + } + + /// Converts this representation back to a platform-native string. /// /// # Examples /// @@ -914,17 +1186,37 @@ impl RawOsString { /// use std::env; /// # use std::io; /// - /// use os_str_bytes::OsStringBytes; /// use os_str_bytes::RawOsString; /// /// let os_string = env::current_exe()?.into_os_string(); /// let raw = RawOsString::new(os_string.clone()); - /// assert_eq!(os_string.into_raw_vec(), raw.into_raw_vec()); + /// assert_eq!(os_string, raw.into_os_string()); /// # /// # Ok::<_, io::Error>(()) /// ``` #[inline] #[must_use] + pub fn into_os_string(self) -> OsString { + expect_encoded!(imp::os_string_from_vec(self.0)) + } + + /// Returns the byte string stored by this container. + /// + /// The returned string will use an [unspecified encoding]. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsString; + /// + /// let string = "foobar".to_owned(); + /// let raw = RawOsString::from_string(string.clone()); + /// assert_eq!(string.into_bytes(), raw.into_raw_vec()); + /// ``` + /// + /// [unspecified encoding]: super#encoding + #[inline] + #[must_use] pub fn into_raw_vec(self) -> Vec<u8> { self.0 } @@ -941,9 +1233,77 @@ impl RawOsString { /// assert_eq!(Ok(string), raw.into_string()); /// ``` #[inline] - pub fn into_string(self) -> Result<String, Self> { + pub fn into_string(self) -> result::Result<String, Self> { String::from_utf8(self.0).map_err(|x| Self(x.into_bytes())) } + + /// Equivalent to [`String::shrink_to_fit`]. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsString; + /// + /// let string = "foobar".to_owned(); + /// let mut raw = RawOsString::from_string(string.clone()); + /// raw.shrink_to_fit(); + /// assert_eq!(string, raw); + /// ``` + #[inline] + pub fn shrink_to_fit(&mut self) { + self.0.shrink_to_fit(); + } + + /// Equivalent to [`String::split_off`]. + /// + /// # Panics + /// + /// Panics if the index is not a [valid boundary]. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsString; + /// + /// let mut raw = RawOsString::from_string("foobar".to_owned()); + /// assert_eq!("bar", raw.split_off(3)); + /// assert_eq!("foo", raw); + /// ``` + /// + /// [valid boundary]: RawOsStr#indices + #[inline] + #[must_use] + #[track_caller] + pub fn split_off(&mut self, at: usize) -> Self { + self.check_bound(at); + + Self(self.0.split_off(at)) + } + + /// Equivalent to [`String::truncate`]. + /// + /// # Panics + /// + /// Panics if the index is not a [valid boundary]. + /// + /// # Examples + /// + /// ``` + /// use os_str_bytes::RawOsString; + /// + /// let mut raw = RawOsString::from_string("foobar".to_owned()); + /// raw.truncate(3); + /// assert_eq!("foo", raw); + /// ``` + /// + /// [valid boundary]: RawOsStr#indices + #[inline] + #[track_caller] + pub fn truncate(&mut self, new_len: usize) { + self.check_bound(new_len); + + self.0.truncate(new_len); + } } impl AsRef<RawOsStr> for RawOsString { @@ -965,46 +1325,41 @@ impl Deref for RawOsString { #[inline] fn deref(&self) -> &Self::Target { - RawOsStr::from_raw_bytes_unchecked(&self.0) + RawOsStr::from_inner(&self.0) } } -impl From<String> for RawOsString { +impl From<RawOsString> for Box<RawOsStr> { #[inline] - fn from(other: String) -> Self { - Self::from_string(other) + fn from(value: RawOsString) -> Self { + value.into_box() } } -impl From<RawOsString> for Cow<'_, RawOsStr> { +impl From<Box<RawOsStr>> for RawOsString { #[inline] - fn from(other: RawOsString) -> Self { - Cow::Owned(other) + fn from(value: Box<RawOsStr>) -> Self { + Self(value.transmute_box::<[_]>().into_vec()) } } -macro_rules! r#impl { - ( $index_type:ty ) => { - impl Index<$index_type> for RawOsString { - type Output = RawOsStr; +impl From<RawOsString> for Cow<'_, RawOsStr> { + #[inline] + fn from(value: RawOsString) -> Self { + Cow::Owned(value) + } +} - #[inline] - fn index(&self, idx: $index_type) -> &Self::Output { - &(**self)[idx] - } - } - }; +impl From<String> for RawOsString { + #[inline] + fn from(value: String) -> Self { + Self::from_string(value) + } } -r#impl!(Range<usize>); -r#impl!(RangeFrom<usize>); -r#impl!(RangeFull); -r#impl!(RangeInclusive<usize>); -r#impl!(RangeTo<usize>); -r#impl!(RangeToInclusive<usize>); -struct Buffer<'a>(&'a [u8]); +struct DebugBuffer<'a>(&'a [u8]); -impl Debug for Buffer<'_> { +impl Debug for DebugBuffer<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { f.write_str("\"")?; @@ -1050,7 +1405,7 @@ macro_rules! r#impl { #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { f.debug_tuple(stringify!($type)) - .field(&Buffer(&self.0)) + .field(&DebugBuffer(&self.0)) .finish() } } @@ -1060,6 +1415,42 @@ r#impl!(RawOsStr); r#impl!(RawOsString); macro_rules! r#impl { + ( $index_type:ty $(, $index_var:ident , $($bound:expr),+)? ) => { + impl Index<$index_type> for RawOsStr { + type Output = Self; + + #[inline] + fn index(&self, idx: $index_type) -> &Self::Output { + $( + let $index_var = &idx; + $(self.check_bound($bound);)+ + )? + + Self::from_inner(&self.0[idx]) + } + } + + impl Index<$index_type> for RawOsString { + type Output = RawOsStr; + + #[allow(clippy::indexing_slicing)] + #[inline] + fn index(&self, idx: $index_type) -> &Self::Output { + &(**self)[idx] + } + } + }; +} +r#impl!(Range<usize>, x, x.start, x.end); +r#impl!(RangeFrom<usize>, x, x.start); +r#impl!(RangeFull); +// [usize::MAX] will always be a valid inclusive end index. +#[rustfmt::skip] +r#impl!(RangeInclusive<usize>, x, *x.start(), x.end().wrapping_add(1)); +r#impl!(RangeTo<usize>, x, x.end); +r#impl!(RangeToInclusive<usize>, x, x.end.wrapping_add(1)); + +macro_rules! r#impl { ( $type:ty , $other_type:ty ) => { impl PartialEq<$other_type> for $type { #[inline] diff --git a/src/util.rs b/src/util.rs index bd28b7b..f931969 100644 --- a/src/util.rs +++ b/src/util.rs @@ -4,7 +4,6 @@ pub(super) const CONT_MASK: u8 = (1 << BYTE_SHIFT) - 1; pub(super) const CONT_TAG: u8 = 0b1000_0000; -#[cfg_attr(not(windows), allow(dead_code))] pub(super) const fn is_continuation(byte: u8) -> bool { byte & !CONT_MASK == CONT_TAG } diff --git a/src/wasm32/mod.rs b/src/wasm/mod.rs index f8ae368..a8a2996 100644 --- a/src/wasm32/mod.rs +++ b/src/wasm/mod.rs @@ -13,12 +13,12 @@ if_raw_str! { pub(super) mod raw; } -#[derive(Debug, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub(super) struct EncodingError(Utf8Error); impl Display for EncodingError { - fn fmt(&self, formatter: &mut Formatter<'_>) -> fmt::Result { - write!(formatter, "os_str_bytes: {}", self.0) + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "os_str_bytes: {}", self.0) } } @@ -30,15 +30,17 @@ macro_rules! expect_utf8 { ( $result:expr ) => { $result.expect( "platform string contains invalid UTF-8, which should not be \ - possible", + possible", ) }; } +fn from_bytes(string: &[u8]) -> Result<&str> { + str::from_utf8(string).map_err(EncodingError) +} + pub(super) fn os_str_from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>> { - str::from_utf8(string) - .map(|x| Cow::Borrowed(OsStr::new(x))) - .map_err(EncodingError) + from_bytes(string).map(|x| Cow::Borrowed(OsStr::new(x))) } pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> { diff --git a/src/wasm32/raw.rs b/src/wasm/raw.rs index 5645900..fb291a6 100644 --- a/src/wasm32/raw.rs +++ b/src/wasm/raw.rs @@ -4,8 +4,22 @@ use std::str; pub(crate) use crate::util::is_continuation; +use super::Result; + +#[allow(dead_code)] +#[path = "../common/raw.rs"] +mod common_raw; +pub(crate) use common_raw::ends_with; +pub(crate) use common_raw::starts_with; +#[cfg(feature = "uniquote")] +pub(crate) use common_raw::uniquote; + +pub(crate) fn validate_bytes(string: &[u8]) -> Result<()> { + super::from_bytes(string).map(drop) +} + pub(crate) fn decode_code_point(string: &[u8]) -> u32 { - let string = str::from_utf8(string).expect("invalid string"); + let string = expect_encoded!(str::from_utf8(string)); let mut chars = string.chars(); let ch = chars .next() @@ -14,26 +28,7 @@ pub(crate) fn decode_code_point(string: &[u8]) -> u32 { ch.into() } -pub(crate) fn ends_with(string: &[u8], suffix: &[u8]) -> bool { - string.ends_with(suffix) -} - -pub(crate) fn starts_with(string: &[u8], prefix: &[u8]) -> bool { - string.starts_with(prefix) -} - pub(crate) fn debug(string: &[u8], _: &mut Formatter<'_>) -> fmt::Result { assert!(string.is_empty()); Ok(()) } - -#[cfg(feature = "uniquote")] -pub(crate) mod uniquote { - use uniquote::Formatter; - use uniquote::Quote; - use uniquote::Result; - - pub(crate) fn escape(string: &[u8], f: &mut Formatter<'_>) -> Result { - string.escape(f) - } -} diff --git a/src/windows/mod.rs b/src/windows/mod.rs index 3b6105b..ed9e60b 100644 --- a/src/windows/mod.rs +++ b/src/windows/mod.rs @@ -9,6 +9,7 @@ use std::ffi::OsString; use std::fmt; use std::fmt::Display; use std::fmt::Formatter; +use std::ops::Not; use std::os::windows::ffi::OsStrExt; use std::os::windows::ffi::OsStringExt; use std::result; @@ -19,10 +20,12 @@ if_raw_str! { } mod wtf8; -use wtf8::encode_wide; use wtf8::DecodeWide; -#[derive(Debug, Eq, PartialEq)] +#[cfg(test)] +mod tests; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub(super) enum EncodingError { Byte(u8), CodePoint(u32), @@ -42,11 +45,11 @@ impl EncodingError { } impl Display for EncodingError { - fn fmt(&self, formatter: &mut Formatter<'_>) -> fmt::Result { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!( - formatter, + f, "byte sequence is not representable in the platform encoding; \ - error at {}", + error at {}", self.position(), ) } @@ -56,16 +59,21 @@ impl Error for EncodingError {} type Result<T> = result::Result<T, EncodingError>; -fn from_bytes(string: &[u8]) -> Result<OsString> { - let encoder = encode_wide(string); +fn from_bytes(string: &[u8]) -> Result<Option<OsString>> { + let mut encoder = wtf8::encode_wide(string); // Collecting an iterator into a result ignores the size hint: // https://github.com/rust-lang/rust/issues/48994 let mut encoded_string = Vec::with_capacity(encoder.size_hint().0); - for wchar in encoder { + for wchar in &mut encoder { encoded_string.push(wchar?); } - Ok(OsStringExt::from_wide(&encoded_string)) + + debug_assert_eq!(str::from_utf8(string).is_ok(), encoder.is_still_utf8()); + Ok(encoder + .is_still_utf8() + .not() + .then(|| OsStringExt::from_wide(&encoded_string))) } fn to_bytes(os_string: &OsStr) -> Vec<u8> { @@ -77,7 +85,14 @@ fn to_bytes(os_string: &OsStr) -> Vec<u8> { } pub(super) fn os_str_from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>> { - from_bytes(string).map(Cow::Owned) + from_bytes(string).map(|os_string| { + os_string.map(Cow::Owned).unwrap_or_else(|| { + // SAFETY: This slice was validated to be UTF-8. + Cow::Borrowed(OsStr::new(unsafe { + str::from_utf8_unchecked(string) + })) + }) + }) } pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> { @@ -85,68 +100,14 @@ pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> { } pub(super) fn os_string_from_vec(string: Vec<u8>) -> Result<OsString> { - from_bytes(&string) + from_bytes(&string).map(|os_string| { + os_string.unwrap_or_else(|| { + // SAFETY: This slice was validated to be UTF-8. + unsafe { String::from_utf8_unchecked(string) }.into() + }) + }) } pub(super) fn os_string_into_vec(os_string: OsString) -> Vec<u8> { to_bytes(&os_string) } - -#[cfg(test)] -mod tests { - use std::ffi::OsStr; - - use crate::OsStrBytes; - - use super::EncodingError; - - #[test] - fn test_invalid() { - use EncodingError::Byte; - use EncodingError::CodePoint; - use EncodingError::End; - - test_error(Byte(b'\x83'), b"\x0C\x83\xD7\x3E"); - test_error(Byte(b'\x52'), b"\x19\xF7\x52\x84"); - test_error(Byte(b'\xB8'), b"\x70\xB8\x1F\x66"); - test_error(CodePoint(0x34_0388), b"\x70\xFD\x80\x8E\x88"); - test_error(Byte(b'\x80'), b"\x80"); - test_error(Byte(b'\x80'), b"\x80\x80"); - test_error(Byte(b'\x80'), b"\x80\x80\x80"); - test_error(Byte(b'\x81'), b"\x81"); - test_error(Byte(b'\x88'), b"\x88\xB4\xC7\x46"); - test_error(Byte(b'\x97'), b"\x97\xCE\x06"); - test_error(Byte(b'\x00'), b"\xC2\x00"); - test_error(Byte(b'\x7F'), b"\xC2\x7F"); - test_error(Byte(b'\x09'), b"\xCD\x09\x95"); - test_error(Byte(b'\x43'), b"\xCD\x43\x5F\xA0"); - test_error(Byte(b'\x69'), b"\xD7\x69\xB2"); - test_error(CodePoint(0x528), b"\xE0\x94\xA8"); - test_error(CodePoint(0x766), b"\xE0\x9D\xA6\x12\xAE"); - test_error(Byte(b'\xFD'), b"\xE2\xAB\xFD\x51"); - test_error(Byte(b'\xC4'), b"\xE3\xC4"); - test_error(CodePoint(0xDC00), b"\xED\xA0\x80\xED\xB0\x80"); - test_error(End(), b"\xF1"); - test_error(End(), b"\xF1\x80"); - test_error(End(), b"\xF1\x80\x80"); - test_error(Byte(b'\xF1'), b"\xF1\x80\x80\xF1"); - test_error(CodePoint(0x11_09CC), b"\xF4\x90\xA7\x8C"); - test_error(CodePoint(0x15_EC46), b"\xF5\x9E\xB1\x86"); - test_error(End(), b"\xFB"); - test_error(End(), b"\xFB\x80"); - test_error(End(), b"\xFB\x80\x80"); - test_error(CodePoint(0x2C_0000), b"\xFB\x80\x80\x80"); - test_error(End(), b"\xFF"); - test_error(End(), b"\xFF\x80"); - test_error(End(), b"\xFF\x80\x80"); - test_error(CodePoint(0x3C_0000), b"\xFF\x80\x80\x80"); - test_error(CodePoint(0x3C_6143), b"\xFF\x86\x85\x83"); - - fn test_error(error: EncodingError, string: &[u8]) { - assert_eq!( - Err(error), - OsStr::from_raw_bytes(string).map_err(|x| x.0), - ); - } - } -} diff --git a/src/windows/raw.rs b/src/windows/raw.rs index 630eb01..80953de 100644 --- a/src/windows/raw.rs +++ b/src/windows/raw.rs @@ -7,19 +7,23 @@ use super::wtf8; pub(crate) use super::wtf8::ends_with; pub(crate) use super::wtf8::starts_with; use super::wtf8::CodePoints; +use super::Result; + +pub(crate) fn validate_bytes(string: &[u8]) -> Result<()> { + wtf8::encode_wide(string).try_for_each(|x| x.map(drop)) +} pub(crate) fn encode_wide_unchecked( string: &[u8], ) -> impl '_ + Iterator<Item = u16> { - wtf8::encode_wide(string).map(|x| x.expect("invalid string")) + wtf8::encode_wide(string).map(|x| expect_encoded!(x)) } pub(crate) fn decode_code_point(string: &[u8]) -> u32 { let mut code_points = CodePoints::new(string.iter().copied()); - let code_point = code_points + let code_point = expect_encoded!(code_points .next() - .expect("cannot parse code point from empty string") - .expect("invalid string"); + .expect("cannot parse code point from empty string")); assert_eq!(None, code_points.next(), "multiple code points found"); code_point } diff --git a/src/windows/wtf8/code_points.rs b/src/windows/wtf8/code_points.rs index b265db3..9800d78 100644 --- a/src/windows/wtf8/code_points.rs +++ b/src/windows/wtf8/code_points.rs @@ -1,3 +1,4 @@ +use std::iter::FusedIterator; use std::iter::Peekable; use std::mem; @@ -14,6 +15,7 @@ where { iter: Peekable<I>, surrogate: bool, + still_utf8: bool, } impl<I> CodePoints<I> @@ -22,30 +24,34 @@ where { pub(in super::super) fn new<S>(string: S) -> Self where - S: IntoIterator<IntoIter = I, Item = I::Item>, + S: IntoIterator<IntoIter = I>, { Self { iter: string.into_iter().peekable(), surrogate: false, + still_utf8: true, } } + pub(super) fn is_still_utf8(&self) -> bool { + self.still_utf8 + } + fn consume_next(&mut self, code_point: &mut u32) -> Result<()> { - if let Some(&byte) = self.iter.peek() { - if !is_continuation(byte) { - self.surrogate = false; - // Not consuming this byte will be useful if this crate ever - // offers a way to encode lossily. - return Err(EncodingError::Byte(byte)); - } - *code_point = - (*code_point << BYTE_SHIFT) | u32::from(byte & CONT_MASK); + let &byte = self.iter.peek().ok_or(EncodingError::End())?; - let removed = self.iter.next(); - debug_assert_eq!(Some(byte), removed); - } else { - return Err(EncodingError::End()); + if !is_continuation(byte) { + self.surrogate = false; + // Not consuming this byte will be useful if this crate ever offers + // a way to encode lossily. + return Err(EncodingError::Byte(byte)); } + *code_point = + (*code_point << BYTE_SHIFT) | u32::from(byte & CONT_MASK); + + let removed = self.iter.next(); + debug_assert_eq!(Some(byte), removed); + Ok(()) } @@ -54,6 +60,11 @@ where } } +impl<I> FusedIterator for CodePoints<I> where + I: FusedIterator + Iterator<Item = u8> +{ +} + impl<I> Iterator for CodePoints<I> where I: Iterator<Item = u8>, @@ -94,6 +105,7 @@ where // This condition is optimized to detect surrogate code points. } else if code_point & 0xFE0 == 0x360 { + self.still_utf8 = false; if code_point & 0x10 == 0 { self.surrogate = true; } else if prev_surrogate { diff --git a/src/windows/wtf8/convert.rs b/src/windows/wtf8/convert.rs index fcaf562..70a8a9f 100644 --- a/src/windows/wtf8/convert.rs +++ b/src/windows/wtf8/convert.rs @@ -1,5 +1,6 @@ use std::char; use std::char::DecodeUtf16; +use std::iter::FusedIterator; use std::num::NonZeroU16; use crate::util::BYTE_SHIFT; @@ -27,7 +28,7 @@ where { iter: DecodeUtf16<I>, code_point: u32, - shift: u8, + shifts: u8, } impl<I> DecodeWide<I> @@ -41,9 +42,14 @@ where Self { iter: char::decode_utf16(string), code_point: 0, - shift: 0, + shifts: 0, } } + + #[inline(always)] + fn get_raw_byte(&self) -> u8 { + (self.code_point >> (self.shifts * BYTE_SHIFT)) as u8 + } } impl<I> Iterator for DecodeWide<I> @@ -53,11 +59,9 @@ where type Item = u8; fn next(&mut self) -> Option<Self::Item> { - if let Some(shift) = self.shift.checked_sub(BYTE_SHIFT) { - self.shift = shift; - return Some( - ((self.code_point >> self.shift) as u8 & CONT_MASK) | CONT_TAG, - ); + if let Some(shifts) = self.shifts.checked_sub(1) { + self.shifts = shifts; + return Some((self.get_raw_byte() & CONT_MASK) | CONT_TAG); } self.code_point = self @@ -68,7 +72,7 @@ where macro_rules! decode { ( $tag:expr ) => { - Some((self.code_point >> self.shift) as u8 | $tag) + Some(self.get_raw_byte() | $tag) }; } macro_rules! try_decode { @@ -76,7 +80,7 @@ where if self.code_point < $upper_bound { return decode!($tag); } - self.shift += BYTE_SHIFT; + self.shifts += 1; }; } try_decode!(0, 0x80); @@ -87,16 +91,16 @@ where fn size_hint(&self) -> (usize, Option<usize>) { let (low, high) = self.iter.size_hint(); - let shift = self.shift.into(); + let shifts = self.shifts.into(); ( - low.saturating_add(shift), + low.saturating_add(shifts), high.and_then(|x| x.checked_mul(4)) - .and_then(|x| x.checked_add(shift)), + .and_then(|x| x.checked_add(shifts)), ) } } -struct EncodeWide<I> +pub(in super::super) struct EncodeWide<I> where I: Iterator<Item = u8>, { @@ -108,15 +112,24 @@ impl<I> EncodeWide<I> where I: Iterator<Item = u8>, { - pub(in super::super) fn new<S>(string: S) -> Self + fn new<S>(string: S) -> Self where - S: IntoIterator<IntoIter = I, Item = I::Item>, + S: IntoIterator<IntoIter = I>, { Self { iter: CodePoints::new(string), surrogate: None, } } + + pub(in super::super) fn is_still_utf8(&self) -> bool { + self.iter.is_still_utf8() + } +} + +impl<I> FusedIterator for EncodeWide<I> where + I: FusedIterator + Iterator<Item = u8> +{ } impl<I> Iterator for EncodeWide<I> @@ -137,6 +150,8 @@ where .map(|offset| { static_assert!(MIN_LOW_SURROGATE != 0); + // SAFETY: The above static assertion guarantees that + // this value will not be zero. self.surrogate = Some(unsafe { NonZeroU16::new_unchecked( (offset & 0x3FF) as u16 | MIN_LOW_SURROGATE, @@ -161,6 +176,6 @@ where pub(in super::super) fn encode_wide( string: &[u8], -) -> impl '_ + Iterator<Item = Result<u16>> { +) -> EncodeWide<impl '_ + Iterator<Item = u8>> { EncodeWide::new(string.iter().copied()) } diff --git a/src/windows/wtf8/string.rs b/src/windows/wtf8/string.rs index 10b8faf..b3523a2 100644 --- a/src/windows/wtf8/string.rs +++ b/src/windows/wtf8/string.rs @@ -1,27 +1,28 @@ -use crate::util::is_continuation; - -use super::encode_wide; +use crate::util; const SURROGATE_LENGTH: usize = 3; pub(crate) fn ends_with(string: &[u8], mut suffix: &[u8]) -> bool { - let index = match string.len().checked_sub(suffix.len()) { - Some(index) => index, - None => return false, + let index = if let Some(index) = string.len().checked_sub(suffix.len()) { + index + } else { + return false; }; if let Some(&byte) = string.get(index) { - if is_continuation(byte) { - let index = index.checked_sub(1).expect("invalid string"); - let mut wide_surrogate = match suffix.get(..SURROGATE_LENGTH) { - Some(surrogate) => encode_wide(surrogate), - None => return false, - }; + if util::is_continuation(byte) { + let index = expect_encoded!(index.checked_sub(1)); + let mut wide_surrogate = + if let Some(surrogate) = suffix.get(..SURROGATE_LENGTH) { + super::encode_wide(surrogate) + } else { + return false; + }; let surrogate_wchar = wide_surrogate .next() .expect("failed decoding non-empty suffix"); if wide_surrogate.next().is_some() - || encode_wide(&string[index..]) + || super::encode_wide(&string[index..]) .take_while(Result::is_ok) .nth(1) != Some(surrogate_wchar) @@ -36,20 +37,23 @@ pub(crate) fn ends_with(string: &[u8], mut suffix: &[u8]) -> bool { pub(crate) fn starts_with(string: &[u8], mut prefix: &[u8]) -> bool { if let Some(&byte) = string.get(prefix.len()) { - if is_continuation(byte) { - let index = match prefix.len().checked_sub(SURROGATE_LENGTH) { - Some(index) => index, - None => return false, + if util::is_continuation(byte) { + let index = if let Some(index) = + prefix.len().checked_sub(SURROGATE_LENGTH) + { + index + } else { + return false; }; let (substring, surrogate) = prefix.split_at(index); - let mut wide_surrogate = encode_wide(surrogate); + let mut wide_surrogate = super::encode_wide(surrogate); let surrogate_wchar = wide_surrogate .next() .expect("failed decoding non-empty prefix"); if surrogate_wchar.is_err() || wide_surrogate.next().is_some() - || encode_wide(&string[index..]) + || super::encode_wide(&string[index..]) .next() .expect("failed decoding non-empty substring") != surrogate_wchar diff --git a/tests/common.rs b/tests/common.rs deleted file mode 100644 index c0909bc..0000000 --- a/tests/common.rs +++ /dev/null @@ -1,94 +0,0 @@ -#![allow(dead_code)] -#![warn(unsafe_op_in_unsafe_fn)] - -use std::borrow::Cow; -use std::ffi::OsStr; -use std::ffi::OsString; -#[cfg(feature = "raw_os_str")] -use std::mem; -use std::path::Path; -use std::path::PathBuf; -use std::result; - -use os_str_bytes::EncodingError; -use os_str_bytes::OsStrBytes; -use os_str_bytes::OsStringBytes; -#[cfg(feature = "raw_os_str")] -use os_str_bytes::RawOsStr; - -pub(crate) type Result<T> = result::Result<T, EncodingError>; - -pub(crate) const WTF8_STRING: &[u8] = b"foo\xED\xA0\xBD\xF0\x9F\x92\xA9bar"; - -// SAFETY: This string is valid in WTF-8. -#[cfg(all(any(unix, windows), feature = "raw_os_str"))] -pub(crate) const RAW_WTF8_STRING: &RawOsStr = - unsafe { from_raw_bytes_unchecked(WTF8_STRING) }; - -#[cfg(feature = "raw_os_str")] -pub(crate) const unsafe fn from_raw_bytes_unchecked( - string: &[u8], -) -> &RawOsStr { - // SAFETY: This implementation detail can only be assumed by this crate. - unsafe { mem::transmute(string) } -} - -#[track_caller] -fn test_from_bytes<'a, T, U, S>(result: &Result<U>, string: S) -where - S: Into<Cow<'a, [u8]>>, - T: 'a + AsRef<OsStr> + OsStrBytes + ?Sized, - U: AsRef<OsStr>, -{ - assert_eq!( - result.as_ref().map(AsRef::as_ref), - T::from_raw_bytes(string).as_deref().map(AsRef::as_ref), - ); -} - -pub(crate) fn from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>> { - let os_string = OsStr::from_raw_bytes(string); - - test_from_bytes::<Path, _, _>(&os_string, string); - - os_string -} - -pub(crate) fn from_vec(string: Vec<u8>) -> Result<OsString> { - let os_string = OsString::from_raw_vec(string.clone()); - test_from_bytes::<OsStr, _, _>(&os_string, string.clone()); - - let path = PathBuf::from_raw_vec(string.clone()); - test_from_bytes::<Path, _, _>(&path, string); - assert_eq!(os_string, path.map(PathBuf::into_os_string)); - - os_string -} - -pub(crate) fn test_bytes(string: &[u8]) -> Result<()> { - let os_string = from_bytes(string)?; - assert_eq!(string.len(), os_string.len()); - assert_eq!(string, &*os_string.to_raw_bytes()); - Ok(()) -} - -pub(crate) fn test_vec(string: &[u8]) -> Result<()> { - let os_string = from_vec(string.to_owned())?; - assert_eq!(string.len(), os_string.len()); - assert_eq!(string, os_string.into_raw_vec()); - Ok(()) -} - -pub(crate) fn test_utf8_bytes(string: &str) { - let os_string = OsStr::new(string); - let string = string.as_bytes(); - assert_eq!(Ok(Cow::Borrowed(os_string)), from_bytes(string)); - assert_eq!(string, &*os_string.to_raw_bytes()); -} - -pub(crate) fn test_utf8_vec(string: &str) { - let os_string = string.to_owned().into(); - let string = string.as_bytes(); - assert_eq!(Ok(&os_string), from_vec(string.to_owned()).as_ref()); - assert_eq!(string, os_string.into_raw_vec()); -} diff --git a/tests/debug.rs b/tests/debug.rs deleted file mode 100644 index c252deb..0000000 --- a/tests/debug.rs +++ /dev/null @@ -1,34 +0,0 @@ -#![cfg(feature = "raw_os_str")] - -use os_str_bytes::RawOsStr; - -mod common; -use common::RAW_WTF8_STRING; - -fn test(result: &str, string: &RawOsStr) { - assert_eq!(format!("RawOsStr({})", result), format!("{:?}", string)); - assert_eq!( - format!("RawOsString({})", result), - format!("{:?}", string.to_owned()), - ); -} - -#[test] -fn test_debug_empty() { - test("\"\"", RawOsStr::from_str("")); -} - -#[test] -fn test_debug_wft8() { - let wchar = if cfg!(unix) { - "\\xED\\xA0\\xBD" - } else { - "\\u{D83D}" - }; - test(&format!("\"foo{}\u{1F4A9}bar\"", wchar), RAW_WTF8_STRING); -} - -#[test] -fn test_debug_quote() { - test("\"foo\\\"bar\"", RawOsStr::from_str("foo\"bar")); -} diff --git a/tests/edge_cases.rs b/tests/edge_cases.rs deleted file mode 100644 index a0fa529..0000000 --- a/tests/edge_cases.rs +++ /dev/null @@ -1,7 +0,0 @@ -mod common; -use common::test_bytes; - -#[test] -fn test_edge_cases() { - assert_eq!(Ok(()), test_bytes(b"\xED\xAB\xBE\xF4\x8D\xBC\x9A")); -} diff --git a/tests/index.rs b/tests/index.rs deleted file mode 100644 index 50abd6c..0000000 --- a/tests/index.rs +++ /dev/null @@ -1,86 +0,0 @@ -#![cfg(feature = "raw_os_str")] - -use std::ops::Index; -use std::panic; -use std::panic::UnwindSafe; - -use os_str_bytes::RawOsStr; - -mod common; -use common::RAW_WTF8_STRING; - -#[test] -fn test_valid_indices() { - test(0); - test(1); - test(2); - test(3); - test(6); - test(10); - test(11); - test(12); - test(13); - - #[track_caller] - fn test(index: usize) { - let _ = RAW_WTF8_STRING.index(index..); - } -} - -macro_rules! test { - ( $name:ident , $index:literal , $code_point:expr ) => { - // https://github.com/rust-lang/rust/issues/88430 - #[test] - fn $name() { - let index_fn = || RAW_WTF8_STRING.index($index..); - if cfg!(unix) { - let _ = index_fn(); - return; - } - - let error = panic::catch_unwind(index_fn) - .expect_err("test did not panic as expected"); - let error: &String = - error.downcast_ref().expect("incorrect panic message type"); - assert_eq!( - concat!( - "byte index ", - $index, - " is not a valid boundary; it is inside ", - $code_point - ), - error, - ); - } - }; -} - -test!(test_index_4, 4, "U+D83D (bytes 3..6)"); - -test!(test_index_5, 5, "U+D83D (bytes 3..6)"); - -test!(test_index_7, 7, "U+1F4A9 (bytes 6..10)"); - -test!(test_index_8, 8, "U+1F4A9 (bytes 6..10)"); - -test!(test_index_9, 9, "U+1F4A9 (bytes 6..10)"); - -#[test] -fn test_index_panics() { - let string = RawOsStr::from_str("\u{F6}"); - test(|| string.index(1..2)); - test(|| string.index(0..1)); - test(|| string.index(1..)); - test(|| string.index(0..=0)); - test(|| string.index(..1)); - test(|| string.index(..=0)); - test(|| string.split_at(1)); - - #[track_caller] - fn test<F, R>(f: F) - where - F: FnOnce() -> R + UnwindSafe, - { - assert_eq!(!cfg!(unix), panic::catch_unwind(f).is_err()); - } -} diff --git a/tests/integration.rs b/tests/integration.rs deleted file mode 100644 index 0107fe5..0000000 --- a/tests/integration.rs +++ /dev/null @@ -1,75 +0,0 @@ -use std::str; - -mod common; -use common::test_bytes; -use common::test_utf8_bytes; -use common::test_utf8_vec; -use common::test_vec; -use common::Result; -use common::WTF8_STRING; - -const INVALID_STRING: &[u8] = b"\xF1foo\xF1\x80bar\xF1\x80\x80baz"; - -const UTF8_STRING: &str = "string"; - -fn test_string_is_invalid_utf8(string: &[u8]) { - assert!(str::from_utf8(string).is_err()); -} - -fn test_invalid_result(result: &Result<()>) { - if cfg!(windows) { - assert!(result.is_err()); - } else { - assert_eq!(&Ok(()), result); - } -} - -#[test] -fn test_empty_bytes() { - test_utf8_bytes(""); -} - -#[test] -fn test_empty_vec() { - test_utf8_vec(""); -} - -#[test] -fn test_nonempty_utf8_bytes() { - test_utf8_bytes(UTF8_STRING); -} - -#[test] -fn test_nonempty_utf8_vec() { - test_utf8_vec(UTF8_STRING); -} - -#[test] -fn test_invalid_string_is_invalid_utf8() { - test_string_is_invalid_utf8(INVALID_STRING); -} - -#[test] -fn test_invalid_bytes() { - test_invalid_result(&test_bytes(INVALID_STRING)); -} - -#[test] -fn test_invalid_vec() { - test_invalid_result(&test_vec(INVALID_STRING)); -} - -#[test] -fn test_wtf8_string_is_invalid_utf8() { - test_string_is_invalid_utf8(WTF8_STRING); -} - -#[test] -fn test_wtf8_bytes() { - assert_eq!(Ok(()), test_bytes(WTF8_STRING)); -} - -#[test] -fn test_wtf8_vec() { - assert_eq!(Ok(()), test_vec(WTF8_STRING)); -} diff --git a/tests/random.rs b/tests/random.rs deleted file mode 100644 index ad6e8d2..0000000 --- a/tests/random.rs +++ /dev/null @@ -1,126 +0,0 @@ -use std::borrow::Cow; -use std::ffi::OsStr; -use std::ffi::OsString; - -use getrandom::getrandom; - -use os_str_bytes::OsStrBytes; -use os_str_bytes::OsStringBytes; - -mod common; -use common::from_bytes; -use common::from_vec; - -const SMALL_LENGTH: usize = 16; - -const LARGE_LENGTH: usize = 1024; - -const ITERATIONS: usize = 1024; - -fn random_os_string( - buffer_length: usize, -) -> Result<OsString, getrandom::Error> { - let mut buffer = vec![0; buffer_length]; - #[cfg(unix)] - { - use std::os::unix::ffi::OsStringExt; - - getrandom(&mut buffer)?; - Ok(OsStringExt::from_vec(buffer)) - } - #[cfg(windows)] - { - use std::os::windows::ffi::OsStringExt; - use std::slice; - - getrandom(as_mut_bytes(&mut buffer))?; - return Ok(OsStringExt::from_wide(&buffer)); - - fn as_mut_bytes(buffer: &mut [u16]) -> &mut [u8] { - // SAFETY: [u16] can always be transmuted to two [u8] bytes. - unsafe { - slice::from_raw_parts_mut( - buffer.as_mut_ptr() as *mut u8, - buffer.len() * 2, - ) - } - } - } - #[cfg(not(any(unix, windows)))] - Err(getrandom::Error::UNSUPPORTED) -} - -#[test] -fn test_random_bytes() -> Result<(), getrandom::Error> { - let os_string = random_os_string(LARGE_LENGTH)?; - let string = os_string.to_raw_bytes(); - assert_eq!(os_string.len(), string.len()); - assert_eq!(Ok(Cow::Borrowed(&*os_string)), from_bytes(&string)); - Ok(()) -} - -#[test] -fn test_random_vec() -> Result<(), getrandom::Error> { - let os_string = random_os_string(LARGE_LENGTH)?; - let string = os_string.clone().into_raw_vec(); - assert_eq!(os_string.len(), string.len()); - assert_eq!(Ok(os_string), from_vec(string)); - Ok(()) -} - -#[test] -fn test_lossless() -> Result<(), getrandom::Error> { - for _ in 0..ITERATIONS { - let mut string = vec![0; SMALL_LENGTH]; - getrandom(&mut string)?; - if let Ok(os_string) = OsStr::from_raw_bytes(&string) { - let encoded_string = os_string.to_raw_bytes(); - assert_eq!(string, &*encoded_string); - } - } - Ok(()) -} - -#[cfg(feature = "raw_os_str")] -#[test] -fn test_raw() -> Result<(), getrandom::Error> { - use os_str_bytes::RawOsStr; - use os_str_bytes::RawOsString; - - macro_rules! test { - ( - $result:expr , - $method:ident (& $string:ident , & $substring:ident ) - ) => { - #[allow(clippy::bool_assert_comparison)] - { - assert_eq!( - $result, - $string.$method(&$substring), - concat!(stringify!($method), "({:?}, {:?})"), - $string, - $substring, - ); - } - }; - } - - for _ in 0..ITERATIONS { - let mut string = random_os_string(SMALL_LENGTH)?; - let prefix = RawOsStr::new(&string).into_owned(); - let suffix = random_os_string(SMALL_LENGTH)?; - string.push(&suffix); - - let string = RawOsString::new(string); - let suffix = RawOsString::new(suffix); - - test!(true, ends_with_os(&string, &suffix)); - test!(true, starts_with_os(&string, &prefix)); - - if prefix != suffix { - test!(false, ends_with_os(&string, &prefix)); - test!(false, starts_with_os(&string, &suffix)); - } - } - Ok(()) -} diff --git a/tests/raw.rs b/tests/raw.rs deleted file mode 100644 index fe29705..0000000 --- a/tests/raw.rs +++ /dev/null @@ -1,108 +0,0 @@ -#![cfg(feature = "raw_os_str")] - -use std::ffi::OsStr; - -use os_str_bytes::EncodingError; -use os_str_bytes::OsStrBytes; -use os_str_bytes::RawOsStr; - -mod common; -use common::RAW_WTF8_STRING; - -fn from_raw_bytes(string: &[u8]) -> Result<&RawOsStr, EncodingError> { - // SAFETY: The string is validated before conversion. - OsStr::from_raw_bytes(string) - .map(|_| unsafe { common::from_raw_bytes_unchecked(string) }) -} - -#[test] -fn test_ends_with() { - test(true, b""); - test(true, b"r"); - test(true, b"ar"); - test(true, b"bar"); - if cfg!(not(windows)) { - test(true, b"\xA9bar"); - test(true, b"\x92\xA9bar"); - test(true, b"\x9F\x92\xA9bar"); - } - test(cfg!(windows), b"\xED\xB2\xA9bar"); - test(true, b"\xF0\x9F\x92\xA9bar"); - test(true, b"\xED\xA0\xBD\xF0\x9F\x92\xA9bar"); - test(true, b"o\xED\xA0\xBD\xF0\x9F\x92\xA9bar"); - test(true, b"oo\xED\xA0\xBD\xF0\x9F\x92\xA9bar"); - test(true, b"foo\xED\xA0\xBD\xF0\x9F\x92\xA9bar"); - - test(false, b"\xED\xA0\xBDbar"); - test(false, b"\xED\xB2\xA9aar"); - - fn test(result: bool, suffix: &[u8]) { - let suffix = from_raw_bytes(suffix).unwrap(); - assert_eq!(result, RAW_WTF8_STRING.ends_with_os(suffix)); - } -} - -#[test] -fn test_empty_ends_with() { - macro_rules! test { - ( $result:expr , $string:expr , $substring:expr ) => { - #[allow(clippy::bool_assert_comparison)] - { - assert_eq!( - $result, - RawOsStr::from_str($string) - .ends_with_os(RawOsStr::from_str($substring)), - ); - } - }; - } - test!(true, "", ""); - test!(false, "", "r"); - test!(false, "", "ar"); -} - -#[test] -fn test_starts_with() { - test(true, b""); - test(true, b"f"); - test(true, b"fo"); - test(true, b"foo"); - test(true, b"foo\xED\xA0\xBD"); - if cfg!(not(windows)) { - test(true, b"foo\xED\xA0\xBD\xF0"); - test(true, b"foo\xED\xA0\xBD\xF0\x9F"); - test(true, b"foo\xED\xA0\xBD\xF0\x9F\x92"); - } - test(cfg!(windows), b"foo\xED\xA0\xBD\xED\xA0\xBD"); - test(true, b"foo\xED\xA0\xBD\xF0\x9F\x92\xA9"); - test(true, b"foo\xED\xA0\xBD\xF0\x9F\x92\xA9b"); - test(true, b"foo\xED\xA0\xBD\xF0\x9F\x92\xA9ba"); - test(true, b"foo\xED\xA0\xBD\xF0\x9F\x92\xA9bar"); - - test(false, b"foo\xED\xB2\xA9"); - test(false, b"fof\xED\xA0\xBD\xED\xA0\xBD"); - - fn test(result: bool, prefix: &[u8]) { - let prefix = from_raw_bytes(prefix).unwrap(); - assert_eq!(result, RAW_WTF8_STRING.starts_with_os(prefix)); - } -} - -#[test] -fn test_empty_starts_with() { - macro_rules! test { - ( $result:expr , $string:expr , $substring:expr ) => { - #[allow(clippy::bool_assert_comparison)] - { - assert_eq!( - $result, - RawOsStr::from_str($string) - .starts_with_os(RawOsStr::from_str($substring)), - ); - } - }; - } - test!(true, "", ""); - test!(false, "", "f"); - test!(false, "", "fo"); -} |