aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeff Vander Stoep <jeffv@google.com>2022-12-13 09:19:21 +0100
committerJeff Vander Stoep <jeffv@google.com>2022-12-13 09:19:21 +0100
commitf983a87abb31060fd161399e657d20fa3940a57d (patch)
treee74967a2ccaad34b918be498fe7f0d7b64c90527
parent5d2fd3007cb69905e0eb3397527471c25859c651 (diff)
downloados_str_bytes-f983a87abb31060fd161399e657d20fa3940a57d.tar.gz
Update os_str_bytes to 6.4.1
Test: Treehugger Change-Id: Ia699ac963537a1a53c8c18ad0451db025ea1fada
-rw-r--r--Android.bp145
-rw-r--r--Cargo.toml81
-rw-r--r--Cargo.toml.orig34
l---------LICENSE2
-rw-r--r--METADATA21
-rw-r--r--MODULE_LICENSE_APACHE2 (renamed from MODULE_LICENSE_MIT)0
-rw-r--r--README.md9
-rw-r--r--cargo2android.json5
-rw-r--r--rustfmt.toml1
-rw-r--r--src/common/mod.rs2
-rw-r--r--src/common/raw.rs7
-rw-r--r--src/iter.rs58
-rw-r--r--src/lib.rs349
-rw-r--r--src/pattern.rs8
-rw-r--r--src/raw_str.rs785
-rw-r--r--src/util.rs1
-rw-r--r--src/wasm/mod.rs (renamed from src/wasm32/mod.rs)16
-rw-r--r--src/wasm/raw.rs (renamed from src/wasm32/raw.rs)35
-rw-r--r--src/windows/mod.rs101
-rw-r--r--src/windows/raw.rs12
-rw-r--r--src/windows/wtf8/code_points.rs40
-rw-r--r--src/windows/wtf8/convert.rs47
-rw-r--r--src/windows/wtf8/string.rs42
-rw-r--r--tests/common.rs94
-rw-r--r--tests/debug.rs34
-rw-r--r--tests/edge_cases.rs7
-rw-r--r--tests/index.rs86
-rw-r--r--tests/integration.rs75
-rw-r--r--tests/random.rs126
-rw-r--r--tests/raw.rs108
30 files changed, 1168 insertions, 1163 deletions
diff --git a/Android.bp b/Android.bp
index a7d2325..77ea655 100644
--- a/Android.bp
+++ b/Android.bp
@@ -1,51 +1,14 @@
-// This file is generated by cargo2android.py --run --device --tests.
+// This file is generated by cargo2android.py --config cargo2android.json.
// Do not modify this file as changes will be overridden on upgrade.
-package {
- default_applicable_licenses: ["external_rust_crates_os_str_bytes_license"],
-}
-
-// Added automatically by a large-scale-change that took the approach of
-// 'apply every license found to every target'. While this makes sure we respect
-// every license restriction, it may not be entirely correct.
-//
-// e.g. GPL in an MIT project might only apply to the contrib/ directory.
-//
-// Please consider splitting the single license below into multiple licenses,
-// taking care not to lose any license_kind information, and overriding the
-// default license using the 'licenses: [...]' property on targets as needed.
-//
-// For unused files, consider creating a 'fileGroup' with "//visibility:private"
-// to attach the license to, and including a comment whether the files may be
-// used in the current project.
-//
-// large-scale-change included anything that looked like it might be a license
-// text as a license_text. e.g. LICENSE, NOTICE, COPYING etc.
-//
-// Please consider removing redundant or irrelevant files from 'license_text:'.
-// See: http://go/android-license-faq
-license {
- name: "external_rust_crates_os_str_bytes_license",
- visibility: [":__subpackages__"],
- license_kinds: [
- "SPDX-license-identifier-Apache-2.0",
- "SPDX-license-identifier-MIT",
- ],
- license_text: [
- "COPYRIGHT",
- "LICENSE-APACHE",
- "LICENSE-MIT",
- ],
-}
-
rust_library {
name: "libos_str_bytes",
host_supported: true,
crate_name: "os_str_bytes",
cargo_env_compat: true,
- cargo_pkg_version: "6.1.0",
+ cargo_pkg_version: "6.4.1",
srcs: ["src/lib.rs"],
edition: "2021",
features: [
@@ -61,107 +24,3 @@ rust_library {
"//apex_available:anyapex",
],
}
-
-rust_test {
- name: "os_str_bytes_test_src_lib",
- host_supported: true,
- crate_name: "os_str_bytes",
- cargo_env_compat: true,
- cargo_pkg_version: "6.1.0",
- srcs: ["src/lib.rs"],
- test_suites: ["general-tests"],
- auto_gen_config: true,
- test_options: {
- unit_test: true,
- },
- edition: "2021",
- features: [
- "default",
- "memchr",
- "raw_os_str",
- ],
- rustlibs: [
- "libgetrandom",
- "libmemchr",
- ],
-}
-
-rust_defaults {
- name: "os_str_bytes_test_defaults",
- crate_name: "os_str_bytes",
- cargo_env_compat: true,
- cargo_pkg_version: "6.1.0",
- test_suites: ["general-tests"],
- auto_gen_config: true,
- edition: "2021",
- features: [
- "default",
- "memchr",
- "raw_os_str",
- ],
- rustlibs: [
- "libgetrandom",
- "libmemchr",
- "libos_str_bytes",
- ],
-}
-
-rust_test {
- name: "os_str_bytes_test_tests_debug",
- defaults: ["os_str_bytes_test_defaults"],
- host_supported: true,
- srcs: ["tests/debug.rs"],
- test_options: {
- unit_test: true,
- },
-}
-
-rust_test {
- name: "os_str_bytes_test_tests_edge_cases",
- defaults: ["os_str_bytes_test_defaults"],
- host_supported: true,
- srcs: ["tests/edge_cases.rs"],
- test_options: {
- unit_test: true,
- },
-}
-
-rust_test {
- name: "os_str_bytes_test_tests_index",
- defaults: ["os_str_bytes_test_defaults"],
- host_supported: true,
- srcs: ["tests/index.rs"],
- test_options: {
- unit_test: true,
- },
-}
-
-rust_test {
- name: "os_str_bytes_test_tests_integration",
- defaults: ["os_str_bytes_test_defaults"],
- host_supported: true,
- srcs: ["tests/integration.rs"],
- test_options: {
- unit_test: true,
- },
-}
-
-rust_test {
- name: "os_str_bytes_test_tests_random",
- defaults: ["os_str_bytes_test_defaults"],
- host_supported: true,
- srcs: ["tests/random.rs"],
- test_options: {
- unit_test: true,
- },
-}
-
-rust_test {
- name: "os_str_bytes_test_tests_raw",
- defaults: ["os_str_bytes_test_defaults"],
- host_supported: true,
- srcs: ["tests/raw.rs"],
- test_options: {
- unit_test: true,
- },
-}
diff --git a/Cargo.toml b/Cargo.toml
index 1f7b398..e33d3d4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,33 +1,78 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
[package]
-name = "os_str_bytes"
-version = "6.1.0"
-authors = ["dylni"]
edition = "2021"
rust-version = "1.57.0"
+name = "os_str_bytes"
+version = "6.4.1"
+authors = ["dylni"]
+exclude = [
+ ".*",
+ "tests.rs",
+ "/rustfmt.toml",
+ "/src/bin",
+ "/tests",
+]
description = """
-Utilities for converting between byte sequences and platform-native strings
+Convert between byte sequences and platform-native strings
"""
readme = "README.md"
-repository = "https://github.com/dylni/os_str_bytes"
+keywords = [
+ "bytes",
+ "osstr",
+ "osstring",
+ "path",
+ "windows",
+]
+categories = [
+ "command-line-interface",
+ "development-tools::ffi",
+ "encoding",
+ "os",
+ "rust-patterns",
+]
license = "MIT OR Apache-2.0"
-keywords = ["bytes", "osstr", "osstring", "path", "windows"]
-categories = ["command-line-interface", "development-tools::ffi", "encoding", "os", "rust-patterns"]
-exclude = [".*", "/rustfmt.toml", "/tests"]
+repository = "https://github.com/dylni/os_str_bytes"
[package.metadata.docs.rs]
all-features = true
-rustc-args = ["--cfg", "os_str_bytes_docs_rs"]
-rustdoc-args = ["--cfg", "os_str_bytes_docs_rs"]
+rustc-args = [
+ "--cfg",
+ "os_str_bytes_docs_rs",
+]
+rustdoc-args = [
+ "--cfg",
+ "os_str_bytes_docs_rs",
+]
-[dependencies]
-memchr = { version = "2.4", optional = true }
-print_bytes = { version = "0.6", optional = true }
-uniquote = { version = "3.0", optional = true }
+[dependencies.memchr]
+version = "2.4"
+optional = true
-[dev-dependencies]
-getrandom = "0.2"
+[dependencies.print_bytes]
+version = "0.7"
+optional = true
-[features]
-default = ["memchr", "raw_os_str"]
+[dependencies.uniquote]
+version = "3.0"
+optional = true
+[dev-dependencies.getrandom]
+version = "0.2"
+
+[features]
+checked_conversions = []
+default = [
+ "memchr",
+ "raw_os_str",
+]
raw_os_str = []
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644
index 0000000..dd388f2
--- /dev/null
+++ b/Cargo.toml.orig
@@ -0,0 +1,34 @@
+[package]
+name = "os_str_bytes"
+version = "6.4.1"
+authors = ["dylni"]
+edition = "2021"
+rust-version = "1.57.0"
+description = """
+Convert between byte sequences and platform-native strings
+"""
+readme = "README.md"
+repository = "https://github.com/dylni/os_str_bytes"
+license = "MIT OR Apache-2.0"
+keywords = ["bytes", "osstr", "osstring", "path", "windows"]
+categories = ["command-line-interface", "development-tools::ffi", "encoding", "os", "rust-patterns"]
+exclude = [".*", "tests.rs", "/rustfmt.toml", "/src/bin", "/tests"]
+
+[package.metadata.docs.rs]
+all-features = true
+rustc-args = ["--cfg", "os_str_bytes_docs_rs"]
+rustdoc-args = ["--cfg", "os_str_bytes_docs_rs"]
+
+[dependencies]
+memchr = { version = "2.4", optional = true }
+print_bytes = { version = "0.7", optional = true }
+uniquote = { version = "3.0", optional = true }
+
+[dev-dependencies]
+getrandom = "0.2"
+
+[features]
+default = ["memchr", "raw_os_str"]
+
+checked_conversions = []
+raw_os_str = []
diff --git a/LICENSE b/LICENSE
index 7f9a88e..6b579aa 120000
--- a/LICENSE
+++ b/LICENSE
@@ -1 +1 @@
-LICENSE-MIT \ No newline at end of file
+LICENSE-APACHE \ No newline at end of file
diff --git a/METADATA b/METADATA
index e2c6635..b1fd452 100644
--- a/METADATA
+++ b/METADATA
@@ -1,13 +1,20 @@
name: "os_str_bytes"
-description:
- "This crate allows interacting with the data stored by OsStr and OsString, without resorting to panics or corruption for invalid UTF-8. Thus, methods can be used that are already defined on [u8] and Vec<u8>."
-
+description: "()"
third_party {
url {
- type: GIT
- value: "https://github.com/dylni/os_str_bytes"
+ type: HOMEPAGE
+ value: "https://crates.io/crates/os_str_bytes"
}
- version: "6.1.0"
- last_upgrade_date { year: 2022 month: 6 day: 29 }
+ url {
+ type: ARCHIVE
+ value: "https://static.crates.io/crates/os_str_bytes/os_str_bytes-6.4.1.crate"
+ }
+ version: "6.4.1"
+ # Dual-licensed, using the least restrictive per go/thirdpartylicenses#same.
license_type: NOTICE
+ last_upgrade_date {
+ year: 2022
+ month: 12
+ day: 13
+ }
}
diff --git a/MODULE_LICENSE_MIT b/MODULE_LICENSE_APACHE2
index e69de29..e69de29 100644
--- a/MODULE_LICENSE_MIT
+++ b/MODULE_LICENSE_APACHE2
diff --git a/README.md b/README.md
index d0fe83b..a008f08 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ Add the following lines to your "Cargo.toml" file:
```toml
[dependencies]
-os_str_bytes = "6.1"
+os_str_bytes = "6.4"
```
See the [documentation] for available functionality and examples.
@@ -69,6 +69,11 @@ The minimum supported Rust toolchain version depends on the platform:
<td><code>*-*-windows-*</code></td>
<td>1.57.0</td>
</tr>
+ <tr>
+ <td>Xous</td>
+ <td><code>*-*-xous-*</code></td>
+ <td>unstable</td>
+ </tr>
</table>
Minor version updates may increase these version requirements. However, the
@@ -78,7 +83,7 @@ crate's minor version:
```toml
[dependencies]
-os_str_bytes = "~6.1"
+os_str_bytes = "~6.4"
```
## License
diff --git a/cargo2android.json b/cargo2android.json
new file mode 100644
index 0000000..ff6df50
--- /dev/null
+++ b/cargo2android.json
@@ -0,0 +1,5 @@
+{
+ "device": true,
+ "run": true,
+ "tests": true
+}
diff --git a/rustfmt.toml b/rustfmt.toml
deleted file mode 100644
index a1ffd27..0000000
--- a/rustfmt.toml
+++ /dev/null
@@ -1 +0,0 @@
-max_width = 79
diff --git a/src/common/mod.rs b/src/common/mod.rs
index dd49890..e28aba6 100644
--- a/src/common/mod.rs
+++ b/src/common/mod.rs
@@ -12,6 +12,8 @@ use std::os::solid as os;
use std::os::unix as os;
#[cfg(target_os = "wasi")]
use std::os::wasi as os;
+#[cfg(target_os = "xous")]
+use std::os::xous as os;
use os::ffi::OsStrExt;
use os::ffi::OsStringExt;
diff --git a/src/common/raw.rs b/src/common/raw.rs
index 070a62c..97d0353 100644
--- a/src/common/raw.rs
+++ b/src/common/raw.rs
@@ -1,12 +1,19 @@
use std::fmt;
use std::fmt::Formatter;
+use super::Result;
+
#[inline(always)]
pub(crate) const fn is_continuation(_: u8) -> bool {
false
}
#[inline(always)]
+pub(crate) fn validate_bytes(_: &[u8]) -> Result<()> {
+ Ok(())
+}
+
+#[inline(always)]
pub(crate) fn decode_code_point(_: &[u8]) -> u32 {
unreachable!();
}
diff --git a/src/iter.rs b/src/iter.rs
index 5cb7299..03ff982 100644
--- a/src/iter.rs
+++ b/src/iter.rs
@@ -2,11 +2,11 @@
#![cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))]
+use std::convert;
use std::fmt;
use std::fmt::Debug;
use std::fmt::Formatter;
use std::iter::FusedIterator;
-use std::str;
use super::pattern::Encoded;
use super::Pattern;
@@ -29,6 +29,7 @@ impl<'a, P> Split<'a, P>
where
P: Pattern,
{
+ #[track_caller]
pub(super) fn new(string: &'a RawOsStr, pat: P) -> Self {
let pat = pat.__encode();
assert!(
@@ -56,31 +57,6 @@ macro_rules! impl_next {
}};
}
-impl<P> DoubleEndedIterator for Split<'_, P>
-where
- P: Pattern,
-{
- fn next_back(&mut self) -> Option<Self::Item> {
- impl_next!(self, rsplit_once_raw, |(prefix, suffix)| (suffix, prefix))
- }
-}
-
-impl<'a, P> Iterator for Split<'a, P>
-where
- P: Pattern,
-{
- type Item = &'a RawOsStr;
-
- #[inline]
- fn last(mut self) -> Option<Self::Item> {
- self.next_back()
- }
-
- fn next(&mut self) -> Option<Self::Item> {
- impl_next!(self, split_once_raw, |x| x)
- }
-}
-
impl<P> Clone for Split<'_, P>
where
P: Pattern,
@@ -102,12 +78,34 @@ where
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
f.debug_struct("Split")
.field("string", &self.string)
- .field(
- "pat",
- &str::from_utf8(self.pat.__get()).expect("invalid pattern"),
- )
+ .field("pat", &self.pat)
.finish()
}
}
+impl<P> DoubleEndedIterator for Split<'_, P>
+where
+ P: Pattern,
+{
+ fn next_back(&mut self) -> Option<Self::Item> {
+ impl_next!(self, rsplit_once_raw, |(prefix, suffix)| (suffix, prefix))
+ }
+}
+
impl<P> FusedIterator for Split<'_, P> where P: Pattern {}
+
+impl<'a, P> Iterator for Split<'a, P>
+where
+ P: Pattern,
+{
+ type Item = &'a RawOsStr;
+
+ #[inline]
+ fn last(mut self) -> Option<Self::Item> {
+ self.next_back()
+ }
+
+ fn next(&mut self) -> Option<Self::Item> {
+ impl_next!(self, split_once_raw, convert::identity)
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
index f73c2d5..40154c9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -21,9 +21,9 @@
//! However, the following invariants will always be upheld:
//!
//! - The encoding will be compatible with UTF-8. In particular, splitting an
-//! encoded byte sequence by a UTF-8–encoded character always produces other
-//! valid byte sequences. They can be re-encoded without error using
-//! [`OsStrBytes::from_raw_bytes`] and similar methods.
+//! encoded byte sequence by a UTF-8&ndash;encoded character always produces
+//! other valid byte sequences. They can be re-encoded without error using
+//! [`RawOsString::into_os_string`] and similar methods.
//!
//! - All characters valid in platform strings are representable. [`OsStr`] and
//! [`OsString`] can always be losslessly reconstructed from extracted bytes.
@@ -73,10 +73,27 @@
//! For more information, see [`RawOsStr`][memchr complexity].
//!
//! - **raw\_os\_str** -
-//! Enables use of [`RawOsStr`] and [`RawOsString`].
+//! Provides:
+//! - [`iter`]
+//! - [`Pattern`]
+//! - [`RawOsStr`]
+//! - [`RawOsStrCow`]
+//! - [`RawOsString`]
//!
//! ### Optional Features
//!
+//! - **checked\_conversions** -
+//! Provides:
+//! - [`EncodingError`]
+//! - [`OsStrBytes::from_raw_bytes`]
+//! - [`OsStringBytes::from_raw_vec`]
+//! - [`RawOsStr::from_raw_bytes`]
+//! - [`RawOsString::from_raw_vec`]
+//!
+//! Because this feature should not be used in libraries, the
+//! "OS_STR_BYTES_CHECKED_CONVERSIONS" environment variable must be defined
+//! during compilation.
+//!
//! - **print\_bytes** -
//! Provides implementations of [`print_bytes::ToBytes`] for [`RawOsStr`] and
//! [`RawOsString`].
@@ -98,21 +115,23 @@
//!
//! # Complexity
//!
-//! The time complexities of trait methods will vary based on what
-//! functionality is available for the platform. At worst, they will all be
-//! linear, but some can take constant time. For example,
-//! [`OsStringBytes::from_raw_vec`] might be able to reuse the allocation for
-//! its argument.
+//! Conversion method complexities will vary based on what functionality is
+//! available for the platform. At worst, they will all be linear, but some can
+//! take constant time. For example, [`RawOsString::into_os_string`] might be
+//! able to reuse its allocation.
//!
//! # Examples
//!
//! ```
+//! # use std::io;
+//! #
+//! # #[cfg(feature = "raw_os_str")]
+//! # {
//! # #[cfg(any())]
//! use std::env;
//! use std::fs;
-//! # use std::io;
//!
-//! use os_str_bytes::OsStrBytes;
+//! use os_str_bytes::RawOsStr;
//!
//! # mod env {
//! # use std::env;
@@ -126,12 +145,13 @@
//! # }
//! #
//! for file in env::args_os().skip(1) {
-//! if file.to_raw_bytes().first() != Some(&b'-') {
+//! if !RawOsStr::new(&file).starts_with('-') {
//! let string = "Hello, world!";
//! fs::write(&file, string)?;
//! assert_eq!(string, fs::read_to_string(file)?);
//! }
//! }
+//! # }
//! #
//! # Ok::<_, io::Error>(())
//! ```
@@ -146,6 +166,7 @@
//! [sealed]: https://rust-lang.github.io/api-guidelines/future-proofing.html#c-sealed
//! [print\_bytes]: https://crates.io/crates/print_bytes
+#![cfg_attr(not(feature = "checked_conversions"), allow(deprecated))]
// Only require a nightly compiler when building documentation for docs.rs.
// This is a private option that should not be used.
// https://github.com/rust-lang/docs.rs/issues/147#issuecomment-389544407
@@ -170,6 +191,41 @@ use std::path::Path;
use std::path::PathBuf;
use std::result;
+macro_rules! if_checked_conversions {
+ ( $($item:item)+ ) => {
+ $(
+ #[cfg(feature = "checked_conversions")]
+ $item
+ )+
+ };
+}
+
+#[cfg(not(os_str_bytes_docs_rs))]
+if_checked_conversions! {
+ const _: &str = env!(
+ "OS_STR_BYTES_CHECKED_CONVERSIONS",
+ "The 'OS_STR_BYTES_CHECKED_CONVERSIONS' environment variable must be \
+ defined to use the 'checked_conversions' feature.",
+ );
+}
+
+#[rustfmt::skip]
+macro_rules! deprecated_checked_conversion {
+ ( $message:expr , $item:item ) => {
+ #[cfg_attr(
+ not(feature = "checked_conversions"),
+ deprecated = $message
+ )]
+ $item
+ };
+}
+
+macro_rules! expect_encoded {
+ ( $result:expr ) => {
+ $result.expect("invalid raw bytes")
+ };
+}
+
macro_rules! if_raw_str {
( $($item:item)+ ) => {
$(
@@ -180,16 +236,24 @@ macro_rules! if_raw_str {
}
#[cfg_attr(
- all(target_arch = "wasm32", target_os = "unknown"),
- path = "wasm32/mod.rs"
+ all(target_family = "wasm", target_os = "unknown"),
+ path = "wasm/mod.rs"
)]
#[cfg_attr(windows, path = "windows/mod.rs")]
#[cfg_attr(
- not(any(all(target_arch = "wasm32", target_os = "unknown"), windows)),
+ not(any(all(target_family = "wasm", target_os = "unknown"), windows)),
path = "common/mod.rs"
)]
mod imp;
+#[cfg(any(
+ all(
+ feature = "raw_os_str",
+ target_family = "wasm",
+ target_os = "unknown",
+ ),
+ windows,
+))]
mod util;
if_raw_str! {
@@ -200,32 +264,43 @@ if_raw_str! {
mod raw_str;
pub use raw_str::RawOsStr;
+ pub use raw_str::RawOsStrCow;
pub use raw_str::RawOsString;
}
-/// The error that occurs when a byte sequence is not representable in the
-/// platform encoding.
-///
-/// [`Result::unwrap`] should almost always be called on results containing
-/// this error. It should be known whether or not byte sequences are properly
-/// encoded for the platform, since [the module-level documentation][encoding]
-/// discourages using encoded bytes in interchange. Results are returned
-/// primarily to make panicking behavior explicit.
-///
-/// On Unix, this error is never returned, but [`OsStrExt`] or [`OsStringExt`]
-/// should be used instead if that needs to be guaranteed.
-///
-/// [encoding]: self#encoding
-/// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt
-/// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt
-/// [`Result::unwrap`]: ::std::result::Result::unwrap
-#[derive(Debug, Eq, PartialEq)]
-pub struct EncodingError(imp::EncodingError);
+deprecated_checked_conversion! {
+ "use `OsStrBytes::assert_from_raw_bytes` or \
+ `OsStringBytes::assert_from_raw_vec` instead, or enable the \
+ 'checked_conversions' feature",
+ /// The error that occurs when a byte sequence is not representable in the
+ /// platform encoding.
+ ///
+ /// [`Result::unwrap`] should almost always be called on results containing
+ /// this error. It should be known whether or not byte sequences are
+ /// properly encoded for the platform, since [the module-level
+ /// documentation][encoding] discourages using encoded bytes in
+ /// interchange. Results are returned primarily to make panicking behavior
+ /// explicit.
+ ///
+ /// On Unix, this error is never returned, but [`OsStrExt`] or
+ /// [`OsStringExt`] should be used instead if that needs to be guaranteed.
+ ///
+ /// [encoding]: self#encoding
+ /// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt
+ /// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt
+ /// [`Result::unwrap`]: ::std::result::Result::unwrap
+ #[derive(Clone, Debug, Eq, PartialEq)]
+ #[cfg_attr(
+ os_str_bytes_docs_rs,
+ doc(cfg(feature = "checked_conversions"))
+ )]
+ pub struct EncodingError(imp::EncodingError);
+}
impl Display for EncodingError {
#[inline]
- fn fmt(&self, formatter: &mut Formatter<'_>) -> fmt::Result {
- self.0.fmt(formatter)
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ self.0.fmt(f)
}
}
@@ -233,6 +308,25 @@ impl Error for EncodingError {}
type Result<T> = result::Result<T, EncodingError>;
+fn from_raw_bytes<'a, S>(
+ string: S,
+) -> result::Result<Cow<'a, OsStr>, imp::EncodingError>
+where
+ S: Into<Cow<'a, [u8]>>,
+{
+ match string.into() {
+ Cow::Borrowed(string) => imp::os_str_from_bytes(string),
+ Cow::Owned(string) => imp::os_string_from_vec(string).map(Cow::Owned),
+ }
+}
+
+fn cow_os_str_into_path(string: Cow<'_, OsStr>) -> Cow<'_, Path> {
+ match string {
+ Cow::Borrowed(string) => Cow::Borrowed(Path::new(string)),
+ Cow::Owned(string) => Cow::Owned(string.into()),
+ }
+}
+
/// A platform agnostic variant of [`OsStrExt`].
///
/// For more information, see [the module-level documentation][module].
@@ -240,14 +334,12 @@ type Result<T> = result::Result<T, EncodingError>;
/// [module]: self
/// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt
pub trait OsStrBytes: private::Sealed + ToOwned {
- /// Converts a byte slice into an equivalent platform-native string.
- ///
- /// Provided byte strings should always be valid for the [unspecified
- /// encoding] used by this crate.
+ /// Converts a byte string into an equivalent platform-native string.
///
- /// # Errors
+ /// # Panics
///
- /// See documentation for [`EncodingError`].
+ /// Panics if the string is not valid for the [unspecified encoding] used
+ /// by this crate.
///
/// # Examples
///
@@ -260,32 +352,70 @@ pub trait OsStrBytes: private::Sealed + ToOwned {
///
/// let os_string = env::current_exe()?;
/// let os_bytes = os_string.to_raw_bytes();
- /// assert_eq!(os_string, OsStr::from_raw_bytes(os_bytes).unwrap());
+ /// assert_eq!(os_string, OsStr::assert_from_raw_bytes(os_bytes));
/// #
/// # Ok::<_, io::Error>(())
/// ```
///
/// [unspecified encoding]: self#encoding
- fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>>
+ #[must_use = "method should not be used for validation"]
+ #[track_caller]
+ fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self>
where
S: Into<Cow<'a, [u8]>>;
- /// Converts a platform-native string into an equivalent byte slice.
+ deprecated_checked_conversion! {
+ "use `assert_from_raw_bytes` instead, or enable the \
+ 'checked_conversions' feature",
+ /// Converts a byte string into an equivalent platform-native string.
+ ///
+ /// [`assert_from_raw_bytes`] should almost always be used instead. For
+ /// more information, see [`EncodingError`].
+ ///
+ /// # Errors
+ ///
+ /// See documentation for [`EncodingError`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// use std::ffi::OsStr;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::OsStrBytes;
+ ///
+ /// let os_string = env::current_exe()?;
+ /// let os_bytes = os_string.to_raw_bytes();
+ /// assert_eq!(os_string, OsStr::from_raw_bytes(os_bytes).unwrap());
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ ///
+ /// [`assert_from_raw_bytes`]: Self::assert_from_raw_bytes
+ #[cfg_attr(
+ os_str_bytes_docs_rs,
+ doc(cfg(feature = "checked_conversions"))
+ )]
+ fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>>
+ where
+ S: Into<Cow<'a, [u8]>>;
+ }
+
+ /// Converts a platform-native string into an equivalent byte string.
///
- /// The returned bytes string will use an [unspecified encoding].
+ /// The returned string will use an [unspecified encoding].
///
/// # Examples
///
/// ```
- /// use std::env;
- /// # use std::io;
+ /// use std::ffi::OsStr;
///
/// use os_str_bytes::OsStrBytes;
///
- /// let os_string = env::current_exe()?;
- /// println!("{:?}", os_string.to_raw_bytes());
- /// #
- /// # Ok::<_, io::Error>(())
+ /// let string = "foobar";
+ /// let os_string = OsStr::new(string);
+ /// assert_eq!(string.as_bytes(), &*os_string.to_raw_bytes());
/// ```
///
/// [unspecified encoding]: self#encoding
@@ -295,18 +425,19 @@ pub trait OsStrBytes: private::Sealed + ToOwned {
impl OsStrBytes for OsStr {
#[inline]
+ fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self>
+ where
+ S: Into<Cow<'a, [u8]>>,
+ {
+ expect_encoded!(from_raw_bytes(string))
+ }
+
+ #[inline]
fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>>
where
S: Into<Cow<'a, [u8]>>,
{
- match string.into() {
- Cow::Borrowed(string) => {
- imp::os_str_from_bytes(string).map_err(EncodingError)
- }
- Cow::Owned(string) => {
- OsStringBytes::from_raw_vec(string).map(Cow::Owned)
- }
- }
+ from_raw_bytes(string).map_err(EncodingError)
}
#[inline]
@@ -317,14 +448,19 @@ impl OsStrBytes for OsStr {
impl OsStrBytes for Path {
#[inline]
+ fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self>
+ where
+ S: Into<Cow<'a, [u8]>>,
+ {
+ cow_os_str_into_path(OsStr::assert_from_raw_bytes(string))
+ }
+
+ #[inline]
fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>>
where
S: Into<Cow<'a, [u8]>>,
{
- OsStr::from_raw_bytes(string).map(|os_string| match os_string {
- Cow::Borrowed(os_string) => Cow::Borrowed(Self::new(os_string)),
- Cow::Owned(os_string) => Cow::Owned(os_string.into()),
- })
+ OsStr::from_raw_bytes(string).map(cow_os_str_into_path)
}
#[inline]
@@ -340,14 +476,12 @@ impl OsStrBytes for Path {
/// [module]: self
/// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt
pub trait OsStringBytes: private::Sealed + Sized {
- /// Converts a byte vector into an equivalent platform-native string.
+ /// Converts a byte string into an equivalent platform-native string.
///
- /// Provided byte strings should always be valid for the [unspecified
- /// encoding] used by this crate.
+ /// # Panics
///
- /// # Errors
- ///
- /// See documentation for [`EncodingError`].
+ /// Panics if the string is not valid for the [unspecified encoding] used
+ /// by this crate.
///
/// # Examples
///
@@ -360,30 +494,66 @@ pub trait OsStringBytes: private::Sealed + Sized {
///
/// let os_string = env::current_exe()?;
/// let os_bytes = os_string.clone().into_raw_vec();
- /// assert_eq!(os_string, OsString::from_raw_vec(os_bytes).unwrap());
+ /// assert_eq!(os_string, OsString::assert_from_raw_vec(os_bytes));
/// #
/// # Ok::<_, io::Error>(())
/// ```
///
/// [unspecified encoding]: self#encoding
- fn from_raw_vec(string: Vec<u8>) -> Result<Self>;
+ #[must_use = "method should not be used for validation"]
+ #[track_caller]
+ fn assert_from_raw_vec(string: Vec<u8>) -> Self;
- /// Converts a platform-native string into an equivalent byte vector.
+ deprecated_checked_conversion! {
+ "use `assert_from_raw_vec` instead, or enable the \
+ 'checked_conversions' feature",
+ /// Converts a byte string into an equivalent platform-native string.
+ ///
+ /// [`assert_from_raw_vec`] should almost always be used instead. For
+ /// more information, see [`EncodingError`].
+ ///
+ /// # Errors
+ ///
+ /// See documentation for [`EncodingError`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// use std::ffi::OsString;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::OsStringBytes;
+ ///
+ /// let os_string = env::current_exe()?;
+ /// let os_bytes = os_string.clone().into_raw_vec();
+ /// assert_eq!(os_string, OsString::from_raw_vec(os_bytes).unwrap());
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ ///
+ /// [`assert_from_raw_vec`]: Self::assert_from_raw_vec
+ #[cfg_attr(
+ os_str_bytes_docs_rs,
+ doc(cfg(feature = "checked_conversions"))
+ )]
+ fn from_raw_vec(string: Vec<u8>) -> Result<Self>;
+ }
+
+ /// Converts a platform-native string into an equivalent byte string.
///
- /// The returned byte string will use an [unspecified encoding].
+ /// The returned string will use an [unspecified encoding].
///
/// # Examples
///
/// ```
- /// use std::env;
- /// # use std::io;
+ /// use std::ffi::OsString;
///
/// use os_str_bytes::OsStringBytes;
///
- /// let os_string = env::current_exe()?;
- /// println!("{:?}", os_string.into_raw_vec());
- /// #
- /// # Ok::<_, io::Error>(())
+ /// let string = "foobar".to_owned();
+ /// let os_string: OsString = string.clone().into();
+ /// assert_eq!(string.into_bytes(), os_string.into_raw_vec());
/// ```
///
/// [unspecified encoding]: self#encoding
@@ -393,6 +563,11 @@ pub trait OsStringBytes: private::Sealed + Sized {
impl OsStringBytes for OsString {
#[inline]
+ fn assert_from_raw_vec(string: Vec<u8>) -> Self {
+ expect_encoded!(imp::os_string_from_vec(string))
+ }
+
+ #[inline]
fn from_raw_vec(string: Vec<u8>) -> Result<Self> {
imp::os_string_from_vec(string).map_err(EncodingError)
}
@@ -405,6 +580,11 @@ impl OsStringBytes for OsString {
impl OsStringBytes for PathBuf {
#[inline]
+ fn assert_from_raw_vec(string: Vec<u8>) -> Self {
+ OsString::assert_from_raw_vec(string).into()
+ }
+
+ #[inline]
fn from_raw_vec(string: Vec<u8>) -> Result<Self> {
OsString::from_raw_vec(string).map(Into::into)
}
@@ -421,7 +601,14 @@ mod private {
use std::path::Path;
use std::path::PathBuf;
+ if_raw_str! {
+ use std::borrow::Cow;
+
+ use super::RawOsStr;
+ }
+
pub trait Sealed {}
+
impl Sealed for char {}
impl Sealed for OsStr {}
impl Sealed for OsString {}
@@ -429,4 +616,8 @@ mod private {
impl Sealed for PathBuf {}
impl Sealed for &str {}
impl Sealed for &String {}
+
+ if_raw_str! {
+ impl Sealed for Cow<'_, RawOsStr> {}
+ }
}
diff --git a/src/pattern.rs b/src/pattern.rs
index 267a679..11f86bf 100644
--- a/src/pattern.rs
+++ b/src/pattern.rs
@@ -1,24 +1,24 @@
+use std::fmt::Debug;
+
use super::private;
pub trait Encoded {
fn __get(&self) -> &[u8];
}
-#[derive(Clone)]
+#[derive(Clone, Debug)]
pub struct EncodedChar {
buffer: [u8; 4],
length: usize,
}
impl Encoded for EncodedChar {
- #[inline]
fn __get(&self) -> &[u8] {
&self.buffer[..self.length]
}
}
impl Encoded for &str {
- #[inline]
fn __get(&self) -> &[u8] {
self.as_bytes()
}
@@ -35,7 +35,7 @@ impl Encoded for &str {
#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))]
pub trait Pattern: private::Sealed {
#[doc(hidden)]
- type __Encoded: Clone + Encoded;
+ type __Encoded: Clone + Debug + Encoded;
#[doc(hidden)]
fn __encode(self) -> Self::__Encoded;
diff --git a/src/raw_str.rs b/src/raw_str.rs
index ccec858..659b34d 100644
--- a/src/raw_str.rs
+++ b/src/raw_str.rs
@@ -16,6 +16,7 @@ use std::ops::RangeFull;
use std::ops::RangeInclusive;
use std::ops::RangeTo;
use std::ops::RangeToInclusive;
+use std::result;
use std::str;
#[cfg(feature = "memchr")]
@@ -23,73 +24,54 @@ use memchr::memmem::find;
#[cfg(feature = "memchr")]
use memchr::memmem::rfind;
+use super::imp;
use super::imp::raw;
use super::iter::Split;
use super::pattern::Encoded as EncodedPattern;
-use super::OsStrBytes;
-use super::OsStringBytes;
+use super::private;
use super::Pattern;
+if_checked_conversions! {
+ use super::EncodingError;
+ use super::Result;
+}
+
#[cfg(not(feature = "memchr"))]
fn find(string: &[u8], pat: &[u8]) -> Option<usize> {
- for i in 0..=string.len().checked_sub(pat.len())? {
- if string[i..].starts_with(pat) {
- return Some(i);
- }
- }
- None
+ (0..=string.len().checked_sub(pat.len())?)
+ .find(|&x| string[x..].starts_with(pat))
}
#[cfg(not(feature = "memchr"))]
fn rfind(string: &[u8], pat: &[u8]) -> Option<usize> {
- for i in (pat.len()..=string.len()).rev() {
- if string[..i].ends_with(pat) {
- return Some(i - pat.len());
- }
- }
- None
+ (pat.len()..=string.len())
+ .rfind(|&x| string[..x].ends_with(pat))
+ .map(|x| x - pat.len())
}
-macro_rules! impl_trim_matches {
- ( $self:ident , $pat:expr , $strip_method:ident ) => {{
- let pat = $pat.__encode();
- let pat = pat.__get();
- if pat.is_empty() {
- return $self;
- }
-
- let mut string = &$self.0;
- while let Some(substring) = string.$strip_method(pat) {
- string = substring;
- }
- Self::from_raw_bytes_unchecked(string)
- }};
+#[allow(clippy::missing_safety_doc)]
+unsafe trait TransmuteBox {
+ fn transmute_box<R>(self: Box<Self>) -> Box<R>
+ where
+ R: ?Sized + TransmuteBox,
+ {
+ let value = Box::into_raw(self);
+ // SAFETY: This trait is only implemented for types that can be
+ // transmuted.
+ unsafe { Box::from_raw(mem::transmute_copy(&value)) }
+ }
}
-macro_rules! impl_split_once_raw {
- ( $self:ident , $pat:expr , $find_fn:expr ) => {{
- let pat = $pat.__get();
-
- let index = $find_fn(&$self.0, pat)?;
- let prefix = &$self.0[..index];
- let suffix = &$self.0[index + pat.len()..];
- Some((
- Self::from_raw_bytes_unchecked(prefix),
- Self::from_raw_bytes_unchecked(suffix),
- ))
- }};
-}
+// SAFETY: This struct has a layout that makes this operation safe.
+unsafe impl TransmuteBox for RawOsStr {}
+unsafe impl TransmuteBox for [u8] {}
-/// A container for the byte strings converted by [`OsStrBytes`].
+/// A container for borrowed byte strings converted by this crate.
///
/// This wrapper is intended to prevent violating the invariants of the
/// [unspecified encoding] used by this crate and minimize encoding
/// conversions.
///
-/// Although this type is annotated with `#[repr(transparent)]`, the inner
-/// representation is not stable. Transmuting between this type and any other
-/// causes immediate undefined behavior.
-///
/// # Indices
///
/// Methods of this struct that accept indices require that the index lie on a
@@ -110,6 +92,12 @@ macro_rules! impl_split_once_raw {
/// these methods to instead run in linear time in the worst case (documented
/// for [`memchr::memmem::find`][memchr complexity]).
///
+/// # Safety
+///
+/// Although this type is annotated with `#[repr(transparent)]`, the inner
+/// representation is not stable. Transmuting between this type and any other
+/// causes immediate undefined behavior.
+///
/// [memchr complexity]: memchr::memmem::find#complexity
/// [unspecified encoding]: super#encoding
#[derive(Eq, Hash, Ord, PartialEq, PartialOrd)]
@@ -118,7 +106,7 @@ macro_rules! impl_split_once_raw {
pub struct RawOsStr([u8]);
impl RawOsStr {
- fn from_raw_bytes_unchecked(string: &[u8]) -> &Self {
+ const fn from_inner(string: &[u8]) -> &Self {
// SAFETY: This struct has a layout that makes this operation safe.
unsafe { mem::transmute(string) }
}
@@ -147,10 +135,8 @@ impl RawOsStr {
#[inline]
#[must_use]
pub fn new(string: &OsStr) -> Cow<'_, Self> {
- match string.to_raw_bytes() {
- Cow::Borrowed(string) => {
- Cow::Borrowed(Self::from_raw_bytes_unchecked(string))
- }
+ match imp::os_str_to_bytes(string) {
+ Cow::Borrowed(string) => Cow::Borrowed(Self::from_inner(string)),
Cow::Owned(string) => Cow::Owned(RawOsString(string)),
}
}
@@ -175,13 +161,15 @@ impl RawOsStr {
#[inline]
#[must_use]
pub fn from_str(string: &str) -> &Self {
- Self::from_raw_bytes_unchecked(string.as_bytes())
+ Self::from_inner(string.as_bytes())
}
- /// Returns the byte string stored by this container.
+ /// Wraps a byte string, without copying or encoding conversion.
+ ///
+ /// # Panics
///
- /// The result will match what would be returned by
- /// [`OsStrBytes::to_raw_bytes`] for the same string.
+ /// Panics if the string is not valid for the [unspecified encoding] used
+ /// by this crate.
///
/// # Examples
///
@@ -189,15 +177,116 @@ impl RawOsStr {
/// use std::env;
/// # use std::io;
///
- /// use os_str_bytes::OsStrBytes;
/// use os_str_bytes::RawOsStr;
///
/// let os_string = env::current_exe()?.into_os_string();
/// let raw = RawOsStr::new(&os_string);
- /// assert_eq!(os_string.to_raw_bytes(), raw.as_raw_bytes());
+ /// let raw_bytes = raw.as_raw_bytes();
+ /// assert_eq!(&*raw, RawOsStr::assert_from_raw_bytes(raw_bytes));
/// #
/// # Ok::<_, io::Error>(())
/// ```
+ ///
+ /// [unspecified encoding]: super#encoding
+ #[inline]
+ #[must_use = "method should not be used for validation"]
+ #[track_caller]
+ pub fn assert_from_raw_bytes(string: &[u8]) -> &Self {
+ expect_encoded!(raw::validate_bytes(string));
+
+ Self::from_inner(string)
+ }
+
+ if_checked_conversions! {
+ /// Wraps a byte string, without copying or encoding conversion.
+ ///
+ /// [`assert_from_raw_bytes`] should almost always be used instead. For
+ /// more information, see [`EncodingError`].
+ ///
+ /// # Errors
+ ///
+ /// See documentation for [`EncodingError`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// let raw = RawOsStr::new(&os_string);
+ /// assert_eq!(Ok(&*raw), RawOsStr::from_raw_bytes(raw.as_raw_bytes()));
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ ///
+ /// [`assert_from_raw_bytes`]: Self::assert_from_raw_bytes
+ #[cfg_attr(
+ os_str_bytes_docs_rs,
+ doc(cfg(feature = "checked_conversions"))
+ )]
+ #[inline]
+ pub fn from_raw_bytes(string: &[u8]) -> Result<&Self> {
+ raw::validate_bytes(string)
+ .map(|()| Self::from_inner(string))
+ .map_err(EncodingError)
+ }
+ }
+
+ /// Wraps a byte string, without copying or encoding conversion.
+ ///
+ /// # Safety
+ ///
+ /// The string must be valid for the [unspecified encoding] used by this
+ /// crate.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// let raw = RawOsStr::new(&os_string);
+ /// let raw_bytes = raw.as_raw_bytes();
+ /// assert_eq!(&*raw, unsafe {
+ /// RawOsStr::from_raw_bytes_unchecked(raw_bytes)
+ /// });
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ ///
+ /// [unspecified encoding]: super#encoding
+ #[inline]
+ #[must_use]
+ #[track_caller]
+ pub unsafe fn from_raw_bytes_unchecked(string: &[u8]) -> &Self {
+ if cfg!(debug_assertions) {
+ expect_encoded!(raw::validate_bytes(string));
+ }
+
+ Self::from_inner(string)
+ }
+
+ /// Returns the byte string stored by this container.
+ ///
+ /// The returned string will use an [unspecified encoding].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let string = "foobar";
+ /// let raw = RawOsStr::from_str(string);
+ /// assert_eq!(string.as_bytes(), raw.as_raw_bytes());
+ /// ```
+ ///
+ /// [unspecified encoding]: super#encoding
#[inline]
#[must_use]
pub fn as_raw_bytes(&self) -> &[u8] {
@@ -206,10 +295,6 @@ impl RawOsStr {
/// Equivalent to [`str::contains`].
///
- /// # Panics
- ///
- /// Panics if the pattern is a byte outside of the ASCII range.
- ///
/// # Examples
///
/// ```
@@ -230,10 +315,6 @@ impl RawOsStr {
/// Equivalent to [`str::ends_with`].
///
- /// # Panics
- ///
- /// Panics if the pattern is a byte outside of the ASCII range.
- ///
/// # Examples
///
/// ```
@@ -257,10 +338,6 @@ impl RawOsStr {
/// Equivalent to [`str::ends_with`] but accepts this type for the pattern.
///
- /// # Panics
- ///
- /// Panics if the pattern is a byte outside of the ASCII range.
- ///
/// # Examples
///
/// ```
@@ -278,10 +355,6 @@ impl RawOsStr {
/// Equivalent to [`str::find`].
///
- /// # Panics
- ///
- /// Panics if the pattern is a byte outside of the ASCII range.
- ///
/// # Examples
///
/// ```
@@ -347,10 +420,6 @@ impl RawOsStr {
/// Equivalent to [`str::rfind`].
///
- /// # Panics
- ///
- /// Panics if the pattern is a byte outside of the ASCII range.
- ///
/// # Examples
///
/// ```
@@ -372,19 +441,32 @@ impl RawOsStr {
rfind(&self.0, pat)
}
+ fn split_once_raw_with<P, F>(
+ &self,
+ pat: &P,
+ find_fn: F,
+ ) -> Option<(&Self, &Self)>
+ where
+ F: FnOnce(&[u8], &[u8]) -> Option<usize>,
+ P: EncodedPattern,
+ {
+ let pat = pat.__get();
+
+ let index = find_fn(&self.0, pat)?;
+ let prefix = &self.0[..index];
+ let suffix = &self.0[index + pat.len()..];
+ Some((Self::from_inner(prefix), Self::from_inner(suffix)))
+ }
+
pub(super) fn rsplit_once_raw<P>(&self, pat: &P) -> Option<(&Self, &Self)>
where
P: EncodedPattern,
{
- impl_split_once_raw!(self, pat, rfind)
+ self.split_once_raw_with(pat, rfind)
}
/// Equivalent to [`str::rsplit_once`].
///
- /// # Panics
- ///
- /// Panics if the pattern is a byte outside of the ASCII range.
- ///
/// # Examples
///
/// ```
@@ -413,19 +495,18 @@ impl RawOsStr {
fn index_boundary_error(&self, index: usize) -> ! {
debug_assert!(raw::is_continuation(self.0[index]));
- let start = self.0[..index]
+ let start = expect_encoded!(self.0[..index]
.iter()
- .rposition(|&x| !raw::is_continuation(x))
- .expect("invalid raw bytes");
+ .rposition(|&x| !raw::is_continuation(x)));
let mut end = index + 1;
end += self.0[end..]
.iter()
- .position(|&x| !raw::is_continuation(x))
- .unwrap_or_else(|| self.raw_len() - end);
+ .take_while(|&&x| raw::is_continuation(x))
+ .count();
let code_point = raw::decode_code_point(&self.0[start..end]);
panic!(
"byte index {} is not a valid boundary; it is inside U+{:04X} \
- (bytes {}..{})",
+ (bytes {}..{})",
index, code_point, start, end,
);
}
@@ -443,7 +524,7 @@ impl RawOsStr {
///
/// # Panics
///
- /// Panics if the pattern is a byte outside of the ASCII range or empty.
+ /// Panics if the pattern is empty.
///
/// # Examples
///
@@ -455,6 +536,7 @@ impl RawOsStr {
/// ```
#[inline]
#[must_use]
+ #[track_caller]
pub fn split<P>(&self, pat: P) -> Split<'_, P>
where
P: Pattern,
@@ -483,29 +565,23 @@ impl RawOsStr {
/// [valid boundary]: #indices
#[inline]
#[must_use]
+ #[track_caller]
pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
self.check_bound(mid);
let (prefix, suffix) = self.0.split_at(mid);
- (
- Self::from_raw_bytes_unchecked(prefix),
- Self::from_raw_bytes_unchecked(suffix),
- )
+ (Self::from_inner(prefix), Self::from_inner(suffix))
}
pub(super) fn split_once_raw<P>(&self, pat: &P) -> Option<(&Self, &Self)>
where
P: EncodedPattern,
{
- impl_split_once_raw!(self, pat, find)
+ self.split_once_raw_with(pat, find)
}
/// Equivalent to [`str::split_once`].
///
- /// # Panics
- ///
- /// Panics if the pattern is a byte outside of the ASCII range.
- ///
/// # Examples
///
/// ```
@@ -529,10 +605,6 @@ impl RawOsStr {
/// Equivalent to [`str::starts_with`].
///
- /// # Panics
- ///
- /// Panics if the pattern is a byte outside of the ASCII range.
- ///
/// # Examples
///
/// ```
@@ -557,10 +629,6 @@ impl RawOsStr {
/// Equivalent to [`str::starts_with`] but accepts this type for the
/// pattern.
///
- /// # Panics
- ///
- /// Panics if the pattern is a byte outside of the ASCII range.
- ///
/// # Examples
///
/// ```
@@ -578,10 +646,6 @@ impl RawOsStr {
/// Equivalent to [`str::strip_prefix`].
///
- /// # Panics
- ///
- /// Panics if the pattern is a byte outside of the ASCII range.
- ///
/// # Examples
///
/// ```
@@ -603,15 +667,11 @@ impl RawOsStr {
let pat = pat.__encode();
let pat = pat.__get();
- self.0.strip_prefix(pat).map(Self::from_raw_bytes_unchecked)
+ self.0.strip_prefix(pat).map(Self::from_inner)
}
/// Equivalent to [`str::strip_suffix`].
///
- /// # Panics
- ///
- /// Panics if the pattern is a byte outside of the ASCII range.
- ///
/// # Examples
///
/// ```
@@ -633,11 +693,14 @@ impl RawOsStr {
let pat = pat.__encode();
let pat = pat.__get();
- self.0.strip_suffix(pat).map(Self::from_raw_bytes_unchecked)
+ self.0.strip_suffix(pat).map(Self::from_inner)
}
/// Converts this representation back to a platform-native string.
///
+ /// When possible, use [`RawOsStrCow::into_os_str`] for a more efficient
+ /// conversion on some platforms.
+ ///
/// # Examples
///
/// ```
@@ -655,7 +718,7 @@ impl RawOsStr {
#[inline]
#[must_use]
pub fn to_os_str(&self) -> Cow<'_, OsStr> {
- OsStr::from_raw_bytes(&self.0).expect("invalid raw bytes")
+ expect_encoded!(imp::os_str_from_bytes(&self.0))
}
/// Equivalent to [`OsStr::to_str`].
@@ -704,12 +767,32 @@ impl RawOsStr {
String::from_utf8_lossy(&self.0)
}
+ fn trim_matches_raw_with<P, F>(&self, pat: &P, strip_fn: F) -> &Self
+ where
+ F: for<'a> Fn(&'a [u8], &[u8]) -> Option<&'a [u8]>,
+ P: EncodedPattern,
+ {
+ let pat = pat.__get();
+ if pat.is_empty() {
+ return self;
+ }
+
+ let mut string = &self.0;
+ while let Some(substring) = strip_fn(string, pat) {
+ string = substring;
+ }
+ Self::from_inner(string)
+ }
+
+ fn trim_end_matches_raw<P>(&self, pat: &P) -> &Self
+ where
+ P: EncodedPattern,
+ {
+ self.trim_matches_raw_with(pat, <[_]>::strip_suffix)
+ }
+
/// Equivalent to [`str::trim_end_matches`].
///
- /// # Panics
- ///
- /// Panics if the pattern is a byte outside of the ASCII range.
- ///
/// # Examples
///
/// ```
@@ -719,19 +802,44 @@ impl RawOsStr {
/// assert_eq!("111foo1bar", raw.trim_end_matches("1"));
/// assert_eq!("111foo1bar111", raw.trim_end_matches("o"));
/// ```
+ #[inline]
#[must_use]
pub fn trim_end_matches<P>(&self, pat: P) -> &Self
where
P: Pattern,
{
- impl_trim_matches!(self, pat, strip_suffix)
+ self.trim_end_matches_raw(&pat.__encode())
}
- /// Equivalent to [`str::trim_start_matches`].
+ /// Equivalent to [`str::trim_matches`].
///
- /// # Panics
+ /// # Examples
///
- /// Panics if the pattern is a byte outside of the ASCII range.
+ /// ```
+ /// use os_str_bytes::RawOsStr;
+ ///
+ /// let raw = RawOsStr::from_str("111foo1bar111");
+ /// assert_eq!("foo1bar", raw.trim_matches("1"));
+ /// assert_eq!("111foo1bar111", raw.trim_matches("o"));
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn trim_matches<P>(&self, pat: P) -> &Self
+ where
+ P: Pattern,
+ {
+ let pat = pat.__encode();
+ self.trim_start_matches_raw(&pat).trim_end_matches_raw(&pat)
+ }
+
+ fn trim_start_matches_raw<P>(&self, pat: &P) -> &Self
+ where
+ P: EncodedPattern,
+ {
+ self.trim_matches_raw_with(pat, <[_]>::strip_prefix)
+ }
+
+ /// Equivalent to [`str::trim_start_matches`].
///
/// # Examples
///
@@ -742,12 +850,13 @@ impl RawOsStr {
/// assert_eq!("foo1bar111", raw.trim_start_matches("1"));
/// assert_eq!("111foo1bar111", raw.trim_start_matches("o"));
/// ```
+ #[inline]
#[must_use]
pub fn trim_start_matches<P>(&self, pat: P) -> &Self
where
P: Pattern,
{
- impl_trim_matches!(self, pat, strip_prefix)
+ self.trim_start_matches_raw(&pat.__encode())
}
}
@@ -781,40 +890,17 @@ impl Default for &RawOsStr {
impl<'a> From<&'a RawOsStr> for Cow<'a, RawOsStr> {
#[inline]
- fn from(other: &'a RawOsStr) -> Self {
- Cow::Borrowed(other)
+ fn from(value: &'a RawOsStr) -> Self {
+ Cow::Borrowed(value)
}
}
-macro_rules! r#impl {
- (
- $index_type:ty
- $(, $index_var:ident , $first_bound:expr $(, $second_bound:expr)?)?
- ) => {
- impl Index<$index_type> for RawOsStr {
- type Output = Self;
-
- #[inline]
- fn index(&self, idx: $index_type) -> &Self::Output {
- $(
- let $index_var = &idx;
- self.check_bound($first_bound);
- $(self.check_bound($second_bound);)?
- )?
-
- Self::from_raw_bytes_unchecked(&self.0[idx])
- }
- }
- };
+impl From<Box<str>> for Box<RawOsStr> {
+ #[inline]
+ fn from(value: Box<str>) -> Self {
+ value.into_boxed_bytes().transmute_box()
+ }
}
-r#impl!(Range<usize>, x, x.start, x.end);
-r#impl!(RangeFrom<usize>, x, x.start);
-r#impl!(RangeFull);
-// [usize::MAX] will always be a valid inclusive end index.
-#[rustfmt::skip]
-r#impl!(RangeInclusive<usize>, x, *x.start(), x.end().wrapping_add(1));
-r#impl!(RangeTo<usize>, x, x.end);
-r#impl!(RangeToInclusive<usize>, x, x.end.wrapping_add(1));
impl ToOwned for RawOsStr {
type Owned = RawOsString;
@@ -825,11 +911,74 @@ impl ToOwned for RawOsStr {
}
}
-/// A container for the byte strings converted by [`OsStringBytes`].
+/// Extensions to [`Cow<RawOsStr>`] for additional conversions.
///
-/// For more information, see [`RawOsStr`].
+/// [`Cow<RawOsStr>`]: Cow
+#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))]
+pub trait RawOsStrCow<'a>: private::Sealed {
+ /// Converts this representation back to a platform-native string.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::RawOsStr;
+ /// use os_str_bytes::RawOsStrCow;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// let raw = RawOsStr::new(&os_string);
+ /// assert_eq!(os_string, raw.into_os_str());
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ #[must_use]
+ fn into_os_str(self) -> Cow<'a, OsStr>;
+
+ /// Returns the byte string stored by this container.
+ ///
+ /// The returned string will use an [unspecified encoding].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::borrow::Cow;
+ ///
+ /// use os_str_bytes::RawOsStr;
+ /// use os_str_bytes::RawOsStrCow;
+ ///
+ /// let string = "foobar";
+ /// let raw = Cow::Borrowed(RawOsStr::from_str(string));
+ /// assert_eq!(string.as_bytes(), &*raw.into_raw_bytes());
+ /// ```
+ ///
+ /// [unspecified encoding]: super#encoding
+ #[must_use]
+ fn into_raw_bytes(self) -> Cow<'a, [u8]>;
+}
+
+impl<'a> RawOsStrCow<'a> for Cow<'a, RawOsStr> {
+ #[inline]
+ fn into_os_str(self) -> Cow<'a, OsStr> {
+ match self {
+ Cow::Borrowed(string) => string.to_os_str(),
+ Cow::Owned(string) => Cow::Owned(string.into_os_string()),
+ }
+ }
+
+ #[inline]
+ fn into_raw_bytes(self) -> Cow<'a, [u8]> {
+ match self {
+ Cow::Borrowed(string) => Cow::Borrowed(&string.0),
+ Cow::Owned(string) => Cow::Owned(string.0),
+ }
+ }
+}
+
+/// A container for owned byte strings converted by this crate.
///
-/// [unspecified encoding]: super#encoding
+/// For more information, see [`RawOsStr`].
#[derive(Clone, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
#[cfg_attr(os_str_bytes_docs_rs, doc(cfg(feature = "raw_os_str")))]
pub struct RawOsString(Vec<u8>);
@@ -856,7 +1005,7 @@ impl RawOsString {
#[inline]
#[must_use]
pub fn new(string: OsString) -> Self {
- Self(string.into_raw_vec())
+ Self(imp::os_string_into_vec(string))
}
/// Wraps a string, without copying or encoding conversion.
@@ -881,7 +1030,12 @@ impl RawOsString {
Self(string.into_bytes())
}
- /// Converts this representation back to a platform-native string.
+ /// Wraps a byte string, without copying or encoding conversion.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the string is not valid for the [unspecified encoding] used
+ /// by this crate.
///
/// # Examples
///
@@ -892,21 +1046,139 @@ impl RawOsString {
/// use os_str_bytes::RawOsString;
///
/// let os_string = env::current_exe()?.into_os_string();
- /// let raw = RawOsString::new(os_string.clone());
- /// assert_eq!(os_string, raw.into_os_string());
+ /// let raw = RawOsString::new(os_string);
+ /// let raw_bytes = raw.clone().into_raw_vec();
+ /// assert_eq!(raw, RawOsString::assert_from_raw_vec(raw_bytes));
/// #
/// # Ok::<_, io::Error>(())
/// ```
+ ///
+ /// [unspecified encoding]: super#encoding
+ #[inline]
+ #[must_use = "method should not be used for validation"]
+ #[track_caller]
+ pub fn assert_from_raw_vec(string: Vec<u8>) -> Self {
+ expect_encoded!(raw::validate_bytes(&string));
+
+ Self(string)
+ }
+
+ if_checked_conversions! {
+ /// Wraps a byte string, without copying or encoding conversion.
+ ///
+ /// [`assert_from_raw_vec`] should almost always be used instead. For
+ /// more information, see [`EncodingError`].
+ ///
+ /// # Errors
+ ///
+ /// See documentation for [`EncodingError`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// let raw = RawOsString::new(os_string);
+ /// let raw_clone = raw.clone();
+ /// assert_eq!(Ok(raw), RawOsString::from_raw_vec(raw_clone.into_raw_vec()));
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ ///
+ /// [`assert_from_raw_vec`]: Self::assert_from_raw_vec
+ #[cfg_attr(
+ os_str_bytes_docs_rs,
+ doc(cfg(feature = "checked_conversions"))
+ )]
+ #[inline]
+ pub fn from_raw_vec(string: Vec<u8>) -> Result<Self> {
+ raw::validate_bytes(&string)
+ .map(|()| Self(string))
+ .map_err(EncodingError)
+ }
+ }
+
+ /// Wraps a byte string, without copying or encoding conversion.
+ ///
+ /// # Safety
+ ///
+ /// The string must be valid for the [unspecified encoding] used by this
+ /// crate.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
+ ///
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// let raw = RawOsString::new(os_string);
+ /// let raw_bytes = raw.clone().into_raw_vec();
+ /// assert_eq!(raw, unsafe {
+ /// RawOsString::from_raw_vec_unchecked(raw_bytes)
+ /// });
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ ///
+ /// [unspecified encoding]: super#encoding
#[inline]
#[must_use]
- pub fn into_os_string(self) -> OsString {
- OsString::from_raw_vec(self.0).expect("invalid raw bytes")
+ #[track_caller]
+ pub unsafe fn from_raw_vec_unchecked(string: Vec<u8>) -> Self {
+ if cfg!(debug_assertions) {
+ expect_encoded!(raw::validate_bytes(&string));
+ }
+
+ Self(string)
}
- /// Returns the byte string stored by this container.
+ /// Equivalent to [`String::clear`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::env;
+ /// # use std::io;
///
- /// The result will match what would be returned by
- /// [`OsStringBytes::into_raw_vec`] for the same string.
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let os_string = env::current_exe()?.into_os_string();
+ /// let mut raw = RawOsString::new(os_string);
+ /// raw.clear();
+ /// assert!(raw.is_empty());
+ /// #
+ /// # Ok::<_, io::Error>(())
+ /// ```
+ #[inline]
+ pub fn clear(&mut self) {
+ self.0.clear();
+ }
+
+ /// Equivalent to [`String::into_boxed_str`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let string = "foobar".to_owned();
+ /// let raw = RawOsString::from_string(string.clone());
+ /// assert_eq!(string, *raw.into_box());
+ /// ```
+ #[inline]
+ #[must_use]
+ pub fn into_box(self) -> Box<RawOsStr> {
+ self.0.into_boxed_slice().transmute_box()
+ }
+
+ /// Converts this representation back to a platform-native string.
///
/// # Examples
///
@@ -914,17 +1186,37 @@ impl RawOsString {
/// use std::env;
/// # use std::io;
///
- /// use os_str_bytes::OsStringBytes;
/// use os_str_bytes::RawOsString;
///
/// let os_string = env::current_exe()?.into_os_string();
/// let raw = RawOsString::new(os_string.clone());
- /// assert_eq!(os_string.into_raw_vec(), raw.into_raw_vec());
+ /// assert_eq!(os_string, raw.into_os_string());
/// #
/// # Ok::<_, io::Error>(())
/// ```
#[inline]
#[must_use]
+ pub fn into_os_string(self) -> OsString {
+ expect_encoded!(imp::os_string_from_vec(self.0))
+ }
+
+ /// Returns the byte string stored by this container.
+ ///
+ /// The returned string will use an [unspecified encoding].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let string = "foobar".to_owned();
+ /// let raw = RawOsString::from_string(string.clone());
+ /// assert_eq!(string.into_bytes(), raw.into_raw_vec());
+ /// ```
+ ///
+ /// [unspecified encoding]: super#encoding
+ #[inline]
+ #[must_use]
pub fn into_raw_vec(self) -> Vec<u8> {
self.0
}
@@ -941,9 +1233,77 @@ impl RawOsString {
/// assert_eq!(Ok(string), raw.into_string());
/// ```
#[inline]
- pub fn into_string(self) -> Result<String, Self> {
+ pub fn into_string(self) -> result::Result<String, Self> {
String::from_utf8(self.0).map_err(|x| Self(x.into_bytes()))
}
+
+ /// Equivalent to [`String::shrink_to_fit`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let string = "foobar".to_owned();
+ /// let mut raw = RawOsString::from_string(string.clone());
+ /// raw.shrink_to_fit();
+ /// assert_eq!(string, raw);
+ /// ```
+ #[inline]
+ pub fn shrink_to_fit(&mut self) {
+ self.0.shrink_to_fit();
+ }
+
+ /// Equivalent to [`String::split_off`].
+ ///
+ /// # Panics
+ ///
+ /// Panics if the index is not a [valid boundary].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let mut raw = RawOsString::from_string("foobar".to_owned());
+ /// assert_eq!("bar", raw.split_off(3));
+ /// assert_eq!("foo", raw);
+ /// ```
+ ///
+ /// [valid boundary]: RawOsStr#indices
+ #[inline]
+ #[must_use]
+ #[track_caller]
+ pub fn split_off(&mut self, at: usize) -> Self {
+ self.check_bound(at);
+
+ Self(self.0.split_off(at))
+ }
+
+ /// Equivalent to [`String::truncate`].
+ ///
+ /// # Panics
+ ///
+ /// Panics if the index is not a [valid boundary].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use os_str_bytes::RawOsString;
+ ///
+ /// let mut raw = RawOsString::from_string("foobar".to_owned());
+ /// raw.truncate(3);
+ /// assert_eq!("foo", raw);
+ /// ```
+ ///
+ /// [valid boundary]: RawOsStr#indices
+ #[inline]
+ #[track_caller]
+ pub fn truncate(&mut self, new_len: usize) {
+ self.check_bound(new_len);
+
+ self.0.truncate(new_len);
+ }
}
impl AsRef<RawOsStr> for RawOsString {
@@ -965,46 +1325,41 @@ impl Deref for RawOsString {
#[inline]
fn deref(&self) -> &Self::Target {
- RawOsStr::from_raw_bytes_unchecked(&self.0)
+ RawOsStr::from_inner(&self.0)
}
}
-impl From<String> for RawOsString {
+impl From<RawOsString> for Box<RawOsStr> {
#[inline]
- fn from(other: String) -> Self {
- Self::from_string(other)
+ fn from(value: RawOsString) -> Self {
+ value.into_box()
}
}
-impl From<RawOsString> for Cow<'_, RawOsStr> {
+impl From<Box<RawOsStr>> for RawOsString {
#[inline]
- fn from(other: RawOsString) -> Self {
- Cow::Owned(other)
+ fn from(value: Box<RawOsStr>) -> Self {
+ Self(value.transmute_box::<[_]>().into_vec())
}
}
-macro_rules! r#impl {
- ( $index_type:ty ) => {
- impl Index<$index_type> for RawOsString {
- type Output = RawOsStr;
+impl From<RawOsString> for Cow<'_, RawOsStr> {
+ #[inline]
+ fn from(value: RawOsString) -> Self {
+ Cow::Owned(value)
+ }
+}
- #[inline]
- fn index(&self, idx: $index_type) -> &Self::Output {
- &(**self)[idx]
- }
- }
- };
+impl From<String> for RawOsString {
+ #[inline]
+ fn from(value: String) -> Self {
+ Self::from_string(value)
+ }
}
-r#impl!(Range<usize>);
-r#impl!(RangeFrom<usize>);
-r#impl!(RangeFull);
-r#impl!(RangeInclusive<usize>);
-r#impl!(RangeTo<usize>);
-r#impl!(RangeToInclusive<usize>);
-struct Buffer<'a>(&'a [u8]);
+struct DebugBuffer<'a>(&'a [u8]);
-impl Debug for Buffer<'_> {
+impl Debug for DebugBuffer<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
f.write_str("\"")?;
@@ -1050,7 +1405,7 @@ macro_rules! r#impl {
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
f.debug_tuple(stringify!($type))
- .field(&Buffer(&self.0))
+ .field(&DebugBuffer(&self.0))
.finish()
}
}
@@ -1060,6 +1415,42 @@ r#impl!(RawOsStr);
r#impl!(RawOsString);
macro_rules! r#impl {
+ ( $index_type:ty $(, $index_var:ident , $($bound:expr),+)? ) => {
+ impl Index<$index_type> for RawOsStr {
+ type Output = Self;
+
+ #[inline]
+ fn index(&self, idx: $index_type) -> &Self::Output {
+ $(
+ let $index_var = &idx;
+ $(self.check_bound($bound);)+
+ )?
+
+ Self::from_inner(&self.0[idx])
+ }
+ }
+
+ impl Index<$index_type> for RawOsString {
+ type Output = RawOsStr;
+
+ #[allow(clippy::indexing_slicing)]
+ #[inline]
+ fn index(&self, idx: $index_type) -> &Self::Output {
+ &(**self)[idx]
+ }
+ }
+ };
+}
+r#impl!(Range<usize>, x, x.start, x.end);
+r#impl!(RangeFrom<usize>, x, x.start);
+r#impl!(RangeFull);
+// [usize::MAX] will always be a valid inclusive end index.
+#[rustfmt::skip]
+r#impl!(RangeInclusive<usize>, x, *x.start(), x.end().wrapping_add(1));
+r#impl!(RangeTo<usize>, x, x.end);
+r#impl!(RangeToInclusive<usize>, x, x.end.wrapping_add(1));
+
+macro_rules! r#impl {
( $type:ty , $other_type:ty ) => {
impl PartialEq<$other_type> for $type {
#[inline]
diff --git a/src/util.rs b/src/util.rs
index bd28b7b..f931969 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -4,7 +4,6 @@ pub(super) const CONT_MASK: u8 = (1 << BYTE_SHIFT) - 1;
pub(super) const CONT_TAG: u8 = 0b1000_0000;
-#[cfg_attr(not(windows), allow(dead_code))]
pub(super) const fn is_continuation(byte: u8) -> bool {
byte & !CONT_MASK == CONT_TAG
}
diff --git a/src/wasm32/mod.rs b/src/wasm/mod.rs
index f8ae368..a8a2996 100644
--- a/src/wasm32/mod.rs
+++ b/src/wasm/mod.rs
@@ -13,12 +13,12 @@ if_raw_str! {
pub(super) mod raw;
}
-#[derive(Debug, Eq, PartialEq)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(super) struct EncodingError(Utf8Error);
impl Display for EncodingError {
- fn fmt(&self, formatter: &mut Formatter<'_>) -> fmt::Result {
- write!(formatter, "os_str_bytes: {}", self.0)
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ write!(f, "os_str_bytes: {}", self.0)
}
}
@@ -30,15 +30,17 @@ macro_rules! expect_utf8 {
( $result:expr ) => {
$result.expect(
"platform string contains invalid UTF-8, which should not be \
- possible",
+ possible",
)
};
}
+fn from_bytes(string: &[u8]) -> Result<&str> {
+ str::from_utf8(string).map_err(EncodingError)
+}
+
pub(super) fn os_str_from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>> {
- str::from_utf8(string)
- .map(|x| Cow::Borrowed(OsStr::new(x)))
- .map_err(EncodingError)
+ from_bytes(string).map(|x| Cow::Borrowed(OsStr::new(x)))
}
pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> {
diff --git a/src/wasm32/raw.rs b/src/wasm/raw.rs
index 5645900..fb291a6 100644
--- a/src/wasm32/raw.rs
+++ b/src/wasm/raw.rs
@@ -4,8 +4,22 @@ use std::str;
pub(crate) use crate::util::is_continuation;
+use super::Result;
+
+#[allow(dead_code)]
+#[path = "../common/raw.rs"]
+mod common_raw;
+pub(crate) use common_raw::ends_with;
+pub(crate) use common_raw::starts_with;
+#[cfg(feature = "uniquote")]
+pub(crate) use common_raw::uniquote;
+
+pub(crate) fn validate_bytes(string: &[u8]) -> Result<()> {
+ super::from_bytes(string).map(drop)
+}
+
pub(crate) fn decode_code_point(string: &[u8]) -> u32 {
- let string = str::from_utf8(string).expect("invalid string");
+ let string = expect_encoded!(str::from_utf8(string));
let mut chars = string.chars();
let ch = chars
.next()
@@ -14,26 +28,7 @@ pub(crate) fn decode_code_point(string: &[u8]) -> u32 {
ch.into()
}
-pub(crate) fn ends_with(string: &[u8], suffix: &[u8]) -> bool {
- string.ends_with(suffix)
-}
-
-pub(crate) fn starts_with(string: &[u8], prefix: &[u8]) -> bool {
- string.starts_with(prefix)
-}
-
pub(crate) fn debug(string: &[u8], _: &mut Formatter<'_>) -> fmt::Result {
assert!(string.is_empty());
Ok(())
}
-
-#[cfg(feature = "uniquote")]
-pub(crate) mod uniquote {
- use uniquote::Formatter;
- use uniquote::Quote;
- use uniquote::Result;
-
- pub(crate) fn escape(string: &[u8], f: &mut Formatter<'_>) -> Result {
- string.escape(f)
- }
-}
diff --git a/src/windows/mod.rs b/src/windows/mod.rs
index 3b6105b..ed9e60b 100644
--- a/src/windows/mod.rs
+++ b/src/windows/mod.rs
@@ -9,6 +9,7 @@ use std::ffi::OsString;
use std::fmt;
use std::fmt::Display;
use std::fmt::Formatter;
+use std::ops::Not;
use std::os::windows::ffi::OsStrExt;
use std::os::windows::ffi::OsStringExt;
use std::result;
@@ -19,10 +20,12 @@ if_raw_str! {
}
mod wtf8;
-use wtf8::encode_wide;
use wtf8::DecodeWide;
-#[derive(Debug, Eq, PartialEq)]
+#[cfg(test)]
+mod tests;
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(super) enum EncodingError {
Byte(u8),
CodePoint(u32),
@@ -42,11 +45,11 @@ impl EncodingError {
}
impl Display for EncodingError {
- fn fmt(&self, formatter: &mut Formatter<'_>) -> fmt::Result {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(
- formatter,
+ f,
"byte sequence is not representable in the platform encoding; \
- error at {}",
+ error at {}",
self.position(),
)
}
@@ -56,16 +59,21 @@ impl Error for EncodingError {}
type Result<T> = result::Result<T, EncodingError>;
-fn from_bytes(string: &[u8]) -> Result<OsString> {
- let encoder = encode_wide(string);
+fn from_bytes(string: &[u8]) -> Result<Option<OsString>> {
+ let mut encoder = wtf8::encode_wide(string);
// Collecting an iterator into a result ignores the size hint:
// https://github.com/rust-lang/rust/issues/48994
let mut encoded_string = Vec::with_capacity(encoder.size_hint().0);
- for wchar in encoder {
+ for wchar in &mut encoder {
encoded_string.push(wchar?);
}
- Ok(OsStringExt::from_wide(&encoded_string))
+
+ debug_assert_eq!(str::from_utf8(string).is_ok(), encoder.is_still_utf8());
+ Ok(encoder
+ .is_still_utf8()
+ .not()
+ .then(|| OsStringExt::from_wide(&encoded_string)))
}
fn to_bytes(os_string: &OsStr) -> Vec<u8> {
@@ -77,7 +85,14 @@ fn to_bytes(os_string: &OsStr) -> Vec<u8> {
}
pub(super) fn os_str_from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>> {
- from_bytes(string).map(Cow::Owned)
+ from_bytes(string).map(|os_string| {
+ os_string.map(Cow::Owned).unwrap_or_else(|| {
+ // SAFETY: This slice was validated to be UTF-8.
+ Cow::Borrowed(OsStr::new(unsafe {
+ str::from_utf8_unchecked(string)
+ }))
+ })
+ })
}
pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> {
@@ -85,68 +100,14 @@ pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> {
}
pub(super) fn os_string_from_vec(string: Vec<u8>) -> Result<OsString> {
- from_bytes(&string)
+ from_bytes(&string).map(|os_string| {
+ os_string.unwrap_or_else(|| {
+ // SAFETY: This slice was validated to be UTF-8.
+ unsafe { String::from_utf8_unchecked(string) }.into()
+ })
+ })
}
pub(super) fn os_string_into_vec(os_string: OsString) -> Vec<u8> {
to_bytes(&os_string)
}
-
-#[cfg(test)]
-mod tests {
- use std::ffi::OsStr;
-
- use crate::OsStrBytes;
-
- use super::EncodingError;
-
- #[test]
- fn test_invalid() {
- use EncodingError::Byte;
- use EncodingError::CodePoint;
- use EncodingError::End;
-
- test_error(Byte(b'\x83'), b"\x0C\x83\xD7\x3E");
- test_error(Byte(b'\x52'), b"\x19\xF7\x52\x84");
- test_error(Byte(b'\xB8'), b"\x70\xB8\x1F\x66");
- test_error(CodePoint(0x34_0388), b"\x70\xFD\x80\x8E\x88");
- test_error(Byte(b'\x80'), b"\x80");
- test_error(Byte(b'\x80'), b"\x80\x80");
- test_error(Byte(b'\x80'), b"\x80\x80\x80");
- test_error(Byte(b'\x81'), b"\x81");
- test_error(Byte(b'\x88'), b"\x88\xB4\xC7\x46");
- test_error(Byte(b'\x97'), b"\x97\xCE\x06");
- test_error(Byte(b'\x00'), b"\xC2\x00");
- test_error(Byte(b'\x7F'), b"\xC2\x7F");
- test_error(Byte(b'\x09'), b"\xCD\x09\x95");
- test_error(Byte(b'\x43'), b"\xCD\x43\x5F\xA0");
- test_error(Byte(b'\x69'), b"\xD7\x69\xB2");
- test_error(CodePoint(0x528), b"\xE0\x94\xA8");
- test_error(CodePoint(0x766), b"\xE0\x9D\xA6\x12\xAE");
- test_error(Byte(b'\xFD'), b"\xE2\xAB\xFD\x51");
- test_error(Byte(b'\xC4'), b"\xE3\xC4");
- test_error(CodePoint(0xDC00), b"\xED\xA0\x80\xED\xB0\x80");
- test_error(End(), b"\xF1");
- test_error(End(), b"\xF1\x80");
- test_error(End(), b"\xF1\x80\x80");
- test_error(Byte(b'\xF1'), b"\xF1\x80\x80\xF1");
- test_error(CodePoint(0x11_09CC), b"\xF4\x90\xA7\x8C");
- test_error(CodePoint(0x15_EC46), b"\xF5\x9E\xB1\x86");
- test_error(End(), b"\xFB");
- test_error(End(), b"\xFB\x80");
- test_error(End(), b"\xFB\x80\x80");
- test_error(CodePoint(0x2C_0000), b"\xFB\x80\x80\x80");
- test_error(End(), b"\xFF");
- test_error(End(), b"\xFF\x80");
- test_error(End(), b"\xFF\x80\x80");
- test_error(CodePoint(0x3C_0000), b"\xFF\x80\x80\x80");
- test_error(CodePoint(0x3C_6143), b"\xFF\x86\x85\x83");
-
- fn test_error(error: EncodingError, string: &[u8]) {
- assert_eq!(
- Err(error),
- OsStr::from_raw_bytes(string).map_err(|x| x.0),
- );
- }
- }
-}
diff --git a/src/windows/raw.rs b/src/windows/raw.rs
index 630eb01..80953de 100644
--- a/src/windows/raw.rs
+++ b/src/windows/raw.rs
@@ -7,19 +7,23 @@ use super::wtf8;
pub(crate) use super::wtf8::ends_with;
pub(crate) use super::wtf8::starts_with;
use super::wtf8::CodePoints;
+use super::Result;
+
+pub(crate) fn validate_bytes(string: &[u8]) -> Result<()> {
+ wtf8::encode_wide(string).try_for_each(|x| x.map(drop))
+}
pub(crate) fn encode_wide_unchecked(
string: &[u8],
) -> impl '_ + Iterator<Item = u16> {
- wtf8::encode_wide(string).map(|x| x.expect("invalid string"))
+ wtf8::encode_wide(string).map(|x| expect_encoded!(x))
}
pub(crate) fn decode_code_point(string: &[u8]) -> u32 {
let mut code_points = CodePoints::new(string.iter().copied());
- let code_point = code_points
+ let code_point = expect_encoded!(code_points
.next()
- .expect("cannot parse code point from empty string")
- .expect("invalid string");
+ .expect("cannot parse code point from empty string"));
assert_eq!(None, code_points.next(), "multiple code points found");
code_point
}
diff --git a/src/windows/wtf8/code_points.rs b/src/windows/wtf8/code_points.rs
index b265db3..9800d78 100644
--- a/src/windows/wtf8/code_points.rs
+++ b/src/windows/wtf8/code_points.rs
@@ -1,3 +1,4 @@
+use std::iter::FusedIterator;
use std::iter::Peekable;
use std::mem;
@@ -14,6 +15,7 @@ where
{
iter: Peekable<I>,
surrogate: bool,
+ still_utf8: bool,
}
impl<I> CodePoints<I>
@@ -22,30 +24,34 @@ where
{
pub(in super::super) fn new<S>(string: S) -> Self
where
- S: IntoIterator<IntoIter = I, Item = I::Item>,
+ S: IntoIterator<IntoIter = I>,
{
Self {
iter: string.into_iter().peekable(),
surrogate: false,
+ still_utf8: true,
}
}
+ pub(super) fn is_still_utf8(&self) -> bool {
+ self.still_utf8
+ }
+
fn consume_next(&mut self, code_point: &mut u32) -> Result<()> {
- if let Some(&byte) = self.iter.peek() {
- if !is_continuation(byte) {
- self.surrogate = false;
- // Not consuming this byte will be useful if this crate ever
- // offers a way to encode lossily.
- return Err(EncodingError::Byte(byte));
- }
- *code_point =
- (*code_point << BYTE_SHIFT) | u32::from(byte & CONT_MASK);
+ let &byte = self.iter.peek().ok_or(EncodingError::End())?;
- let removed = self.iter.next();
- debug_assert_eq!(Some(byte), removed);
- } else {
- return Err(EncodingError::End());
+ if !is_continuation(byte) {
+ self.surrogate = false;
+ // Not consuming this byte will be useful if this crate ever offers
+ // a way to encode lossily.
+ return Err(EncodingError::Byte(byte));
}
+ *code_point =
+ (*code_point << BYTE_SHIFT) | u32::from(byte & CONT_MASK);
+
+ let removed = self.iter.next();
+ debug_assert_eq!(Some(byte), removed);
+
Ok(())
}
@@ -54,6 +60,11 @@ where
}
}
+impl<I> FusedIterator for CodePoints<I> where
+ I: FusedIterator + Iterator<Item = u8>
+{
+}
+
impl<I> Iterator for CodePoints<I>
where
I: Iterator<Item = u8>,
@@ -94,6 +105,7 @@ where
// This condition is optimized to detect surrogate code points.
} else if code_point & 0xFE0 == 0x360 {
+ self.still_utf8 = false;
if code_point & 0x10 == 0 {
self.surrogate = true;
} else if prev_surrogate {
diff --git a/src/windows/wtf8/convert.rs b/src/windows/wtf8/convert.rs
index fcaf562..70a8a9f 100644
--- a/src/windows/wtf8/convert.rs
+++ b/src/windows/wtf8/convert.rs
@@ -1,5 +1,6 @@
use std::char;
use std::char::DecodeUtf16;
+use std::iter::FusedIterator;
use std::num::NonZeroU16;
use crate::util::BYTE_SHIFT;
@@ -27,7 +28,7 @@ where
{
iter: DecodeUtf16<I>,
code_point: u32,
- shift: u8,
+ shifts: u8,
}
impl<I> DecodeWide<I>
@@ -41,9 +42,14 @@ where
Self {
iter: char::decode_utf16(string),
code_point: 0,
- shift: 0,
+ shifts: 0,
}
}
+
+ #[inline(always)]
+ fn get_raw_byte(&self) -> u8 {
+ (self.code_point >> (self.shifts * BYTE_SHIFT)) as u8
+ }
}
impl<I> Iterator for DecodeWide<I>
@@ -53,11 +59,9 @@ where
type Item = u8;
fn next(&mut self) -> Option<Self::Item> {
- if let Some(shift) = self.shift.checked_sub(BYTE_SHIFT) {
- self.shift = shift;
- return Some(
- ((self.code_point >> self.shift) as u8 & CONT_MASK) | CONT_TAG,
- );
+ if let Some(shifts) = self.shifts.checked_sub(1) {
+ self.shifts = shifts;
+ return Some((self.get_raw_byte() & CONT_MASK) | CONT_TAG);
}
self.code_point = self
@@ -68,7 +72,7 @@ where
macro_rules! decode {
( $tag:expr ) => {
- Some((self.code_point >> self.shift) as u8 | $tag)
+ Some(self.get_raw_byte() | $tag)
};
}
macro_rules! try_decode {
@@ -76,7 +80,7 @@ where
if self.code_point < $upper_bound {
return decode!($tag);
}
- self.shift += BYTE_SHIFT;
+ self.shifts += 1;
};
}
try_decode!(0, 0x80);
@@ -87,16 +91,16 @@ where
fn size_hint(&self) -> (usize, Option<usize>) {
let (low, high) = self.iter.size_hint();
- let shift = self.shift.into();
+ let shifts = self.shifts.into();
(
- low.saturating_add(shift),
+ low.saturating_add(shifts),
high.and_then(|x| x.checked_mul(4))
- .and_then(|x| x.checked_add(shift)),
+ .and_then(|x| x.checked_add(shifts)),
)
}
}
-struct EncodeWide<I>
+pub(in super::super) struct EncodeWide<I>
where
I: Iterator<Item = u8>,
{
@@ -108,15 +112,24 @@ impl<I> EncodeWide<I>
where
I: Iterator<Item = u8>,
{
- pub(in super::super) fn new<S>(string: S) -> Self
+ fn new<S>(string: S) -> Self
where
- S: IntoIterator<IntoIter = I, Item = I::Item>,
+ S: IntoIterator<IntoIter = I>,
{
Self {
iter: CodePoints::new(string),
surrogate: None,
}
}
+
+ pub(in super::super) fn is_still_utf8(&self) -> bool {
+ self.iter.is_still_utf8()
+ }
+}
+
+impl<I> FusedIterator for EncodeWide<I> where
+ I: FusedIterator + Iterator<Item = u8>
+{
}
impl<I> Iterator for EncodeWide<I>
@@ -137,6 +150,8 @@ where
.map(|offset| {
static_assert!(MIN_LOW_SURROGATE != 0);
+ // SAFETY: The above static assertion guarantees that
+ // this value will not be zero.
self.surrogate = Some(unsafe {
NonZeroU16::new_unchecked(
(offset & 0x3FF) as u16 | MIN_LOW_SURROGATE,
@@ -161,6 +176,6 @@ where
pub(in super::super) fn encode_wide(
string: &[u8],
-) -> impl '_ + Iterator<Item = Result<u16>> {
+) -> EncodeWide<impl '_ + Iterator<Item = u8>> {
EncodeWide::new(string.iter().copied())
}
diff --git a/src/windows/wtf8/string.rs b/src/windows/wtf8/string.rs
index 10b8faf..b3523a2 100644
--- a/src/windows/wtf8/string.rs
+++ b/src/windows/wtf8/string.rs
@@ -1,27 +1,28 @@
-use crate::util::is_continuation;
-
-use super::encode_wide;
+use crate::util;
const SURROGATE_LENGTH: usize = 3;
pub(crate) fn ends_with(string: &[u8], mut suffix: &[u8]) -> bool {
- let index = match string.len().checked_sub(suffix.len()) {
- Some(index) => index,
- None => return false,
+ let index = if let Some(index) = string.len().checked_sub(suffix.len()) {
+ index
+ } else {
+ return false;
};
if let Some(&byte) = string.get(index) {
- if is_continuation(byte) {
- let index = index.checked_sub(1).expect("invalid string");
- let mut wide_surrogate = match suffix.get(..SURROGATE_LENGTH) {
- Some(surrogate) => encode_wide(surrogate),
- None => return false,
- };
+ if util::is_continuation(byte) {
+ let index = expect_encoded!(index.checked_sub(1));
+ let mut wide_surrogate =
+ if let Some(surrogate) = suffix.get(..SURROGATE_LENGTH) {
+ super::encode_wide(surrogate)
+ } else {
+ return false;
+ };
let surrogate_wchar = wide_surrogate
.next()
.expect("failed decoding non-empty suffix");
if wide_surrogate.next().is_some()
- || encode_wide(&string[index..])
+ || super::encode_wide(&string[index..])
.take_while(Result::is_ok)
.nth(1)
!= Some(surrogate_wchar)
@@ -36,20 +37,23 @@ pub(crate) fn ends_with(string: &[u8], mut suffix: &[u8]) -> bool {
pub(crate) fn starts_with(string: &[u8], mut prefix: &[u8]) -> bool {
if let Some(&byte) = string.get(prefix.len()) {
- if is_continuation(byte) {
- let index = match prefix.len().checked_sub(SURROGATE_LENGTH) {
- Some(index) => index,
- None => return false,
+ if util::is_continuation(byte) {
+ let index = if let Some(index) =
+ prefix.len().checked_sub(SURROGATE_LENGTH)
+ {
+ index
+ } else {
+ return false;
};
let (substring, surrogate) = prefix.split_at(index);
- let mut wide_surrogate = encode_wide(surrogate);
+ let mut wide_surrogate = super::encode_wide(surrogate);
let surrogate_wchar = wide_surrogate
.next()
.expect("failed decoding non-empty prefix");
if surrogate_wchar.is_err()
|| wide_surrogate.next().is_some()
- || encode_wide(&string[index..])
+ || super::encode_wide(&string[index..])
.next()
.expect("failed decoding non-empty substring")
!= surrogate_wchar
diff --git a/tests/common.rs b/tests/common.rs
deleted file mode 100644
index c0909bc..0000000
--- a/tests/common.rs
+++ /dev/null
@@ -1,94 +0,0 @@
-#![allow(dead_code)]
-#![warn(unsafe_op_in_unsafe_fn)]
-
-use std::borrow::Cow;
-use std::ffi::OsStr;
-use std::ffi::OsString;
-#[cfg(feature = "raw_os_str")]
-use std::mem;
-use std::path::Path;
-use std::path::PathBuf;
-use std::result;
-
-use os_str_bytes::EncodingError;
-use os_str_bytes::OsStrBytes;
-use os_str_bytes::OsStringBytes;
-#[cfg(feature = "raw_os_str")]
-use os_str_bytes::RawOsStr;
-
-pub(crate) type Result<T> = result::Result<T, EncodingError>;
-
-pub(crate) const WTF8_STRING: &[u8] = b"foo\xED\xA0\xBD\xF0\x9F\x92\xA9bar";
-
-// SAFETY: This string is valid in WTF-8.
-#[cfg(all(any(unix, windows), feature = "raw_os_str"))]
-pub(crate) const RAW_WTF8_STRING: &RawOsStr =
- unsafe { from_raw_bytes_unchecked(WTF8_STRING) };
-
-#[cfg(feature = "raw_os_str")]
-pub(crate) const unsafe fn from_raw_bytes_unchecked(
- string: &[u8],
-) -> &RawOsStr {
- // SAFETY: This implementation detail can only be assumed by this crate.
- unsafe { mem::transmute(string) }
-}
-
-#[track_caller]
-fn test_from_bytes<'a, T, U, S>(result: &Result<U>, string: S)
-where
- S: Into<Cow<'a, [u8]>>,
- T: 'a + AsRef<OsStr> + OsStrBytes + ?Sized,
- U: AsRef<OsStr>,
-{
- assert_eq!(
- result.as_ref().map(AsRef::as_ref),
- T::from_raw_bytes(string).as_deref().map(AsRef::as_ref),
- );
-}
-
-pub(crate) fn from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>> {
- let os_string = OsStr::from_raw_bytes(string);
-
- test_from_bytes::<Path, _, _>(&os_string, string);
-
- os_string
-}
-
-pub(crate) fn from_vec(string: Vec<u8>) -> Result<OsString> {
- let os_string = OsString::from_raw_vec(string.clone());
- test_from_bytes::<OsStr, _, _>(&os_string, string.clone());
-
- let path = PathBuf::from_raw_vec(string.clone());
- test_from_bytes::<Path, _, _>(&path, string);
- assert_eq!(os_string, path.map(PathBuf::into_os_string));
-
- os_string
-}
-
-pub(crate) fn test_bytes(string: &[u8]) -> Result<()> {
- let os_string = from_bytes(string)?;
- assert_eq!(string.len(), os_string.len());
- assert_eq!(string, &*os_string.to_raw_bytes());
- Ok(())
-}
-
-pub(crate) fn test_vec(string: &[u8]) -> Result<()> {
- let os_string = from_vec(string.to_owned())?;
- assert_eq!(string.len(), os_string.len());
- assert_eq!(string, os_string.into_raw_vec());
- Ok(())
-}
-
-pub(crate) fn test_utf8_bytes(string: &str) {
- let os_string = OsStr::new(string);
- let string = string.as_bytes();
- assert_eq!(Ok(Cow::Borrowed(os_string)), from_bytes(string));
- assert_eq!(string, &*os_string.to_raw_bytes());
-}
-
-pub(crate) fn test_utf8_vec(string: &str) {
- let os_string = string.to_owned().into();
- let string = string.as_bytes();
- assert_eq!(Ok(&os_string), from_vec(string.to_owned()).as_ref());
- assert_eq!(string, os_string.into_raw_vec());
-}
diff --git a/tests/debug.rs b/tests/debug.rs
deleted file mode 100644
index c252deb..0000000
--- a/tests/debug.rs
+++ /dev/null
@@ -1,34 +0,0 @@
-#![cfg(feature = "raw_os_str")]
-
-use os_str_bytes::RawOsStr;
-
-mod common;
-use common::RAW_WTF8_STRING;
-
-fn test(result: &str, string: &RawOsStr) {
- assert_eq!(format!("RawOsStr({})", result), format!("{:?}", string));
- assert_eq!(
- format!("RawOsString({})", result),
- format!("{:?}", string.to_owned()),
- );
-}
-
-#[test]
-fn test_debug_empty() {
- test("\"\"", RawOsStr::from_str(""));
-}
-
-#[test]
-fn test_debug_wft8() {
- let wchar = if cfg!(unix) {
- "\\xED\\xA0\\xBD"
- } else {
- "\\u{D83D}"
- };
- test(&format!("\"foo{}\u{1F4A9}bar\"", wchar), RAW_WTF8_STRING);
-}
-
-#[test]
-fn test_debug_quote() {
- test("\"foo\\\"bar\"", RawOsStr::from_str("foo\"bar"));
-}
diff --git a/tests/edge_cases.rs b/tests/edge_cases.rs
deleted file mode 100644
index a0fa529..0000000
--- a/tests/edge_cases.rs
+++ /dev/null
@@ -1,7 +0,0 @@
-mod common;
-use common::test_bytes;
-
-#[test]
-fn test_edge_cases() {
- assert_eq!(Ok(()), test_bytes(b"\xED\xAB\xBE\xF4\x8D\xBC\x9A"));
-}
diff --git a/tests/index.rs b/tests/index.rs
deleted file mode 100644
index 50abd6c..0000000
--- a/tests/index.rs
+++ /dev/null
@@ -1,86 +0,0 @@
-#![cfg(feature = "raw_os_str")]
-
-use std::ops::Index;
-use std::panic;
-use std::panic::UnwindSafe;
-
-use os_str_bytes::RawOsStr;
-
-mod common;
-use common::RAW_WTF8_STRING;
-
-#[test]
-fn test_valid_indices() {
- test(0);
- test(1);
- test(2);
- test(3);
- test(6);
- test(10);
- test(11);
- test(12);
- test(13);
-
- #[track_caller]
- fn test(index: usize) {
- let _ = RAW_WTF8_STRING.index(index..);
- }
-}
-
-macro_rules! test {
- ( $name:ident , $index:literal , $code_point:expr ) => {
- // https://github.com/rust-lang/rust/issues/88430
- #[test]
- fn $name() {
- let index_fn = || RAW_WTF8_STRING.index($index..);
- if cfg!(unix) {
- let _ = index_fn();
- return;
- }
-
- let error = panic::catch_unwind(index_fn)
- .expect_err("test did not panic as expected");
- let error: &String =
- error.downcast_ref().expect("incorrect panic message type");
- assert_eq!(
- concat!(
- "byte index ",
- $index,
- " is not a valid boundary; it is inside ",
- $code_point
- ),
- error,
- );
- }
- };
-}
-
-test!(test_index_4, 4, "U+D83D (bytes 3..6)");
-
-test!(test_index_5, 5, "U+D83D (bytes 3..6)");
-
-test!(test_index_7, 7, "U+1F4A9 (bytes 6..10)");
-
-test!(test_index_8, 8, "U+1F4A9 (bytes 6..10)");
-
-test!(test_index_9, 9, "U+1F4A9 (bytes 6..10)");
-
-#[test]
-fn test_index_panics() {
- let string = RawOsStr::from_str("\u{F6}");
- test(|| string.index(1..2));
- test(|| string.index(0..1));
- test(|| string.index(1..));
- test(|| string.index(0..=0));
- test(|| string.index(..1));
- test(|| string.index(..=0));
- test(|| string.split_at(1));
-
- #[track_caller]
- fn test<F, R>(f: F)
- where
- F: FnOnce() -> R + UnwindSafe,
- {
- assert_eq!(!cfg!(unix), panic::catch_unwind(f).is_err());
- }
-}
diff --git a/tests/integration.rs b/tests/integration.rs
deleted file mode 100644
index 0107fe5..0000000
--- a/tests/integration.rs
+++ /dev/null
@@ -1,75 +0,0 @@
-use std::str;
-
-mod common;
-use common::test_bytes;
-use common::test_utf8_bytes;
-use common::test_utf8_vec;
-use common::test_vec;
-use common::Result;
-use common::WTF8_STRING;
-
-const INVALID_STRING: &[u8] = b"\xF1foo\xF1\x80bar\xF1\x80\x80baz";
-
-const UTF8_STRING: &str = "string";
-
-fn test_string_is_invalid_utf8(string: &[u8]) {
- assert!(str::from_utf8(string).is_err());
-}
-
-fn test_invalid_result(result: &Result<()>) {
- if cfg!(windows) {
- assert!(result.is_err());
- } else {
- assert_eq!(&Ok(()), result);
- }
-}
-
-#[test]
-fn test_empty_bytes() {
- test_utf8_bytes("");
-}
-
-#[test]
-fn test_empty_vec() {
- test_utf8_vec("");
-}
-
-#[test]
-fn test_nonempty_utf8_bytes() {
- test_utf8_bytes(UTF8_STRING);
-}
-
-#[test]
-fn test_nonempty_utf8_vec() {
- test_utf8_vec(UTF8_STRING);
-}
-
-#[test]
-fn test_invalid_string_is_invalid_utf8() {
- test_string_is_invalid_utf8(INVALID_STRING);
-}
-
-#[test]
-fn test_invalid_bytes() {
- test_invalid_result(&test_bytes(INVALID_STRING));
-}
-
-#[test]
-fn test_invalid_vec() {
- test_invalid_result(&test_vec(INVALID_STRING));
-}
-
-#[test]
-fn test_wtf8_string_is_invalid_utf8() {
- test_string_is_invalid_utf8(WTF8_STRING);
-}
-
-#[test]
-fn test_wtf8_bytes() {
- assert_eq!(Ok(()), test_bytes(WTF8_STRING));
-}
-
-#[test]
-fn test_wtf8_vec() {
- assert_eq!(Ok(()), test_vec(WTF8_STRING));
-}
diff --git a/tests/random.rs b/tests/random.rs
deleted file mode 100644
index ad6e8d2..0000000
--- a/tests/random.rs
+++ /dev/null
@@ -1,126 +0,0 @@
-use std::borrow::Cow;
-use std::ffi::OsStr;
-use std::ffi::OsString;
-
-use getrandom::getrandom;
-
-use os_str_bytes::OsStrBytes;
-use os_str_bytes::OsStringBytes;
-
-mod common;
-use common::from_bytes;
-use common::from_vec;
-
-const SMALL_LENGTH: usize = 16;
-
-const LARGE_LENGTH: usize = 1024;
-
-const ITERATIONS: usize = 1024;
-
-fn random_os_string(
- buffer_length: usize,
-) -> Result<OsString, getrandom::Error> {
- let mut buffer = vec![0; buffer_length];
- #[cfg(unix)]
- {
- use std::os::unix::ffi::OsStringExt;
-
- getrandom(&mut buffer)?;
- Ok(OsStringExt::from_vec(buffer))
- }
- #[cfg(windows)]
- {
- use std::os::windows::ffi::OsStringExt;
- use std::slice;
-
- getrandom(as_mut_bytes(&mut buffer))?;
- return Ok(OsStringExt::from_wide(&buffer));
-
- fn as_mut_bytes(buffer: &mut [u16]) -> &mut [u8] {
- // SAFETY: [u16] can always be transmuted to two [u8] bytes.
- unsafe {
- slice::from_raw_parts_mut(
- buffer.as_mut_ptr() as *mut u8,
- buffer.len() * 2,
- )
- }
- }
- }
- #[cfg(not(any(unix, windows)))]
- Err(getrandom::Error::UNSUPPORTED)
-}
-
-#[test]
-fn test_random_bytes() -> Result<(), getrandom::Error> {
- let os_string = random_os_string(LARGE_LENGTH)?;
- let string = os_string.to_raw_bytes();
- assert_eq!(os_string.len(), string.len());
- assert_eq!(Ok(Cow::Borrowed(&*os_string)), from_bytes(&string));
- Ok(())
-}
-
-#[test]
-fn test_random_vec() -> Result<(), getrandom::Error> {
- let os_string = random_os_string(LARGE_LENGTH)?;
- let string = os_string.clone().into_raw_vec();
- assert_eq!(os_string.len(), string.len());
- assert_eq!(Ok(os_string), from_vec(string));
- Ok(())
-}
-
-#[test]
-fn test_lossless() -> Result<(), getrandom::Error> {
- for _ in 0..ITERATIONS {
- let mut string = vec![0; SMALL_LENGTH];
- getrandom(&mut string)?;
- if let Ok(os_string) = OsStr::from_raw_bytes(&string) {
- let encoded_string = os_string.to_raw_bytes();
- assert_eq!(string, &*encoded_string);
- }
- }
- Ok(())
-}
-
-#[cfg(feature = "raw_os_str")]
-#[test]
-fn test_raw() -> Result<(), getrandom::Error> {
- use os_str_bytes::RawOsStr;
- use os_str_bytes::RawOsString;
-
- macro_rules! test {
- (
- $result:expr ,
- $method:ident (& $string:ident , & $substring:ident )
- ) => {
- #[allow(clippy::bool_assert_comparison)]
- {
- assert_eq!(
- $result,
- $string.$method(&$substring),
- concat!(stringify!($method), "({:?}, {:?})"),
- $string,
- $substring,
- );
- }
- };
- }
-
- for _ in 0..ITERATIONS {
- let mut string = random_os_string(SMALL_LENGTH)?;
- let prefix = RawOsStr::new(&string).into_owned();
- let suffix = random_os_string(SMALL_LENGTH)?;
- string.push(&suffix);
-
- let string = RawOsString::new(string);
- let suffix = RawOsString::new(suffix);
-
- test!(true, ends_with_os(&string, &suffix));
- test!(true, starts_with_os(&string, &prefix));
-
- if prefix != suffix {
- test!(false, ends_with_os(&string, &prefix));
- test!(false, starts_with_os(&string, &suffix));
- }
- }
- Ok(())
-}
diff --git a/tests/raw.rs b/tests/raw.rs
deleted file mode 100644
index fe29705..0000000
--- a/tests/raw.rs
+++ /dev/null
@@ -1,108 +0,0 @@
-#![cfg(feature = "raw_os_str")]
-
-use std::ffi::OsStr;
-
-use os_str_bytes::EncodingError;
-use os_str_bytes::OsStrBytes;
-use os_str_bytes::RawOsStr;
-
-mod common;
-use common::RAW_WTF8_STRING;
-
-fn from_raw_bytes(string: &[u8]) -> Result<&RawOsStr, EncodingError> {
- // SAFETY: The string is validated before conversion.
- OsStr::from_raw_bytes(string)
- .map(|_| unsafe { common::from_raw_bytes_unchecked(string) })
-}
-
-#[test]
-fn test_ends_with() {
- test(true, b"");
- test(true, b"r");
- test(true, b"ar");
- test(true, b"bar");
- if cfg!(not(windows)) {
- test(true, b"\xA9bar");
- test(true, b"\x92\xA9bar");
- test(true, b"\x9F\x92\xA9bar");
- }
- test(cfg!(windows), b"\xED\xB2\xA9bar");
- test(true, b"\xF0\x9F\x92\xA9bar");
- test(true, b"\xED\xA0\xBD\xF0\x9F\x92\xA9bar");
- test(true, b"o\xED\xA0\xBD\xF0\x9F\x92\xA9bar");
- test(true, b"oo\xED\xA0\xBD\xF0\x9F\x92\xA9bar");
- test(true, b"foo\xED\xA0\xBD\xF0\x9F\x92\xA9bar");
-
- test(false, b"\xED\xA0\xBDbar");
- test(false, b"\xED\xB2\xA9aar");
-
- fn test(result: bool, suffix: &[u8]) {
- let suffix = from_raw_bytes(suffix).unwrap();
- assert_eq!(result, RAW_WTF8_STRING.ends_with_os(suffix));
- }
-}
-
-#[test]
-fn test_empty_ends_with() {
- macro_rules! test {
- ( $result:expr , $string:expr , $substring:expr ) => {
- #[allow(clippy::bool_assert_comparison)]
- {
- assert_eq!(
- $result,
- RawOsStr::from_str($string)
- .ends_with_os(RawOsStr::from_str($substring)),
- );
- }
- };
- }
- test!(true, "", "");
- test!(false, "", "r");
- test!(false, "", "ar");
-}
-
-#[test]
-fn test_starts_with() {
- test(true, b"");
- test(true, b"f");
- test(true, b"fo");
- test(true, b"foo");
- test(true, b"foo\xED\xA0\xBD");
- if cfg!(not(windows)) {
- test(true, b"foo\xED\xA0\xBD\xF0");
- test(true, b"foo\xED\xA0\xBD\xF0\x9F");
- test(true, b"foo\xED\xA0\xBD\xF0\x9F\x92");
- }
- test(cfg!(windows), b"foo\xED\xA0\xBD\xED\xA0\xBD");
- test(true, b"foo\xED\xA0\xBD\xF0\x9F\x92\xA9");
- test(true, b"foo\xED\xA0\xBD\xF0\x9F\x92\xA9b");
- test(true, b"foo\xED\xA0\xBD\xF0\x9F\x92\xA9ba");
- test(true, b"foo\xED\xA0\xBD\xF0\x9F\x92\xA9bar");
-
- test(false, b"foo\xED\xB2\xA9");
- test(false, b"fof\xED\xA0\xBD\xED\xA0\xBD");
-
- fn test(result: bool, prefix: &[u8]) {
- let prefix = from_raw_bytes(prefix).unwrap();
- assert_eq!(result, RAW_WTF8_STRING.starts_with_os(prefix));
- }
-}
-
-#[test]
-fn test_empty_starts_with() {
- macro_rules! test {
- ( $result:expr , $string:expr , $substring:expr ) => {
- #[allow(clippy::bool_assert_comparison)]
- {
- assert_eq!(
- $result,
- RawOsStr::from_str($string)
- .starts_with_os(RawOsStr::from_str($substring)),
- );
- }
- };
- }
- test!(true, "", "");
- test!(false, "", "f");
- test!(false, "", "fo");
-}