diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2024-02-02 23:52:03 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2024-02-02 23:52:03 +0000 |
commit | 7d653f3bd3f888d745509de507a5b69f01055597 (patch) | |
tree | f926c571cbd4fd2e9116e5ee98efcbe17dc354ae | |
parent | 833562ca05db671f59197ee5672363af2e2e8fbc (diff) | |
parent | 86d8e5b747bb44b483754be095afebfe03ad0875 (diff) | |
download | base64-simpleperf-release.tar.gz |
Snap for 11400057 from 86d8e5b747bb44b483754be095afebfe03ad0875 to simpleperf-release (branch: simpleperf-release)
Change-Id: I60dfcdb501724b51ab706d5dba1175fa519eb285
-rw-r--r-- | .cargo_vcs_info.json | 2 | ||||
-rw-r--r-- | .circleci/config.yml | 38 | ||||
-rw-r--r-- | .github/ISSUE_TEMPLATE/general-purpose-issue.md | 21 | ||||
-rw-r--r-- | Android.bp | 5 | ||||
-rw-r--r-- | Cargo.toml | 42 | ||||
-rw-r--r-- | Cargo.toml.orig | 37 | ||||
-rw-r--r-- | METADATA | 25 | ||||
-rw-r--r-- | README.md | 10 | ||||
-rw-r--r-- | RELEASE-NOTES.md | 42 | ||||
-rw-r--r-- | benches/benchmarks.rs | 9 | ||||
-rw-r--r-- | clippy.toml | 2 | ||||
-rw-r--r-- | examples/base64.rs | 50 | ||||
-rw-r--r-- | patches/doc-string-fix.patch | 41 | ||||
-rw-r--r-- | src/alphabet.rs | 62 | ||||
-rw-r--r-- | src/chunked_encoder.rs | 107 | ||||
-rw-r--r-- | src/decode.rs | 21 | ||||
-rw-r--r-- | src/encode.rs | 66 | ||||
-rw-r--r-- | src/engine/general_purpose/decode.rs | 59 | ||||
-rw-r--r-- | src/engine/general_purpose/decode_suffix.rs | 17 | ||||
-rw-r--r-- | src/engine/general_purpose/mod.rs | 7 | ||||
-rw-r--r-- | src/engine/mod.rs | 234 | ||||
-rw-r--r-- | src/engine/naive.rs | 7 | ||||
-rw-r--r-- | src/engine/tests.rs | 393 | ||||
-rw-r--r-- | src/lib.rs | 241 | ||||
-rw-r--r-- | src/prelude.rs | 3 | ||||
-rw-r--r-- | src/read/decoder.rs | 65 | ||||
-rw-r--r-- | src/read/decoder_tests.rs | 159 | ||||
-rw-r--r-- | src/write/encoder_string_writer.rs | 45 | ||||
-rw-r--r-- | src/write/encoder_tests.rs | 2 | ||||
-rw-r--r-- | tests/encode.rs | 53 |
30 files changed, 1308 insertions, 557 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index 7b32cf5..d61e543 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,6 +1,6 @@ { "git": { - "sha1": "d7fb31c4ada4ca45df5ae80ec691fa3a050d9c3e" + "sha1": "9652c787730e58515ce7b44fcafd2430ab424628" }, "path_in_vcs": "" }
\ No newline at end of file diff --git a/.circleci/config.yml b/.circleci/config.yml index fa98f9c..ac0fae1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -14,13 +14,15 @@ workflows: # be easier on the CI hosts since presumably those fat lower layers will already be cached, and # therefore faster than a minimal, customized alpine. # MSRV - 'rust:1.57.0' + 'rust:1.48.0' ] # a hacky scheme to work around CircleCI's inability to deal with mutable docker tags, forcing us to # get a nightly or stable toolchain via rustup instead of a mutable docker tag toolchain_override: [ '__msrv__', # won't add any other toolchains, just uses what's in the docker image + '1.65.0', # minimum needed to build dev-dependencies 'stable', + 'beta', 'nightly' ] @@ -49,6 +51,12 @@ jobs: name: Log rustc version command: rustc --version - run: + name: Build main target + # update first to select dependencies appropriate for this toolchain + command: | + cargo update + cargo build + - run: name: Check formatting command: | rustup component add rustfmt @@ -64,13 +72,27 @@ jobs: fi - run: name: Build all targets - command: cargo build --all-targets + command: | + if [[ '<< parameters.toolchain_override >>' != '__msrv__' ]] + then + cargo build --all-targets + fi - run: name: Build without default features - command: cargo build --no-default-features + command: | + cargo build --no-default-features + if [[ '<< parameters.toolchain_override >>' != '__msrv__' ]] + then + cargo build --no-default-features --all-targets + fi - run: name: Build with only alloc - command: cargo build --no-default-features --features alloc + command: | + cargo build --no-default-features --features alloc + if [[ '<< parameters.toolchain_override >>' != '__msrv__' ]] + then + cargo build --no-default-features --features alloc --all-targets + fi - run: name: Add arm toolchain command: rustup target add thumbv6m-none-eabi @@ -81,8 +103,14 @@ jobs: name: Build ARM with only alloc feature command: cargo build 
--target thumbv6m-none-eabi --no-default-features --features alloc - run: + # dev dependencies can't build on 1.48.0 name: Run tests - command: cargo test --verbose + command: | + if [[ '<< parameters.toolchain_override >>' != '__msrv__' ]] + then + cargo test --no-default-features + cargo test + fi - run: name: Build docs command: cargo doc --verbose diff --git a/.github/ISSUE_TEMPLATE/general-purpose-issue.md b/.github/ISSUE_TEMPLATE/general-purpose-issue.md new file mode 100644 index 0000000..b35b2f3 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/general-purpose-issue.md @@ -0,0 +1,21 @@ +--- +name: General purpose issue +about: General purpose issue +title: Default issue +labels: '' +assignees: '' + +--- + +# Before you file an issue + +- Did you read the docs? +- Did you read the README? + +# The problem + +- + +# How I, the issue filer, am going to help solve it + +- @@ -42,10 +42,11 @@ rust_library { host_supported: true, crate_name: "base64", cargo_env_compat: true, - cargo_pkg_version: "0.21.0", + cargo_pkg_version: "0.21.7", srcs: ["src/lib.rs"], - edition: "2021", + edition: "2018", features: [ + "alloc", "default", "std", ], @@ -10,10 +10,10 @@ # See Cargo.toml.orig for the original contents. 
[package] -edition = "2021" -rust-version = "1.57.0" +edition = "2018" +rust-version = "1.48.0" name = "base64" -version = "0.21.0" +version = "0.21.7" authors = [ "Alice Maz <alice@alicemaz.com>", "Marshall Pierce <marshall@mpierce.org>", @@ -32,33 +32,57 @@ categories = ["encoding"] license = "MIT OR Apache-2.0" repository = "https://github.com/marshallpierce/rust-base64" +[package.metadata.docs.rs] +rustdoc-args = ["--generate-link-to-definition"] + [profile.bench] -debug = true +debug = 2 [profile.test] opt-level = 3 +[[example]] +name = "base64" +required-features = ["std"] + +[[test]] +name = "tests" +required-features = ["alloc"] + +[[test]] +name = "encode" +required-features = ["alloc"] + [[bench]] name = "benchmarks" harness = false +required-features = ["std"] + +[dev-dependencies.clap] +version = "3.2.25" +features = ["derive"] [dev-dependencies.criterion] version = "0.4.0" +[dev-dependencies.once_cell] +version = "1" + [dev-dependencies.rand] version = "0.8.5" features = ["small_rng"] [dev-dependencies.rstest] -version = "0.12.0" +version = "0.13.0" [dev-dependencies.rstest_reuse] -version = "0.3.0" +version = "0.6.0" -[dev-dependencies.structopt] -version = "0.3.26" +[dev-dependencies.strum] +version = "0.25" +features = ["derive"] [features] alloc = [] default = ["std"] -std = [] +std = ["alloc"] diff --git a/Cargo.toml.orig b/Cargo.toml.orig index 33847db..4db5d26 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ -1,6 +1,6 @@ [package] name = "base64" -version = "0.21.0" +version = "0.21.7" authors = ["Alice Maz <alice@alicemaz.com>", "Marshall Pierce <marshall@mpierce.org>"] description = "encodes and decodes base64 as bytes or utf8" repository = "https://github.com/marshallpierce/rust-base64" @@ -9,26 +9,47 @@ readme = "README.md" keywords = ["base64", "utf8", "encode", "decode", "no_std"] categories = ["encoding"] license = "MIT OR Apache-2.0" -edition = "2021" -rust-version = "1.57.0" +edition = "2018" +# dev-dependencies require 1.65, but 
the main code doesn't +# This option was added in 1.56, keep it for when we bump MSRV. +rust-version = "1.48.0" [[bench]] name = "benchmarks" harness = false +required-features = ["std"] + +[[example]] +name = "base64" +required-features = ["std"] + +[[test]] +name = "tests" +required-features = ["alloc"] + +[[test]] +name = "encode" +required-features = ["alloc"] + +[package.metadata.docs.rs] +rustdoc-args = ["--generate-link-to-definition"] [dev-dependencies] criterion = "0.4.0" rand = { version = "0.8.5", features = ["small_rng"] } -# clap 4 would require 1.60 -structopt = "0.3.26" +# Latest is 4.4.13 but specifies MSRV in Cargo.toml which means we can't depend +# on it (even though we won't compile it in MSRV CI). +clap = { version = "3.2.25", features = ["derive"] } +strum = { version = "0.25", features = ["derive"] } # test fixtures for engine tests -rstest = "0.12.0" -rstest_reuse = "0.3.0" +rstest = "0.13.0" +rstest_reuse = "0.6.0" +once_cell = "1" [features] default = ["std"] alloc = [] -std = [] +std = ["alloc"] [profile.bench] # Useful for better disassembly when using `perf record` and `perf annotate` @@ -1,23 +1,20 @@ # This project was upgraded with external_updater. 
-# Usage: tools/external_updater/updater.sh update rust/crates/base64 -# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md +# Usage: tools/external_updater/updater.sh update external/rust/crates/base64 +# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md name: "base64" description: "encodes and decodes base64 as bytes or utf8" third_party { - url { - type: HOMEPAGE - value: "https://crates.io/crates/base64" - } - url { - type: ARCHIVE - value: "https://static.crates.io/crates/base64/base64-0.21.0.crate" - } - version: "0.21.0" license_type: NOTICE last_upgrade_date { - year: 2023 - month: 2 - day: 1 + year: 2024 + month: 1 + day: 31 + } + homepage: "https://crates.io/crates/base64" + identifier { + type: "Archive" + value: "https://static.crates.io/crates/base64/base64-0.21.7.crate" + version: "0.21.7" } } @@ -63,7 +63,7 @@ optionally may allow other behaviors. ## Rust version compatibility -The minimum supported Rust version is 1.57.0. +The minimum supported Rust version is 1.48.0. # Contributing @@ -76,10 +76,10 @@ free time to give each PR the attention it deserves. I will get to everyone even ## Developing -Benchmarks are in `benches/`. Running them requires nightly rust, but `rustup` makes it easy: +Benchmarks are in `benches/`. ```bash -rustup run nightly cargo bench +cargo bench ``` ## no_std @@ -92,12 +92,12 @@ to bring back the support for heap allocations. ## Profiling On Linux, you can use [perf](https://perf.wiki.kernel.org/index.php/Main_Page) for profiling. Then compile the -benchmarks with `rustup nightly run cargo bench --no-run`. +benchmarks with `cargo bench --no-run`. Run the benchmark binary with `perf` (shown here filtering to one particular benchmark, which will make the results easier to read). `perf` is only available to the root user on most systems as it fiddles with event counters in your CPU, so use `sudo`. 
We need to run the actual benchmark binary, hence the path into `target`. You can see the actual -full path with `rustup run nightly cargo bench -v`; it will print out the commands it runs. If you use the exact path +full path with `cargo bench -v`; it will print out the commands it runs. If you use the exact path that `bench` outputs, make sure you get the one that's for the benchmarks, not the tests. You may also want to `cargo clean` so you have only one `benchmarks-` binary (they tend to accumulate). diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 4fcadda..0031215 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -1,7 +1,42 @@ -# 0.21.0 +# 0.21.7 + +- Support getting an alphabet's contents as a str via `Alphabet::as_str()` + +# 0.21.6 + +- Improved introductory documentation and example + +# 0.21.5 + +- Add `Debug` and `Clone` impls for the general purpose Engine + +# 0.21.4 + +- Make `encoded_len` `const`, allowing the creation of arrays sized to encode compile-time-known data lengths + +# 0.21.3 -(not yet released) +- Implement `source` instead of `cause` on Error types +- Roll back MSRV to 1.48.0 so Debian can continue to live in a time warp +- Slightly faster chunked encoding for short inputs +- Decrease binary size +# 0.21.2 + +- Rollback MSRV to 1.57.0 -- only dev dependencies need 1.60, not the main code + +# 0.21.1 + +- Remove the possibility of panicking during decoded length calculations +- `DecoderReader` no longer sometimes erroneously ignores + padding [#226](https://github.com/marshallpierce/rust-base64/issues/226) + +## Breaking changes + +- `Engine.internal_decode` return type changed +- Update MSRV to 1.60.0 + +# 0.21.0 ## Migration @@ -46,7 +81,8 @@ precisely, see the following table. ## Breaking changes -- Re-exports of preconfigured engines in `engine` are removed in favor of `base64::prelude::...` that are better suited to those who wish to `use` the entire path to a name. 
+- Re-exports of preconfigured engines in `engine` are removed in favor of `base64::prelude::...` that are better suited + to those who wish to `use` the entire path to a name. # 0.21.0-beta.1 diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs index 61d542f..802c8cc 100644 --- a/benches/benchmarks.rs +++ b/benches/benchmarks.rs @@ -39,8 +39,7 @@ fn do_decode_bench_slice(b: &mut Bencher, &size: &usize) { fill(&mut v); let encoded = STANDARD.encode(&v); - let mut buf = Vec::new(); - buf.resize(size, 0); + let mut buf = vec![0; size]; b.iter(|| { STANDARD.decode_slice(&encoded, &mut buf).unwrap(); black_box(&buf); @@ -52,8 +51,7 @@ fn do_decode_bench_stream(b: &mut Bencher, &size: &usize) { fill(&mut v); let encoded = STANDARD.encode(&v); - let mut buf = Vec::new(); - buf.resize(size, 0); + let mut buf = vec![0; size]; buf.truncate(0); b.iter(|| { @@ -96,9 +94,8 @@ fn do_encode_bench_reuse_buf(b: &mut Bencher, &size: &usize) { fn do_encode_bench_slice(b: &mut Bencher, &size: &usize) { let mut v: Vec<u8> = Vec::with_capacity(size); fill(&mut v); - let mut buf = Vec::new(); // conservative estimate of encoded size - buf.resize(v.len() * 2, 0); + let mut buf = vec![0; v.len() * 2]; b.iter(|| STANDARD.encode_slice(&v, &mut buf).unwrap()); } diff --git a/clippy.toml b/clippy.toml index 23b32c1..11d46a7 100644 --- a/clippy.toml +++ b/clippy.toml @@ -1 +1 @@ -msrv = "1.57.0" +msrv = "1.48.0" diff --git a/examples/base64.rs b/examples/base64.rs index 0a214d2..0c8aa3f 100644 --- a/examples/base64.rs +++ b/examples/base64.rs @@ -2,51 +2,40 @@ use std::fs::File; use std::io::{self, Read}; use std::path::PathBuf; use std::process; -use std::str::FromStr; use base64::{alphabet, engine, read, write}; -use structopt::StructOpt; +use clap::Parser; -#[derive(Debug, StructOpt)] +#[derive(Clone, Debug, Parser, strum::EnumString, Default)] +#[strum(serialize_all = "kebab-case")] enum Alphabet { + #[default] Standard, UrlSafe, } -impl Default for Alphabet { - fn default() -> Self 
{ - Self::Standard - } -} - -impl FromStr for Alphabet { - type Err = String; - fn from_str(s: &str) -> Result<Self, String> { - match s { - "standard" => Ok(Self::Standard), - "urlsafe" => Ok(Self::UrlSafe), - _ => Err(format!("alphabet '{}' unrecognized", s)), - } - } -} - /// Base64 encode or decode FILE (or standard input), to standard output. -#[derive(Debug, StructOpt)] +#[derive(Debug, Parser)] struct Opt { - /// decode data - #[structopt(short = "d", long = "decode")] + /// Decode the base64-encoded input (default: encode the input as base64). + #[structopt(short = 'd', long = "decode")] decode: bool, - /// The alphabet to choose. Defaults to the standard base64 alphabet. - /// Supported alphabets include "standard" and "urlsafe". + + /// The encoding alphabet: "standard" (default) or "url-safe". #[structopt(long = "alphabet")] alphabet: Option<Alphabet>, - /// The file to encode/decode. - #[structopt(parse(from_os_str))] + + /// Omit padding characters while encoding, and reject them while decoding. + #[structopt(short = 'p', long = "no-padding")] + no_padding: bool, + + /// The file to encode or decode. 
+ #[structopt(name = "FILE", parse(from_os_str))] file: Option<PathBuf>, } fn main() { - let opt = Opt::from_args(); + let opt = Opt::parse(); let stdin; let mut input: Box<dyn Read> = match opt.file { None => { @@ -66,7 +55,10 @@ fn main() { Alphabet::Standard => alphabet::STANDARD, Alphabet::UrlSafe => alphabet::URL_SAFE, }, - engine::general_purpose::PAD, + match opt.no_padding { + true => engine::general_purpose::NO_PAD, + false => engine::general_purpose::PAD, + }, ); let stdout = io::stdout(); diff --git a/patches/doc-string-fix.patch b/patches/doc-string-fix.patch deleted file mode 100644 index 15370bf..0000000 --- a/patches/doc-string-fix.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 27fc4ecc69aab7b31e23aefbeed10b252b176d5a Mon Sep 17 00:00:00 2001 -From: Chris Wailes <chriswailes@google.com> -Date: Thu, 9 Feb 2023 23:15:26 -0800 -Subject: [PATCH] Fix an error in the documentation strings - -This file explicitly turns on this warning meaning that it can't be -overridden by command line lint arguments. The rustdoc from version -1.66.1 will fail without this patch. - -Test: m rustdoc -Bug: 263153841 -Change-Id: Idcf3779cbd46300691232302bba10c46143a2dbc ---- - src/decode.rs | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/decode.rs b/src/decode.rs -index e349240..3f627c7 100644 ---- a/src/decode.rs -+++ b/src/decode.rs -@@ -71,7 +71,7 @@ impl error::Error for DecodeError { - } - - ///Decode from string reference as octets. --///Returns a Result containing a Vec<u8>. -+///Returns a Result containing a `Vec<u8>`. - ///Convenience `decode_config(input, base64::STANDARD);`. - /// - ///# Example -@@ -90,7 +90,7 @@ pub fn decode<T: AsRef<[u8]>>(input: T) -> Result<Vec<u8>, DecodeError> { - } - - ///Decode from string reference as octets. --///Returns a Result containing a Vec<u8>. -+///Returns a Result containing a `Vec<u8>`. 
- /// - ///# Example - /// --- -2.39.1.581.gbfd45094c4-goog - diff --git a/src/alphabet.rs b/src/alphabet.rs index 7cd1b57..7895914 100644 --- a/src/alphabet.rs +++ b/src/alphabet.rs @@ -1,7 +1,7 @@ //! Provides [Alphabet] and constants for alphabets commonly used in the wild. use crate::PAD_BYTE; -use core::fmt; +use core::{convert, fmt}; #[cfg(any(feature = "std", test))] use std::error; @@ -12,6 +12,10 @@ const ALPHABET_SIZE: usize = 64; /// Common alphabets are provided as constants, and custom alphabets /// can be made via `from_str` or the `TryFrom<str>` implementation. /// +/// # Examples +/// +/// Building and using a custom Alphabet: +/// /// ``` /// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap(); /// @@ -19,6 +23,34 @@ const ALPHABET_SIZE: usize = 64; /// &custom, /// base64::engine::general_purpose::PAD); /// ``` +/// +/// Building a const: +/// +/// ``` +/// use base64::alphabet::Alphabet; +/// +/// static CUSTOM: Alphabet = { +/// // Result::unwrap() isn't const yet, but panic!() is OK +/// match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") { +/// Ok(x) => x, +/// Err(_) => panic!("creation of alphabet failed"), +/// } +/// }; +/// ``` +/// +/// Building lazily: +/// +/// ``` +/// use base64::{ +/// alphabet::Alphabet, +/// engine::{general_purpose::GeneralPurpose, GeneralPurposeConfig}, +/// }; +/// use once_cell::sync::Lazy; +/// +/// static CUSTOM: Lazy<Alphabet> = Lazy::new(|| +/// Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap() +/// ); +/// ``` #[derive(Clone, Debug, Eq, PartialEq)] pub struct Alphabet { pub(crate) symbols: [u8; ALPHABET_SIZE], @@ -91,9 +123,14 @@ impl Alphabet { Ok(Self::from_str_unchecked(alphabet)) } + + /// Create a `&str` from the symbols in the `Alphabet` + pub fn as_str(&self) -> &str { + core::str::from_utf8(&self.symbols).unwrap() + } } -impl TryFrom<&str> for Alphabet 
{ +impl convert::TryFrom<&str> for Alphabet { type Error = ParseAlphabetError; fn try_from(value: &str) -> Result<Self, Self::Error> { @@ -128,21 +165,21 @@ impl fmt::Display for ParseAlphabetError { #[cfg(any(feature = "std", test))] impl error::Error for ParseAlphabetError {} -/// The standard alphabet (uses `+` and `/`). +/// The standard alphabet (with `+` and `/`) specified in [RFC 4648][]. /// -/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3). +/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4 pub const STANDARD: Alphabet = Alphabet::from_str_unchecked( "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", ); -/// The URL safe alphabet (uses `-` and `_`). +/// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][]. /// -/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4). +/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5 pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked( "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", ); -/// The `crypt(3)` alphabet (uses `.` and `/` as the first two values). +/// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters). /// /// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses. pub const CRYPT: Alphabet = Alphabet::from_str_unchecked( @@ -154,7 +191,7 @@ pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked( "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", ); -/// The alphabet used in IMAP-modified UTF-7 (uses `+` and `,`). +/// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`). 
/// /// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3) pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked( @@ -171,7 +208,7 @@ pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked( #[cfg(test)] mod tests { use crate::alphabet::*; - use std::convert::TryFrom as _; + use core::convert::TryFrom as _; #[test] fn detects_duplicate_start() { @@ -238,4 +275,11 @@ mod tests { .unwrap() ); } + + #[test] + fn str_same_as_input() { + let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + let a = Alphabet::try_from(alphabet).unwrap(); + assert_eq!(alphabet, a.as_str()) + } } diff --git a/src/chunked_encoder.rs b/src/chunked_encoder.rs index 0457259..817b339 100644 --- a/src/chunked_encoder.rs +++ b/src/chunked_encoder.rs @@ -1,12 +1,12 @@ -#[cfg(any(feature = "alloc", feature = "std", test))] +use crate::{ + encode::add_padding, + engine::{Config, Engine}, +}; +#[cfg(any(feature = "alloc", test))] use alloc::string::String; -use core::cmp; -#[cfg(any(feature = "alloc", feature = "std", test))] +#[cfg(any(feature = "alloc", test))] use core::str; -use crate::encode::add_padding; -use crate::engine::{Config, Engine}; - /// The output mechanism for ChunkedEncoder's encoded bytes. pub trait Sink { type Error; @@ -15,85 +15,51 @@ pub trait Sink { fn write_encoded_bytes(&mut self, encoded: &[u8]) -> Result<(), Self::Error>; } -const BUF_SIZE: usize = 1024; - /// A base64 encoder that emits encoded bytes in chunks without heap allocation. 
pub struct ChunkedEncoder<'e, E: Engine + ?Sized> { engine: &'e E, - max_input_chunk_len: usize, } impl<'e, E: Engine + ?Sized> ChunkedEncoder<'e, E> { pub fn new(engine: &'e E) -> ChunkedEncoder<'e, E> { - ChunkedEncoder { - engine, - max_input_chunk_len: max_input_length(BUF_SIZE, engine.config().encode_padding()), - } + ChunkedEncoder { engine } } pub fn encode<S: Sink>(&self, bytes: &[u8], sink: &mut S) -> Result<(), S::Error> { - let mut encode_buf: [u8; BUF_SIZE] = [0; BUF_SIZE]; - let mut input_index = 0; - - while input_index < bytes.len() { - // either the full input chunk size, or it's the last iteration - let input_chunk_len = cmp::min(self.max_input_chunk_len, bytes.len() - input_index); - - let chunk = &bytes[input_index..(input_index + input_chunk_len)]; - - let mut b64_bytes_written = self.engine.internal_encode(chunk, &mut encode_buf); - - input_index += input_chunk_len; - let more_input_left = input_index < bytes.len(); - - if self.engine.config().encode_padding() && !more_input_left { - // no more input, add padding if needed. Buffer will have room because - // max_input_length leaves room for it. - b64_bytes_written += add_padding(bytes.len(), &mut encode_buf[b64_bytes_written..]); + const BUF_SIZE: usize = 1024; + const CHUNK_SIZE: usize = BUF_SIZE / 4 * 3; + + let mut buf = [0; BUF_SIZE]; + for chunk in bytes.chunks(CHUNK_SIZE) { + let mut len = self.engine.internal_encode(chunk, &mut buf); + if chunk.len() != CHUNK_SIZE && self.engine.config().encode_padding() { + // Final, potentially partial, chunk. + // Only need to consider if padding is needed on a partial chunk since full chunk + // is a multiple of 3, which therefore won't be padded. + // Pad output to multiple of four bytes if required by config. 
+ len += add_padding(len, &mut buf[len..]); } - - sink.write_encoded_bytes(&encode_buf[0..b64_bytes_written])?; + sink.write_encoded_bytes(&buf[..len])?; } Ok(()) } } -/// Calculate the longest input that can be encoded for the given output buffer size. -/// -/// If the config requires padding, two bytes of buffer space will be set aside so that the last -/// chunk of input can be encoded safely. -/// -/// The input length will always be a multiple of 3 so that no encoding state has to be carried over -/// between chunks. -fn max_input_length(encoded_buf_len: usize, padded: bool) -> usize { - let effective_buf_len = if padded { - // make room for padding - encoded_buf_len - .checked_sub(2) - .expect("Don't use a tiny buffer") - } else { - encoded_buf_len - }; - - // No padding, so just normal base64 expansion. - (effective_buf_len / 4) * 3 -} - // A really simple sink that just appends to a string -#[cfg(any(feature = "alloc", feature = "std", test))] +#[cfg(any(feature = "alloc", test))] pub(crate) struct StringSink<'a> { string: &'a mut String, } -#[cfg(any(feature = "alloc", feature = "std", test))] +#[cfg(any(feature = "alloc", test))] impl<'a> StringSink<'a> { pub(crate) fn new(s: &mut String) -> StringSink { StringSink { string: s } } } -#[cfg(any(feature = "alloc", feature = "std", test))] +#[cfg(any(feature = "alloc", test))] impl<'a> Sink for StringSink<'a> { type Error = (); @@ -151,38 +117,13 @@ pub mod tests { chunked_encode_matches_normal_encode_random(&helper); } - #[test] - fn max_input_length_no_pad() { - assert_eq!(768, max_input_length(1024, false)); - } - - #[test] - fn max_input_length_with_pad_decrements_one_triple() { - assert_eq!(765, max_input_length(1024, true)); - } - - #[test] - fn max_input_length_with_pad_one_byte_short() { - assert_eq!(765, max_input_length(1025, true)); - } - - #[test] - fn max_input_length_with_pad_fits_exactly() { - assert_eq!(768, max_input_length(1026, true)); - } - - #[test] - fn 
max_input_length_cant_use_extra_single_encoded_byte() { - assert_eq!(300, max_input_length(401, false)); - } - pub fn chunked_encode_matches_normal_encode_random<S: SinkTestHelper>(sink_test_helper: &S) { let mut input_buf: Vec<u8> = Vec::new(); let mut output_buf = String::new(); let mut rng = rand::rngs::SmallRng::from_entropy(); let input_len_range = Uniform::new(1, 10_000); - for _ in 0..5_000 { + for _ in 0..20_000 { input_buf.clear(); output_buf.clear(); diff --git a/src/decode.rs b/src/decode.rs index 0471518..5230fd3 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -1,5 +1,5 @@ use crate::engine::{general_purpose::STANDARD, DecodeEstimate, Engine}; -#[cfg(any(feature = "alloc", feature = "std", test))] +#[cfg(any(feature = "alloc", test))] use alloc::vec::Vec; use core::fmt; #[cfg(any(feature = "std", test))] @@ -41,11 +41,7 @@ impl fmt::Display for DecodeError { } #[cfg(any(feature = "std", test))] -impl error::Error for DecodeError { - fn cause(&self) -> Option<&dyn error::Error> { - None - } -} +impl error::Error for DecodeError {} /// Errors that can occur while decoding into a slice. #[derive(Clone, Debug, PartialEq, Eq)] @@ -69,7 +65,7 @@ impl fmt::Display for DecodeSliceError { #[cfg(any(feature = "std", test))] impl error::Error for DecodeSliceError { - fn cause(&self) -> Option<&dyn error::Error> { + fn source(&self) -> Option<&(dyn error::Error + 'static)> { match self { DecodeSliceError::DecodeError(e) => Some(e), DecodeSliceError::OutputSliceTooSmall => None, @@ -87,7 +83,7 @@ impl From<DecodeError> for DecodeSliceError { /// /// See [Engine::decode]. #[deprecated(since = "0.21.0", note = "Use Engine::decode")] -#[cfg(any(feature = "alloc", feature = "std", test))] +#[cfg(any(feature = "alloc", test))] pub fn decode<T: AsRef<[u8]>>(input: T) -> Result<Vec<u8>, DecodeError> { STANDARD.decode(input) } @@ -97,7 +93,7 @@ pub fn decode<T: AsRef<[u8]>>(input: T) -> Result<Vec<u8>, DecodeError> { /// See [Engine::decode]. 
///Returns a `Result` containing a `Vec<u8>`. #[deprecated(since = "0.21.0", note = "Use Engine::decode")] -#[cfg(any(feature = "alloc", feature = "std", test))] +#[cfg(any(feature = "alloc", test))] pub fn decode_engine<E: Engine, T: AsRef<[u8]>>( input: T, engine: &E, @@ -108,7 +104,7 @@ pub fn decode_engine<E: Engine, T: AsRef<[u8]>>( /// Decode from string reference as octets. /// /// See [Engine::decode_vec]. -#[cfg(any(feature = "alloc", feature = "std", test))] +#[cfg(any(feature = "alloc", test))] #[deprecated(since = "0.21.0", note = "Use Engine::decode_vec")] pub fn decode_engine_vec<E: Engine, T: AsRef<[u8]>>( input: T, @@ -148,11 +144,6 @@ pub fn decode_engine_slice<E: Engine, T: AsRef<[u8]>>( /// // start of the next quad of encoded symbols /// assert_eq!(6, decoded_len_estimate(5)); /// ``` -/// -/// # Panics -/// -/// Panics if decoded length estimation overflows. -/// This would happen for sizes within a few bytes of the maximum value of `usize`. pub fn decoded_len_estimate(encoded_len: usize) -> usize { STANDARD .internal_decoded_len_estimate(encoded_len) diff --git a/src/encode.rs b/src/encode.rs index cb17650..ae6d790 100644 --- a/src/encode.rs +++ b/src/encode.rs @@ -1,10 +1,10 @@ -#[cfg(any(feature = "alloc", feature = "std", test))] +#[cfg(any(feature = "alloc", test))] use alloc::string::String; use core::fmt; #[cfg(any(feature = "std", test))] use std::error; -#[cfg(any(feature = "alloc", feature = "std", test))] +#[cfg(any(feature = "alloc", test))] use crate::engine::general_purpose::STANDARD; use crate::engine::{Config, Engine}; use crate::PAD_BYTE; @@ -14,7 +14,7 @@ use crate::PAD_BYTE; /// See [Engine::encode]. 
#[allow(unused)] #[deprecated(since = "0.21.0", note = "Use Engine::encode")] -#[cfg(any(feature = "alloc", feature = "std", test))] +#[cfg(any(feature = "alloc", test))] pub fn encode<T: AsRef<[u8]>>(input: T) -> String { STANDARD.encode(input) } @@ -24,7 +24,7 @@ pub fn encode<T: AsRef<[u8]>>(input: T) -> String { /// See [Engine::encode]. #[allow(unused)] #[deprecated(since = "0.21.0", note = "Use Engine::encode")] -#[cfg(any(feature = "alloc", feature = "std", test))] +#[cfg(any(feature = "alloc", test))] pub fn encode_engine<E: Engine, T: AsRef<[u8]>>(input: T, engine: &E) -> String { engine.encode(input) } @@ -34,7 +34,7 @@ pub fn encode_engine<E: Engine, T: AsRef<[u8]>>(input: T, engine: &E) -> String /// See [Engine::encode_string]. #[allow(unused)] #[deprecated(since = "0.21.0", note = "Use Engine::encode_string")] -#[cfg(any(feature = "alloc", feature = "std", test))] +#[cfg(any(feature = "alloc", test))] pub fn encode_engine_string<E: Engine, T: AsRef<[u8]>>( input: T, output_buf: &mut String, @@ -77,7 +77,7 @@ pub(crate) fn encode_with_padding<E: Engine + ?Sized>( let b64_bytes_written = engine.internal_encode(input, output); let padding_bytes = if engine.config().encode_padding() { - add_padding(input.len(), &mut output[b64_bytes_written..]) + add_padding(b64_bytes_written, &mut output[b64_bytes_written..]) } else { 0 }; @@ -94,43 +94,51 @@ pub(crate) fn encode_with_padding<E: Engine + ?Sized>( /// /// Returns `None` if the encoded length can't be represented in `usize`. This will happen for /// input lengths in approximately the top quarter of the range of `usize`. 
-pub fn encoded_len(bytes_len: usize, padding: bool) -> Option<usize> { +pub const fn encoded_len(bytes_len: usize, padding: bool) -> Option<usize> { let rem = bytes_len % 3; let complete_input_chunks = bytes_len / 3; - let complete_chunk_output = complete_input_chunks.checked_mul(4); + // `?` is disallowed in const, and `let Some(_) = _ else` requires 1.65.0, whereas this + // messier syntax works on 1.48 + let complete_chunk_output = + if let Some(complete_chunk_output) = complete_input_chunks.checked_mul(4) { + complete_chunk_output + } else { + return None; + }; if rem > 0 { if padding { - complete_chunk_output.and_then(|c| c.checked_add(4)) + complete_chunk_output.checked_add(4) } else { let encoded_rem = match rem { 1 => 2, - 2 => 3, - _ => unreachable!("Impossible remainder"), + // only other possible remainder is 2 + // can't use a separate _ => unreachable!() in const fns in ancient rust versions + _ => 3, }; - complete_chunk_output.and_then(|c| c.checked_add(encoded_rem)) + complete_chunk_output.checked_add(encoded_rem) } } else { - complete_chunk_output + Some(complete_chunk_output) } } /// Write padding characters. -/// `input_len` is the size of the original, not encoded, input. +/// `unpadded_output_len` is the size of the unpadded but base64 encoded data. /// `output` is the slice where padding should be written, of length at least 2. /// /// Returns the number of padding bytes written. 
-pub(crate) fn add_padding(input_len: usize, output: &mut [u8]) -> usize { - // TODO base on encoded len to use cheaper mod by 4 (aka & 7) - let rem = input_len % 3; - let mut bytes_written = 0; - for _ in 0..((3 - rem) % 3) { - output[bytes_written] = PAD_BYTE; - bytes_written += 1; +pub(crate) fn add_padding(unpadded_output_len: usize, output: &mut [u8]) -> usize { + let pad_bytes = (4 - (unpadded_output_len % 4)) % 4; + // for just a couple bytes, this has better performance than using + // .fill(), or iterating over mutable refs, which call memset() + #[allow(clippy::needless_range_loop)] + for i in 0..pad_bytes { + output[i] = PAD_BYTE; } - bytes_written + pad_bytes } /// Errors that can occur while encoding into a slice. @@ -149,11 +157,7 @@ impl fmt::Display for EncodeSliceError { } #[cfg(any(feature = "std", test))] -impl error::Error for EncodeSliceError { - fn cause(&self) -> Option<&dyn error::Error> { - None - } -} +impl error::Error for EncodeSliceError {} #[cfg(test)] mod tests { @@ -434,18 +438,18 @@ mod tests { let mut rng = rand::rngs::SmallRng::from_entropy(); - // cover our bases for length % 3 - for input_len in 0..10 { + // cover our bases for length % 4 + for unpadded_output_len in 0..20 { output.clear(); // fill output with random - for _ in 0..10 { + for _ in 0..100 { output.push(rng.gen()); } let orig_output_buf = output.clone(); - let bytes_written = add_padding(input_len, &mut output); + let bytes_written = add_padding(unpadded_output_len, &mut output); // make sure the part beyond bytes_written is the same garbage it was before assert_eq!(orig_output_buf[bytes_written..], output[bytes_written..]); diff --git a/src/engine/general_purpose/decode.rs b/src/engine/general_purpose/decode.rs index e9fd788..21a386f 100644 --- a/src/engine/general_purpose/decode.rs +++ b/src/engine/general_purpose/decode.rs @@ -1,5 +1,5 @@ use crate::{ - engine::{general_purpose::INVALID_VALUE, DecodeEstimate, DecodePaddingMode}, + 
engine::{general_purpose::INVALID_VALUE, DecodeEstimate, DecodeMetadata, DecodePaddingMode}, DecodeError, PAD_BYTE, }; @@ -30,16 +30,11 @@ pub struct GeneralPurposeEstimate { impl GeneralPurposeEstimate { pub(crate) fn new(encoded_len: usize) -> Self { + // Formulas that won't overflow Self { - num_chunks: encoded_len - .checked_add(INPUT_CHUNK_LEN - 1) - .expect("Overflow when calculating number of chunks in input") - / INPUT_CHUNK_LEN, - decoded_len_estimate: encoded_len - .checked_add(3) - .expect("Overflow when calculating decoded len estimate") - / 4 - * 3, + num_chunks: encoded_len / INPUT_CHUNK_LEN + + (encoded_len % INPUT_CHUNK_LEN > 0) as usize, + decoded_len_estimate: (encoded_len / 4 + (encoded_len % 4 > 0) as usize) * 3, } } } @@ -51,7 +46,7 @@ impl DecodeEstimate for GeneralPurposeEstimate { } /// Helper to avoid duplicating num_chunks calculation, which is costly on short inputs. -/// Returns the number of bytes written, or an error. +/// Returns the decode metadata, or an error. // We're on the fragile edge of compiler heuristics here. If this is not inlined, slow. If this is // inlined(always), a different slow. plain ol' inline makes the benchmarks happiest at the moment, // but this is fragile and the best setting changes with only minor code modifications. 
@@ -63,7 +58,7 @@ pub(crate) fn decode_helper( decode_table: &[u8; 256], decode_allow_trailing_bits: bool, padding_mode: DecodePaddingMode, -) -> Result<usize, DecodeError> { +) -> Result<DecodeMetadata, DecodeError> { let remainder_len = input.len() % INPUT_CHUNK_LEN; // Because the fast decode loop writes in groups of 8 bytes (unrolled to @@ -345,4 +340,44 @@ mod tests { decode_chunk(&input[..], 0, &STANDARD.decode_table, &mut output).unwrap(); assert_eq!(&vec![b'f', b'o', b'o', b'b', b'a', b'r', 0, 0], &output); } + + #[test] + fn estimate_short_lengths() { + for (range, (num_chunks, decoded_len_estimate)) in [ + (0..=0, (0, 0)), + (1..=4, (1, 3)), + (5..=8, (1, 6)), + (9..=12, (2, 9)), + (13..=16, (2, 12)), + (17..=20, (3, 15)), + ] { + for encoded_len in range { + let estimate = GeneralPurposeEstimate::new(encoded_len); + assert_eq!(num_chunks, estimate.num_chunks); + assert_eq!(decoded_len_estimate, estimate.decoded_len_estimate); + } + } + } + + #[test] + fn estimate_via_u128_inflation() { + // cover both ends of usize + (0..1000) + .chain(usize::MAX - 1000..=usize::MAX) + .for_each(|encoded_len| { + // inflate to 128 bit type to be able to safely use the easy formulas + let len_128 = encoded_len as u128; + + let estimate = GeneralPurposeEstimate::new(encoded_len); + assert_eq!( + ((len_128 + (INPUT_CHUNK_LEN - 1) as u128) / (INPUT_CHUNK_LEN as u128)) + as usize, + estimate.num_chunks + ); + assert_eq!( + ((len_128 + 3) / 4 * 3) as usize, + estimate.decoded_len_estimate + ); + }) + } } diff --git a/src/engine/general_purpose/decode_suffix.rs b/src/engine/general_purpose/decode_suffix.rs index 5652035..e1e005d 100644 --- a/src/engine/general_purpose/decode_suffix.rs +++ b/src/engine/general_purpose/decode_suffix.rs @@ -1,13 +1,13 @@ use crate::{ - engine::{general_purpose::INVALID_VALUE, DecodePaddingMode}, + engine::{general_purpose::INVALID_VALUE, DecodeMetadata, DecodePaddingMode}, DecodeError, PAD_BYTE, }; /// Decode the last 1-8 bytes, checking for 
trailing set bits and padding per the provided /// parameters. /// -/// Returns the total number of bytes decoded, including the ones indicated as already written by -/// `output_index`. +/// Returns the decode metadata representing the total number of bytes decoded, including the ones +/// indicated as already written by `output_index`. pub(crate) fn decode_suffix( input: &[u8], input_index: usize, @@ -16,7 +16,7 @@ pub(crate) fn decode_suffix( decode_table: &[u8; 256], decode_allow_trailing_bits: bool, padding_mode: DecodePaddingMode, -) -> Result<usize, DecodeError> { +) -> Result<DecodeMetadata, DecodeError> { // Decode any leftovers that aren't a complete input block of 8 bytes. // Use a u64 as a stack-resident 8 byte buffer. let mut leftover_bits: u64 = 0; @@ -157,5 +157,12 @@ pub(crate) fn decode_suffix( leftover_bits_appended_to_buf += 8; } - Ok(output_index) + Ok(DecodeMetadata::new( + output_index, + if padding_bytes > 0 { + Some(input_index + first_padding_index) + } else { + None + }, + )) } diff --git a/src/engine/general_purpose/mod.rs b/src/engine/general_purpose/mod.rs index af8897b..e0227f3 100644 --- a/src/engine/general_purpose/mod.rs +++ b/src/engine/general_purpose/mod.rs @@ -2,13 +2,14 @@ use crate::{ alphabet, alphabet::Alphabet, - engine::{Config, DecodePaddingMode}, + engine::{Config, DecodeMetadata, DecodePaddingMode}, DecodeError, }; use core::convert::TryInto; mod decode; pub(crate) mod decode_suffix; + pub use decode::GeneralPurposeEstimate; pub(crate) const INVALID_VALUE: u8 = 255; @@ -18,6 +19,8 @@ pub(crate) const INVALID_VALUE: u8 = 255; /// - It uses no vector CPU instructions, so it will work on any system. /// - It is reasonably fast (~2-3GiB/s). /// - It is not constant-time, though, so it is vulnerable to timing side-channel attacks. For loading cryptographic keys, etc, it is suggested to use the forthcoming constant-time implementation. 
+ +#[derive(Debug, Clone)] pub struct GeneralPurpose { encode_table: [u8; 64], decode_table: [u8; 256], @@ -170,7 +173,7 @@ impl super::Engine for GeneralPurpose { input: &[u8], output: &mut [u8], estimate: Self::DecodeEstimate, - ) -> Result<usize, DecodeError> { + ) -> Result<DecodeMetadata, DecodeError> { decode::decode_helper( input, estimate, diff --git a/src/engine/mod.rs b/src/engine/mod.rs index 12dfaa8..16c05d7 100644 --- a/src/engine/mod.rs +++ b/src/engine/mod.rs @@ -1,14 +1,14 @@ //! Provides the [Engine] abstraction and out of the box implementations. -#[cfg(any(feature = "alloc", feature = "std", test))] +#[cfg(any(feature = "alloc", test))] use crate::chunked_encoder; use crate::{ encode::{encode_with_padding, EncodeSliceError}, encoded_len, DecodeError, DecodeSliceError, }; -#[cfg(any(feature = "alloc", feature = "std", test))] +#[cfg(any(feature = "alloc", test))] use alloc::vec::Vec; -#[cfg(any(feature = "alloc", feature = "std", test))] +#[cfg(any(feature = "alloc", test))] use alloc::{string::String, vec}; pub mod general_purpose; @@ -62,10 +62,6 @@ pub trait Engine: Send + Sync { /// As an optimization to prevent the decoded length from being calculated twice, it is /// sometimes helpful to have a conservative estimate of the decoded size before doing the /// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed. - /// - /// # Panics - /// - /// Panics if decoded length estimation overflows. #[doc(hidden)] fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate; @@ -77,8 +73,6 @@ pub trait Engine: Send + Sync { /// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid /// calculating it again (expensive on short inputs).` /// - /// Returns the number of bytes written to `output`. 
- /// /// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this /// function must also handle the final possibly partial chunk. /// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4, @@ -99,7 +93,7 @@ pub trait Engine: Send + Sync { input: &[u8], output: &mut [u8], decode_estimate: Self::DecodeEstimate, - ) -> Result<usize, DecodeError>; + ) -> Result<DecodeMetadata, DecodeError>; /// Returns the config for this engine. fn config(&self) -> &Self::Config; @@ -119,15 +113,24 @@ pub trait Engine: Send + Sync { /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD); /// /// let b64_url = CUSTOM_ENGINE.encode(b"hello internet~"); - #[cfg(any(feature = "alloc", feature = "std", test))] + #[cfg(any(feature = "alloc", test))] + #[inline] fn encode<T: AsRef<[u8]>>(&self, input: T) -> String { - let encoded_size = encoded_len(input.as_ref().len(), self.config().encode_padding()) - .expect("integer overflow when calculating buffer size"); - let mut buf = vec![0; encoded_size]; + fn inner<E>(engine: &E, input_bytes: &[u8]) -> String + where + E: Engine + ?Sized, + { + let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding()) + .expect("integer overflow when calculating buffer size"); + + let mut buf = vec![0; encoded_size]; - encode_with_padding(input.as_ref(), &mut buf[..], self, encoded_size); + encode_with_padding(input_bytes, &mut buf[..], engine, encoded_size); - String::from_utf8(buf).expect("Invalid UTF8") + String::from_utf8(buf).expect("Invalid UTF8") + } + + inner(self, input.as_ref()) } /// Encode arbitrary octets as base64 into a supplied `String`. 
@@ -150,17 +153,21 @@ pub trait Engine: Send + Sync { /// println!("{}", buf); /// } /// ``` - #[cfg(any(feature = "alloc", feature = "std", test))] + #[cfg(any(feature = "alloc", test))] + #[inline] fn encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String) { - let input_bytes = input.as_ref(); - + fn inner<E>(engine: &E, input_bytes: &[u8], output_buf: &mut String) + where + E: Engine + ?Sized, { let mut sink = chunked_encoder::StringSink::new(output_buf); - chunked_encoder::ChunkedEncoder::new(self) + chunked_encoder::ChunkedEncoder::new(engine) .encode(input_bytes, &mut sink) .expect("Writing to a String shouldn't fail"); } + + inner(self, input.as_ref(), output_buf) } /// Encode arbitrary octets as base64 into a supplied slice. @@ -171,7 +178,8 @@ pub trait Engine: Send + Sync { /// /// # Example /// - /// ```rust + #[cfg_attr(feature = "alloc", doc = "```")] + #[cfg_attr(not(feature = "alloc"), doc = "```ignore")] /// use base64::{Engine as _, engine::general_purpose}; /// let s = b"hello internet!"; /// let mut buf = Vec::new(); @@ -185,29 +193,38 @@ pub trait Engine: Send + Sync { /// /// assert_eq!(s, general_purpose::STANDARD.decode(&buf).unwrap().as_slice()); /// ``` + #[inline] fn encode_slice<T: AsRef<[u8]>>( &self, input: T, output_buf: &mut [u8], ) -> Result<usize, EncodeSliceError> { - let input_bytes = input.as_ref(); + fn inner<E>( + engine: &E, + input_bytes: &[u8], + output_buf: &mut [u8], + ) -> Result<usize, EncodeSliceError> + where + E: Engine + ?Sized, + { + let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding()) + .expect("usize overflow when calculating buffer size"); - let encoded_size = encoded_len(input_bytes.len(), self.config().encode_padding()) - .expect("usize overflow when calculating buffer size"); + if output_buf.len() < encoded_size { + return Err(EncodeSliceError::OutputSliceTooSmall); + } - if output_buf.len() < encoded_size { - return Err(EncodeSliceError::OutputSliceTooSmall); - } 
+ let b64_output = &mut output_buf[0..encoded_size]; - let b64_output = &mut output_buf[0..encoded_size]; + encode_with_padding(input_bytes, b64_output, engine, encoded_size); - encode_with_padding(input_bytes, b64_output, self, encoded_size); + Ok(encoded_size) + } - Ok(encoded_size) + inner(self, input.as_ref(), output_buf) } - /// Decode from string reference as octets using the specified [Engine]. - /// Returns a `Result` containing a `Vec<u8>`. + /// Decode the input into a new `Vec`. /// /// # Example /// @@ -225,25 +242,30 @@ pub trait Engine: Send + Sync { /// .decode("aGVsbG8gaW50ZXJuZXR-Cg").unwrap(); /// println!("{:?}", bytes_url); /// ``` - /// - /// # Panics - /// - /// Panics if decoded length estimation overflows. - /// This would happen for sizes within a few bytes of the maximum value of `usize`. - #[cfg(any(feature = "alloc", feature = "std", test))] + #[cfg(any(feature = "alloc", test))] + #[inline] fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> { - let input_bytes = input.as_ref(); + fn inner<E>(engine: &E, input_bytes: &[u8]) -> Result<Vec<u8>, DecodeError> + where + E: Engine + ?Sized, + { + let estimate = engine.internal_decoded_len_estimate(input_bytes.len()); + let mut buffer = vec![0; estimate.decoded_len_estimate()]; - let estimate = self.internal_decoded_len_estimate(input_bytes.len()); - let mut buffer = vec![0; estimate.decoded_len_estimate()]; + let bytes_written = engine + .internal_decode(input_bytes, &mut buffer, estimate)? + .decoded_len; - let bytes_written = self.internal_decode(input_bytes, &mut buffer, estimate)?; - buffer.truncate(bytes_written); + buffer.truncate(bytes_written); - Ok(buffer) + Ok(buffer) + } + + inner(self, input.as_ref()) } - /// Decode from string reference as octets. + /// Decode the `input` into the supplied `buffer`. + /// /// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough. 
/// Returns a `Result` containing an empty tuple, aka `()`. /// @@ -272,39 +294,45 @@ pub trait Engine: Send + Sync { /// println!("{:?}", buffer); /// } /// ``` - /// - /// # Panics - /// - /// Panics if decoded length estimation overflows. - /// This would happen for sizes within a few bytes of the maximum value of `usize`. - #[cfg(any(feature = "alloc", feature = "std", test))] + #[cfg(any(feature = "alloc", test))] + #[inline] fn decode_vec<T: AsRef<[u8]>>( &self, input: T, buffer: &mut Vec<u8>, ) -> Result<(), DecodeError> { - let input_bytes = input.as_ref(); + fn inner<E>(engine: &E, input_bytes: &[u8], buffer: &mut Vec<u8>) -> Result<(), DecodeError> + where + E: Engine + ?Sized, + { + let starting_output_len = buffer.len(); + let estimate = engine.internal_decoded_len_estimate(input_bytes.len()); + + let total_len_estimate = estimate + .decoded_len_estimate() + .checked_add(starting_output_len) + .expect("Overflow when calculating output buffer length"); - let starting_output_len = buffer.len(); + buffer.resize(total_len_estimate, 0); - let estimate = self.internal_decoded_len_estimate(input_bytes.len()); - let total_len_estimate = estimate - .decoded_len_estimate() - .checked_add(starting_output_len) - .expect("Overflow when calculating output buffer length"); - buffer.resize(total_len_estimate, 0); + let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..]; - let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..]; - let bytes_written = self.internal_decode(input_bytes, buffer_slice, estimate)?; + let bytes_written = engine + .internal_decode(input_bytes, buffer_slice, estimate)? + .decoded_len; - buffer.truncate(starting_output_len + bytes_written); + buffer.truncate(starting_output_len + bytes_written); - Ok(()) + Ok(()) + } + + inner(self, input.as_ref(), buffer) } /// Decode the input into the provided output slice. /// - /// Returns an error if `output` is smaller than the estimated decoded length. 
+ /// Returns the number of bytes written to the slice, or an error if `output` is smaller than + /// the estimated decoded length. /// /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end). /// @@ -312,29 +340,39 @@ pub trait Engine: Send + Sync { /// /// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error /// if the output buffer is too small. - /// - /// # Panics - /// - /// Panics if decoded length estimation overflows. - /// This would happen for sizes within a few bytes of the maximum value of `usize`. + #[inline] fn decode_slice<T: AsRef<[u8]>>( &self, input: T, output: &mut [u8], ) -> Result<usize, DecodeSliceError> { - let input_bytes = input.as_ref(); + fn inner<E>( + engine: &E, + input_bytes: &[u8], + output: &mut [u8], + ) -> Result<usize, DecodeSliceError> + where + E: Engine + ?Sized, + { + let estimate = engine.internal_decoded_len_estimate(input_bytes.len()); - let estimate = self.internal_decoded_len_estimate(input_bytes.len()); - if output.len() < estimate.decoded_len_estimate() { - return Err(DecodeSliceError::OutputSliceTooSmall); + if output.len() < estimate.decoded_len_estimate() { + return Err(DecodeSliceError::OutputSliceTooSmall); + } + + engine + .internal_decode(input_bytes, output, estimate) + .map_err(|e| e.into()) + .map(|dm| dm.decoded_len) } - self.internal_decode(input_bytes, output, estimate) - .map_err(|e| e.into()) + inner(self, input.as_ref(), output) } /// Decode the input into the provided output slice. /// + /// Returns the number of bytes written to the slice. + /// /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end). /// /// See [crate::decoded_len_estimate] for calculating buffer sizes. @@ -344,22 +382,27 @@ pub trait Engine: Send + Sync { /// /// # Panics /// - /// Panics if decoded length estimation overflows. 
- /// This would happen for sizes within a few bytes of the maximum value of `usize`. - /// /// Panics if the provided output buffer is too small for the decoded data. + #[inline] fn decode_slice_unchecked<T: AsRef<[u8]>>( &self, input: T, output: &mut [u8], ) -> Result<usize, DecodeError> { - let input_bytes = input.as_ref(); + fn inner<E>(engine: &E, input_bytes: &[u8], output: &mut [u8]) -> Result<usize, DecodeError> + where + E: Engine + ?Sized, + { + engine + .internal_decode( + input_bytes, + output, + engine.internal_decoded_len_estimate(input_bytes.len()), + ) + .map(|dm| dm.decoded_len) + } - self.internal_decode( - input_bytes, - output, - self.internal_decoded_len_estimate(input_bytes.len()), - ) + inner(self, input.as_ref(), output) } } @@ -387,11 +430,6 @@ pub trait DecodeEstimate { /// /// The estimate must be no larger than the next largest complete triple of decoded bytes. /// That is, the final quad of tokens to decode may be assumed to be complete with no padding. - /// - /// # Panics - /// - /// Panics if decoded length estimation overflows. - /// This would happen for sizes within a few bytes of the maximum value of `usize`. fn decoded_len_estimate(&self) -> usize; } @@ -408,3 +446,21 @@ pub enum DecodePaddingMode { /// Padding must be absent -- for when you want predictable padding, without any wasted bytes. 
RequireNone, } + +/// Metadata about the result of a decode operation +#[derive(PartialEq, Eq, Debug)] +pub struct DecodeMetadata { + /// Number of decoded bytes output + pub(crate) decoded_len: usize, + /// Offset of the first padding byte in the input, if any + pub(crate) padding_offset: Option<usize>, +} + +impl DecodeMetadata { + pub(crate) fn new(decoded_bytes: usize, padding_index: Option<usize>) -> Self { + Self { + decoded_len: decoded_bytes, + padding_offset: padding_index, + } + } +} diff --git a/src/engine/naive.rs b/src/engine/naive.rs index 6665c5e..6a50cbe 100644 --- a/src/engine/naive.rs +++ b/src/engine/naive.rs @@ -2,12 +2,11 @@ use crate::{ alphabet::Alphabet, engine::{ general_purpose::{self, decode_table, encode_table}, - Config, DecodeEstimate, DecodePaddingMode, Engine, + Config, DecodeEstimate, DecodeMetadata, DecodePaddingMode, Engine, }, DecodeError, PAD_BYTE, }; -use alloc::ops::BitOr; -use std::ops::{BitAnd, Shl, Shr}; +use std::ops::{BitAnd, BitOr, Shl, Shr}; /// Comparatively simple implementation that can be used as something to compare against in tests pub struct Naive { @@ -112,7 +111,7 @@ impl Engine for Naive { input: &[u8], output: &mut [u8], estimate: Self::DecodeEstimate, - ) -> Result<usize, DecodeError> { + ) -> Result<DecodeMetadata, DecodeError> { if estimate.rem == 1 { // trailing whitespace is so common that it's worth it to check the last byte to // possibly return a better error message diff --git a/src/engine/tests.rs b/src/engine/tests.rs index 906bba0..b048005 100644 --- a/src/engine/tests.rs +++ b/src/engine/tests.rs @@ -8,13 +8,16 @@ use rand::{ }; use rstest::rstest; use rstest_reuse::{apply, template}; -use std::{collections, fmt}; +use std::{collections, fmt, io::Read as _}; use crate::{ alphabet::{Alphabet, STANDARD}, encode::add_padding, encoded_len, - engine::{general_purpose, naive, Config, DecodeEstimate, DecodePaddingMode, Engine}, + engine::{ + general_purpose, naive, Config, DecodeEstimate, 
DecodeMetadata, DecodePaddingMode, Engine, + }, + read::DecoderReader, tests::{assert_encode_sanity, random_alphabet, random_config}, DecodeError, PAD_BYTE, }; @@ -24,9 +27,20 @@ use crate::{ #[rstest(engine_wrapper, case::general_purpose(GeneralPurposeWrapper {}), case::naive(NaiveWrapper {}), +case::decoder_reader(DecoderReaderEngineWrapper {}), )] fn all_engines<E: EngineWrapper>(engine_wrapper: E) {} +/// Some decode tests don't make sense for use with `DecoderReader` as they are difficult to +/// reason about or otherwise inapplicable given how DecoderReader slice up its input along +/// chunk boundaries. +#[template] +#[rstest(engine_wrapper, +case::general_purpose(GeneralPurposeWrapper {}), +case::naive(NaiveWrapper {}), +)] +fn all_engines_except_decoder_reader<E: EngineWrapper>(engine_wrapper: E) {} + #[apply(all_engines)] fn rfc_test_vectors_std_alphabet<E: EngineWrapper>(engine_wrapper: E) { let data = vec![ @@ -86,7 +100,7 @@ fn rfc_test_vectors_std_alphabet<E: EngineWrapper>(engine_wrapper: E) { &encoded_without_padding, &std::str::from_utf8(&encode_buf[0..encode_len]).unwrap() ); - let pad_len = add_padding(orig.len(), &mut encode_buf[encode_len..]); + let pad_len = add_padding(encode_len, &mut encode_buf[encode_len..]); assert_eq!(encoded.as_bytes(), &encode_buf[..encode_len + pad_len]); let decode_len = engine @@ -195,7 +209,10 @@ fn encode_doesnt_write_extra_bytes<E: EngineWrapper>(engine_wrapper: E) { // pad so we can decode it in case our random engine requires padding let pad_len = if padded { - add_padding(orig_len, &mut encode_buf[prefix_len + encoded_len_no_pad..]) + add_padding( + encoded_len_no_pad, + &mut encode_buf[prefix_len + encoded_len_no_pad..], + ) } else { 0 }; @@ -382,7 +399,7 @@ fn decode_detect_invalid_last_symbol_every_possible_two_symbols<E: EngineWrapper for b in 0_u8..=255 { let mut b64 = vec![0_u8; 4]; assert_eq!(2, engine.internal_encode(&[b], &mut b64[..])); - let _ = add_padding(1, &mut b64[2..]); + let _ = 
add_padding(2, &mut b64[2..]); assert!(base64_to_bytes.insert(b64, vec![b]).is_none()); } @@ -442,7 +459,7 @@ fn decode_detect_invalid_last_symbol_every_possible_three_symbols<E: EngineWrapp bytes[1] = b2; let mut b64 = vec![0_u8; 4]; assert_eq!(3, engine.internal_encode(&bytes, &mut b64[..])); - let _ = add_padding(2, &mut b64[3..]); + let _ = add_padding(3, &mut b64[3..]); let mut v = Vec::with_capacity(2); v.extend_from_slice(&bytes[..]); @@ -549,7 +566,7 @@ fn decode_invalid_byte_error<E: EngineWrapper>(engine_wrapper: E) { let len_range = distributions::Uniform::new(1, 1_000); - for _ in 0..10_000 { + for _ in 0..100_000 { let alphabet = random_alphabet(&mut rng); let engine = E::random_alphabet(&mut rng, alphabet); @@ -573,7 +590,7 @@ fn decode_invalid_byte_error<E: EngineWrapper>(engine_wrapper: E) { let invalid_byte: u8 = loop { let byte: u8 = rng.gen(); - if alphabet.symbols.contains(&byte) { + if alphabet.symbols.contains(&byte) || byte == PAD_BYTE { continue; } else { break byte; @@ -597,14 +614,16 @@ fn decode_invalid_byte_error<E: EngineWrapper>(engine_wrapper: E) { /// Any amount of padding anywhere before the final non padding character = invalid byte at first /// pad byte. /// From this, we know padding must extend to the end of the input. 
-#[apply(all_engines)] +// DecoderReader pseudo-engine detects InvalidLastSymbol instead of InvalidLength because it +// can end a decode on the quad that happens to contain the start of the padding +#[apply(all_engines_except_decoder_reader)] fn decode_padding_before_final_non_padding_char_error_invalid_byte<E: EngineWrapper>( engine_wrapper: E, ) { let mut rng = seeded_rng(); // the different amounts of proper padding, w/ offset from end for the last non-padding char - let suffixes = vec![("/w==", 2), ("iYu=", 1), ("zzzz", 0)]; + let suffixes = [("/w==", 2), ("iYu=", 1), ("zzzz", 0)]; let prefix_quads_range = distributions::Uniform::from(0..=256); @@ -641,10 +660,13 @@ fn decode_padding_before_final_non_padding_char_error_invalid_byte<E: EngineWrap } } -/// Any amount of padding before final chunk that crosses over into final chunk with 1-4 bytes = -/// invalid byte at first pad byte (except for 1 byte suffix = invalid length). -/// From this we know the padding must start in the final chunk. -#[apply(all_engines)] +/// Any amount of padding before final chunk that crosses over into final chunk with 2-4 bytes = +/// invalid byte at first pad byte. +/// From this and [decode_padding_starts_before_final_chunk_error_invalid_length] we know the +/// padding must start in the final chunk. 
+// DecoderReader pseudo-engine detects InvalidLastSymbol instead of InvalidLength because it +// can end a decode on the quad that happens to contain the start of the padding +#[apply(all_engines_except_decoder_reader)] fn decode_padding_starts_before_final_chunk_error_invalid_byte<E: EngineWrapper>( engine_wrapper: E, ) { @@ -652,8 +674,8 @@ fn decode_padding_starts_before_final_chunk_error_invalid_byte<E: EngineWrapper> // must have at least one prefix quad let prefix_quads_range = distributions::Uniform::from(1..256); - // including 1 just to make sure that it really does produce invalid length - let suffix_pad_len_range = distributions::Uniform::from(1..=4); + // excluding 1 since we don't care about invalid length in this test + let suffix_pad_len_range = distributions::Uniform::from(2..=4); for mode in all_pad_modes() { // we don't encode so we don't care about encode padding let engine = E::standard_with_pad_mode(true, mode); @@ -671,14 +693,48 @@ fn decode_padding_starts_before_final_chunk_error_invalid_byte<E: EngineWrapper> let padding_start = encoded.len() - padding_len; encoded[padding_start..].fill(PAD_BYTE); - if suffix_len == 1 { - assert_eq!(Err(DecodeError::InvalidLength), engine.decode(&encoded),); - } else { - assert_eq!( - Err(DecodeError::InvalidByte(padding_start, PAD_BYTE)), - engine.decode(&encoded), - ); - } + assert_eq!( + Err(DecodeError::InvalidByte(padding_start, PAD_BYTE)), + engine.decode(&encoded), + "suffix_len: {}, padding_len: {}, b64: {}", + suffix_len, + padding_len, + std::str::from_utf8(&encoded).unwrap() + ); + } + } +} + +/// Any amount of padding before final chunk that crosses over into final chunk with 1 byte = +/// invalid length. +/// From this we know the padding must start in the final chunk. 
+// DecoderReader pseudo-engine detects InvalidByte instead of InvalidLength because it starts by +// decoding only the available complete quads +#[apply(all_engines_except_decoder_reader)] +fn decode_padding_starts_before_final_chunk_error_invalid_length<E: EngineWrapper>( + engine_wrapper: E, +) { + let mut rng = seeded_rng(); + + // must have at least one prefix quad + let prefix_quads_range = distributions::Uniform::from(1..256); + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); + for _ in 0..100_000 { + let mut encoded = "ABCD" + .repeat(prefix_quads_range.sample(&mut rng)) + .into_bytes(); + encoded.resize(encoded.len() + 1, PAD_BYTE); + + // amount of padding must be long enough to extend back from suffix into previous + // quads + let padding_len = rng.gen_range(1 + 1..encoded.len()); + // no non-padding after padding in this test, so padding goes to the end + let padding_start = encoded.len() - padding_len; + encoded[padding_start..].fill(PAD_BYTE); + + assert_eq!(Err(DecodeError::InvalidLength), engine.decode(&encoded),); } } } @@ -787,7 +843,9 @@ fn decode_malleability_test_case_2_byte_suffix_no_padding<E: EngineWrapper>(engi } // https://eprint.iacr.org/2022/361.pdf table 2, test 7 -#[apply(all_engines)] +// DecoderReader pseudo-engine gets InvalidByte at 8 (extra padding) since it decodes the first +// two complete quads correctly. 
+#[apply(all_engines_except_decoder_reader)] fn decode_malleability_test_case_2_byte_suffix_too_much_padding<E: EngineWrapper>( engine_wrapper: E, ) { @@ -811,7 +869,7 @@ fn decode_pad_mode_requires_canonical_accepts_canonical<E: EngineWrapper>(engine fn decode_pad_mode_requires_canonical_rejects_non_canonical<E: EngineWrapper>(engine_wrapper: E) { let engine = E::standard_with_pad_mode(true, DecodePaddingMode::RequireCanonical); - let suffixes = vec!["/w", "/w=", "iYU"]; + let suffixes = ["/w", "/w=", "iYU"]; for num_prefix_quads in 0..256 { for &suffix in suffixes.iter() { let mut encoded = "AAAA".repeat(num_prefix_quads); @@ -838,7 +896,7 @@ fn decode_pad_mode_requires_no_padding_accepts_no_padding<E: EngineWrapper>(engi fn decode_pad_mode_requires_no_padding_rejects_any_padding<E: EngineWrapper>(engine_wrapper: E) { let engine = E::standard_with_pad_mode(true, DecodePaddingMode::RequireNone); - let suffixes = vec!["/w=", "/w==", "iYU="]; + let suffixes = ["/w=", "/w==", "iYU="]; for num_prefix_quads in 0..256 { for &suffix in suffixes.iter() { let mut encoded = "AAAA".repeat(num_prefix_quads); @@ -861,7 +919,11 @@ fn decode_pad_mode_indifferent_padding_accepts_anything<E: EngineWrapper>(engine } //this is a MAY in the rfc: https://tools.ietf.org/html/rfc4648#section-3.3 -#[apply(all_engines)] +// DecoderReader pseudo-engine finds the first padding, but doesn't report it as an error, +// because in the next decode it finds more padding, which is reported as InvalidByte, just +// with an offset at its position in the second decode, rather than being linked to the start +// of the padding that was first seen in the previous decode. 
+#[apply(all_engines_except_decoder_reader)] fn decode_pad_byte_in_penultimate_quad_error<E: EngineWrapper>(engine_wrapper: E) { for mode in all_pad_modes() { // we don't encode so we don't care about encode padding @@ -895,7 +957,7 @@ fn decode_pad_byte_in_penultimate_quad_error<E: EngineWrapper>(engine_wrapper: E num_prefix_quads * 4 + num_valid_bytes_penultimate_quad, b'=', ), - engine.decode(&s).unwrap_err() + engine.decode(&s).unwrap_err(), ); } } @@ -955,7 +1017,9 @@ fn decode_absurd_pad_error<E: EngineWrapper>(engine_wrapper: E) { } } -#[apply(all_engines)] +// DecoderReader pseudo-engine detects InvalidByte instead of InvalidLength because it starts by +// decoding only the available complete quads +#[apply(all_engines_except_decoder_reader)] fn decode_too_much_padding_returns_error<E: EngineWrapper>(engine_wrapper: E) { for mode in all_pad_modes() { // we don't encode so we don't care about encode padding @@ -981,7 +1045,9 @@ fn decode_too_much_padding_returns_error<E: EngineWrapper>(engine_wrapper: E) { } } -#[apply(all_engines)] +// DecoderReader pseudo-engine detects InvalidByte instead of InvalidLength because it starts by +// decoding only the available complete quads +#[apply(all_engines_except_decoder_reader)] fn decode_padding_followed_by_non_padding_returns_error<E: EngineWrapper>(engine_wrapper: E) { for mode in all_pad_modes() { // we don't encode so we don't care about encode padding @@ -1079,27 +1145,43 @@ fn decode_too_few_symbols_in_final_quad_error<E: EngineWrapper>(engine_wrapper: } } -#[apply(all_engines)] +// DecoderReader pseudo-engine can't handle DecodePaddingMode::RequireNone since it will decode +// a complete quad with padding in it before encountering the stray byte that makes it an invalid +// length +#[apply(all_engines_except_decoder_reader)] fn decode_invalid_trailing_bytes<E: EngineWrapper>(engine_wrapper: E) { for mode in all_pad_modes() { - // we don't encode so we don't care about encode padding - let engine = 
E::standard_with_pad_mode(true, mode); + do_invalid_trailing_byte(E::standard_with_pad_mode(true, mode), mode); + } +} - for num_prefix_quads in 0..256 { - let mut s: String = "ABCD".repeat(num_prefix_quads); - s.push_str("Cg==\n"); +#[apply(all_engines)] +fn decode_invalid_trailing_bytes_all_modes<E: EngineWrapper>(engine_wrapper: E) { + // excluding no padding mode because the DecoderWrapper pseudo-engine will fail with + // InvalidPadding because it will decode the last complete quad with padding first + for mode in pad_modes_allowing_padding() { + do_invalid_trailing_byte(E::standard_with_pad_mode(true, mode), mode); + } +} - // The case of trailing newlines is common enough to warrant a test for a good error - // message. - assert_eq!( - Err(DecodeError::InvalidByte(num_prefix_quads * 4 + 4, b'\n')), - engine.decode(&s) - ); +#[apply(all_engines)] +fn decode_invalid_trailing_padding_as_invalid_length<E: EngineWrapper>(engine_wrapper: E) { + // excluding no padding mode because the DecoderWrapper pseudo-engine will fail with + // InvalidPadding because it will decode the last complete quad with padding first + for mode in pad_modes_allowing_padding() { + do_invalid_trailing_padding_as_invalid_length(E::standard_with_pad_mode(true, mode), mode); + } +} - // extra padding, however, is still InvalidLength - let s = s.replace('\n', "="); - assert_eq!(Err(DecodeError::InvalidLength), engine.decode(s)); - } +// DecoderReader pseudo-engine can't handle DecodePaddingMode::RequireNone since it will decode +// a complete quad with padding in it before encountering the stray byte that makes it an invalid +// length +#[apply(all_engines_except_decoder_reader)] +fn decode_invalid_trailing_padding_as_invalid_length_all_modes<E: EngineWrapper>( + engine_wrapper: E, +) { + for mode in all_pad_modes() { + do_invalid_trailing_padding_as_invalid_length(E::standard_with_pad_mode(true, mode), mode); } } @@ -1178,6 +1260,53 @@ fn decode_into_slice_fits_in_precisely_sized_slice<E: 
EngineWrapper>(engine_wrap } #[apply(all_engines)] +fn inner_decode_reports_padding_position<E: EngineWrapper>(engine_wrapper: E) { + let mut b64 = String::new(); + let mut decoded = Vec::new(); + let engine = E::standard(); + + for pad_position in 1..10_000 { + b64.clear(); + decoded.clear(); + // plenty of room for original data + decoded.resize(pad_position, 0); + + for _ in 0..pad_position { + b64.push('A'); + } + // finish the quad with padding + for _ in 0..(4 - (pad_position % 4)) { + b64.push('='); + } + + let decode_res = engine.internal_decode( + b64.as_bytes(), + &mut decoded[..], + engine.internal_decoded_len_estimate(b64.len()), + ); + if pad_position % 4 < 2 { + // impossible padding + assert_eq!( + Err(DecodeError::InvalidByte(pad_position, PAD_BYTE)), + decode_res + ); + } else { + let decoded_bytes = pad_position / 4 * 3 + + match pad_position % 4 { + 0 => 0, + 2 => 1, + 3 => 2, + _ => unreachable!(), + }; + assert_eq!( + Ok(DecodeMetadata::new(decoded_bytes, Some(pad_position))), + decode_res + ); + } + } +} + +#[apply(all_engines)] fn decode_length_estimate_delta<E: EngineWrapper>(engine_wrapper: E) { for engine in [E::standard(), E::standard_unpadded()] { for &padding in &[true, false] { @@ -1200,6 +1329,64 @@ fn decode_length_estimate_delta<E: EngineWrapper>(engine_wrapper: E) { } } +#[apply(all_engines)] +fn estimate_via_u128_inflation<E: EngineWrapper>(engine_wrapper: E) { + // cover both ends of usize + (0..1000) + .chain(usize::MAX - 1000..=usize::MAX) + .for_each(|encoded_len| { + // inflate to 128 bit type to be able to safely use the easy formulas + let len_128 = encoded_len as u128; + + let estimate = E::standard() + .internal_decoded_len_estimate(encoded_len) + .decoded_len_estimate(); + + // This check is a little too strict: it requires using the (len + 3) / 4 * 3 formula + // or equivalent, but until other engines come along that use a different formula + // requiring that we think more carefully about what the allowable criteria 
are, this + // will do. + assert_eq!( + ((len_128 + 3) / 4 * 3) as usize, + estimate, + "enc len {}", + encoded_len + ); + }) +} + +fn do_invalid_trailing_byte(engine: impl Engine, mode: DecodePaddingMode) { + for num_prefix_quads in 0..256 { + let mut s: String = "ABCD".repeat(num_prefix_quads); + s.push_str("Cg==\n"); + + // The case of trailing newlines is common enough to warrant a test for a good error + // message. + assert_eq!( + Err(DecodeError::InvalidByte(num_prefix_quads * 4 + 4, b'\n')), + engine.decode(&s), + "mode: {:?}, input: {}", + mode, + s + ); + } +} + +fn do_invalid_trailing_padding_as_invalid_length(engine: impl Engine, mode: DecodePaddingMode) { + for num_prefix_quads in 0..256 { + let mut s: String = "ABCD".repeat(num_prefix_quads); + s.push_str("Cg==="); + + assert_eq!( + Err(DecodeError::InvalidLength), + engine.decode(&s), + "mode: {:?}, input: {}", + mode, + s + ); + } +} + /// Returns a tuple of the original data length, the encoded data length (just data), and the length including padding. /// /// Vecs provided should be empty. 
@@ -1219,7 +1406,7 @@ fn generate_random_encoded_data<E: Engine, R: rand::Rng, D: distributions::Distr let base_encoded_len = engine.internal_encode(&orig_data[..], &mut encode_buf[..]); let enc_len_with_padding = if padding { - base_encoded_len + add_padding(orig_len, &mut encode_buf[base_encoded_len..]) + base_encoded_len + add_padding(base_encoded_len, &mut encode_buf[base_encoded_len..]) } else { base_encoded_len }; @@ -1249,11 +1436,7 @@ fn fill_rand_len<R: rand::Rng>(vec: &mut Vec<u8>, rng: &mut R, len: usize) { } } -fn prefixed_data<'i, 'd>( - input_with_prefix: &'i mut String, - prefix_len: usize, - data: &'d str, -) -> &'i str { +fn prefixed_data<'i>(input_with_prefix: &'i mut String, prefix_len: usize, data: &str) -> &'i str { input_with_prefix.truncate(prefix_len); input_with_prefix.push_str(data); input_with_prefix.as_str() @@ -1405,6 +1588,103 @@ impl EngineWrapper for NaiveWrapper { } } +/// A pseudo-Engine that routes all decoding through [DecoderReader] +struct DecoderReaderEngine<E: Engine> { + engine: E, +} + +impl<E: Engine> From<E> for DecoderReaderEngine<E> { + fn from(value: E) -> Self { + Self { engine: value } + } +} + +impl<E: Engine> Engine for DecoderReaderEngine<E> { + type Config = E::Config; + type DecodeEstimate = E::DecodeEstimate; + + fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize { + self.engine.internal_encode(input, output) + } + + fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate { + self.engine.internal_decoded_len_estimate(input_len) + } + + fn internal_decode( + &self, + input: &[u8], + output: &mut [u8], + decode_estimate: Self::DecodeEstimate, + ) -> Result<DecodeMetadata, DecodeError> { + let mut reader = DecoderReader::new(input, &self.engine); + let mut buf = vec![0; input.len()]; + // to avoid effects like not detecting invalid length due to progressively growing + // the output buffer in read_to_end etc, read into a big enough buffer in one go + // to make behavior 
more consistent with normal engines + let _ = reader + .read(&mut buf) + .and_then(|len| { + buf.truncate(len); + // make sure we got everything + reader.read_to_end(&mut buf) + }) + .map_err(|io_error| { + *io_error + .into_inner() + .and_then(|inner| inner.downcast::<DecodeError>().ok()) + .unwrap() + })?; + output[..buf.len()].copy_from_slice(&buf); + Ok(DecodeMetadata::new( + buf.len(), + input + .iter() + .enumerate() + .filter(|(_offset, byte)| **byte == PAD_BYTE) + .map(|(offset, _byte)| offset) + .next(), + )) + } + + fn config(&self) -> &Self::Config { + self.engine.config() + } +} + +struct DecoderReaderEngineWrapper {} + +impl EngineWrapper for DecoderReaderEngineWrapper { + type Engine = DecoderReaderEngine<general_purpose::GeneralPurpose>; + + fn standard() -> Self::Engine { + GeneralPurposeWrapper::standard().into() + } + + fn standard_unpadded() -> Self::Engine { + GeneralPurposeWrapper::standard_unpadded().into() + } + + fn standard_with_pad_mode( + encode_pad: bool, + decode_pad_mode: DecodePaddingMode, + ) -> Self::Engine { + GeneralPurposeWrapper::standard_with_pad_mode(encode_pad, decode_pad_mode).into() + } + + fn standard_allow_trailing_bits() -> Self::Engine { + GeneralPurposeWrapper::standard_allow_trailing_bits().into() + } + + fn random<R: rand::Rng>(rng: &mut R) -> Self::Engine { + GeneralPurposeWrapper::random(rng).into() + } + + fn random_alphabet<R: rand::Rng>(rng: &mut R, alphabet: &Alphabet) -> Self::Engine { + GeneralPurposeWrapper::random_alphabet(rng, alphabet).into() + } +} + fn seeded_rng() -> impl rand::Rng { rngs::SmallRng::from_entropy() } @@ -1417,6 +1697,13 @@ fn all_pad_modes() -> Vec<DecodePaddingMode> { ] } +fn pad_modes_allowing_padding() -> Vec<DecodePaddingMode> { + vec![ + DecodePaddingMode::Indifferent, + DecodePaddingMode::RequireCanonical, + ] +} + fn assert_all_suffixes_ok<E: Engine>(engine: E, suffixes: Vec<&str>) { for num_prefix_quads in 0..256 { for &suffix in suffixes.iter() { @@ -1,101 +1,127 @@ -//! 
# Getting started +//! Correct, fast, and configurable [base64][] decoding and encoding. Base64 +//! transports binary data efficiently in contexts where only plain text is +//! allowed. //! -//! 1. Perhaps one of the preconfigured engines in [engine::general_purpose] will suit, e.g. -//! [engine::general_purpose::STANDARD_NO_PAD]. -//! - These are re-exported in [prelude] with a `BASE64_` prefix for those who prefer to -//! `use base64::prelude::*` or equivalent, e.g. [prelude::BASE64_STANDARD_NO_PAD] -//! 1. If not, choose which alphabet you want. Most usage will want [alphabet::STANDARD] or [alphabet::URL_SAFE]. -//! 1. Choose which [Engine] implementation you want. For the moment there is only one: [engine::GeneralPurpose]. -//! 1. Configure the engine appropriately using the engine's `Config` type. -//! - This is where you'll select whether to add padding (when encoding) or expect it (when -//! decoding). If given the choice, prefer no padding. -//! 1. Build the engine using the selected alphabet and config. +//! [base64]: https://developer.mozilla.org/en-US/docs/Glossary/Base64 //! -//! For more detail, see below. +//! # Usage //! -//! ## Alphabets +//! Use an [`Engine`] to decode or encode base64, configured with the base64 +//! alphabet and padding behavior best suited to your application. //! -//! An [alphabet::Alphabet] defines what ASCII symbols are used to encode to or decode from. +//! ## Engine setup //! -//! Constants in [alphabet] like [alphabet::STANDARD] or [alphabet::URL_SAFE] provide commonly used -//! alphabets, but you can also build your own custom [alphabet::Alphabet] if needed. +//! There is more than one way to encode a stream of bytes as “base64”. +//! Different applications use different encoding +//! [alphabets][alphabet::Alphabet] and +//! [padding behaviors][engine::general_purpose::GeneralPurposeConfig]. //! -//! ## Engines +//! ### Encoding alphabet //! -//! Once you have an `Alphabet`, you can pick which `Engine` you want. 
A few parts of the public -//! API provide a default, but otherwise the user must provide an `Engine` to use. +//! Almost all base64 [alphabets][alphabet::Alphabet] use `A-Z`, `a-z`, and +//! `0-9`, which gives nearly 64 characters (26 + 26 + 10 = 62), but they differ +//! in their choice of their final 2. //! -//! See [Engine] for more. +//! Most applications use the [standard][alphabet::STANDARD] alphabet specified +//! in [RFC 4648][rfc-alphabet]. If that’s all you need, you can get started +//! quickly by using the pre-configured +//! [`STANDARD`][engine::general_purpose::STANDARD] engine, which is also available +//! in the [`prelude`] module as shown here, if you prefer a minimal `use` +//! footprint. //! -//! ## Config +#![cfg_attr(feature = "alloc", doc = "```")] +#![cfg_attr(not(feature = "alloc"), doc = "```ignore")] +//! use base64::prelude::*; //! -//! In addition to an `Alphabet`, constructing an `Engine` also requires an [engine::Config]. Each -//! `Engine` has a corresponding `Config` implementation since different `Engine`s may offer different -//! levels of configurability. -//! -//! # Encoding -//! -//! Several different encoding methods on [Engine] are available to you depending on your desire for -//! convenience vs performance. +//! # fn main() -> Result<(), base64::DecodeError> { +//! assert_eq!(BASE64_STANDARD.decode(b"+uwgVQA=")?, b"\xFA\xEC\x20\x55\0"); +//! assert_eq!(BASE64_STANDARD.encode(b"\xFF\xEC\x20\x55\0"), "/+wgVQA="); +//! # Ok(()) +//! # } +//! ``` //! -//! | Method | Output | Allocates | -//! | ------------------------ | ---------------------------- | ------------------------------ | -//! | [Engine::encode] | Returns a new `String` | Always | -//! | [Engine::encode_string] | Appends to provided `String` | Only if `String` needs to grow | -//! | [Engine::encode_slice] | Writes to provided `&[u8]` | Never - fastest | +//! [rfc-alphabet]: https://datatracker.ietf.org/doc/html/rfc4648#section-4 //! -//! 
All of the encoding methods will pad as per the engine's config. +//! Other common alphabets are available in the [`alphabet`] module. //! -//! # Decoding +//! #### URL-safe alphabet //! -//! Just as for encoding, there are different decoding methods available. +//! The standard alphabet uses `+` and `/` as its two non-alphanumeric tokens, +//! which cannot be safely used in URL’s without encoding them as `%2B` and +//! `%2F`. //! -//! | Method | Output | Allocates | -//! | ------------------------ | ----------------------------- | ------------------------------ | -//! | [Engine::decode] | Returns a new `Vec<u8>` | Always | -//! | [Engine::decode_vec] | Appends to provided `Vec<u8>` | Only if `Vec` needs to grow | -//! | [Engine::decode_slice] | Writes to provided `&[u8]` | Never - fastest | +//! To avoid that, some applications use a [“URL-safe” alphabet][alphabet::URL_SAFE], +//! which uses `-` and `_` instead. To use that alternative alphabet, use the +//! [`URL_SAFE`][engine::general_purpose::URL_SAFE] engine. This example doesn't +//! use [`prelude`] to show what a more explicit `use` would look like. //! -//! Unlike encoding, where all possible input is valid, decoding can fail (see [DecodeError]). +#![cfg_attr(feature = "alloc", doc = "```")] +#![cfg_attr(not(feature = "alloc"), doc = "```ignore")] +//! use base64::{engine::general_purpose::URL_SAFE, Engine as _}; //! -//! Input can be invalid because it has invalid characters or invalid padding. The nature of how -//! padding is checked depends on the engine's config. -//! Whitespace in the input is invalid, just like any other non-base64 byte. +//! # fn main() -> Result<(), base64::DecodeError> { +//! assert_eq!(URL_SAFE.decode(b"-uwgVQA=")?, b"\xFA\xEC\x20\x55\0"); +//! assert_eq!(URL_SAFE.encode(b"\xFF\xEC\x20\x55\0"), "_-wgVQA="); +//! # Ok(()) +//! # } +//! ``` //! -//! # `Read` and `Write` +//! ### Padding characters //! -//! 
To decode a [std::io::Read] of b64 bytes, wrap a reader (file, network socket, etc) with -//! [read::DecoderReader]. +//! Each base64 character represents 6 bits (2⁶ = 64) of the original binary +//! data, and every 3 bytes of input binary data will encode to 4 base64 +//! characters (8 bits × 3 = 6 bits × 4 = 24 bits). //! -//! To write raw bytes and have them b64 encoded on the fly, wrap a [std::io::Write] with -//! [write::EncoderWriter]. +//! When the input is not an even multiple of 3 bytes in length, [canonical][] +//! base64 encoders insert padding characters at the end, so that the output +//! length is always a multiple of 4: //! -//! There is some performance overhead (15% or so) because of the necessary buffer shuffling -- -//! still fast enough that almost nobody cares. Also, these implementations do not heap allocate. +//! [canonical]: https://datatracker.ietf.org/doc/html/rfc4648#section-3.5 //! -//! # `Display` +#![cfg_attr(feature = "alloc", doc = "```")] +#![cfg_attr(not(feature = "alloc"), doc = "```ignore")] +//! use base64::{engine::general_purpose::STANDARD, Engine as _}; //! -//! See [display] for how to transparently base64 data via a `Display` implementation. +//! assert_eq!(STANDARD.encode(b""), ""); +//! assert_eq!(STANDARD.encode(b"f"), "Zg=="); +//! assert_eq!(STANDARD.encode(b"fo"), "Zm8="); +//! assert_eq!(STANDARD.encode(b"foo"), "Zm9v"); +//! ``` //! -//! # Examples +//! Canonical encoding ensures that base64 encodings will be exactly the same, +//! byte-for-byte, regardless of input length. But the `=` padding characters +//! aren’t necessary for decoding, and they may be omitted by using a +//! [`NO_PAD`][engine::general_purpose::NO_PAD] configuration: //! -//! ## Using predefined engines +#![cfg_attr(feature = "alloc", doc = "```")] +#![cfg_attr(not(feature = "alloc"), doc = "```ignore")] +//! use base64::{engine::general_purpose::STANDARD_NO_PAD, Engine as _}; //! +//! assert_eq!(STANDARD_NO_PAD.encode(b""), ""); +//! 
assert_eq!(STANDARD_NO_PAD.encode(b"f"), "Zg"); +//! assert_eq!(STANDARD_NO_PAD.encode(b"fo"), "Zm8"); +//! assert_eq!(STANDARD_NO_PAD.encode(b"foo"), "Zm9v"); //! ``` -//! use base64::{Engine as _, engine::general_purpose}; //! -//! let orig = b"data"; -//! let encoded: String = general_purpose::STANDARD_NO_PAD.encode(orig); -//! assert_eq!("ZGF0YQ", encoded); -//! assert_eq!(orig.as_slice(), &general_purpose::STANDARD_NO_PAD.decode(encoded).unwrap()); +//! The pre-configured `NO_PAD` engines will reject inputs containing padding +//! `=` characters. To encode without padding and still accept padding while +//! decoding, create an [engine][engine::general_purpose::GeneralPurpose] with +//! that [padding mode][engine::DecodePaddingMode]. //! -//! // or, URL-safe -//! let encoded_url = general_purpose::URL_SAFE_NO_PAD.encode(orig); +#![cfg_attr(feature = "alloc", doc = "```")] +#![cfg_attr(not(feature = "alloc"), doc = "```ignore")] +//! # use base64::{engine::general_purpose::STANDARD_NO_PAD, Engine as _}; +//! assert_eq!(STANDARD_NO_PAD.decode(b"Zm8="), Err(base64::DecodeError::InvalidPadding)); //! ``` //! -//! ## Custom alphabet, config, and engine +//! ### Further customization //! -//! ``` +//! Decoding and encoding behavior can be customized by creating an +//! [engine][engine::GeneralPurpose] with an [alphabet][alphabet::Alphabet] and +//! [padding configuration][engine::GeneralPurposeConfig]: +//! +#![cfg_attr(feature = "alloc", doc = "```")] +#![cfg_attr(not(feature = "alloc"), doc = "```ignore")] //! use base64::{engine, alphabet, Engine as _}; //! //! // bizarro-world base64: +/ as the first symbols instead of the last @@ -115,6 +141,81 @@ //! //! ``` //! +//! ## Memory allocation +//! +//! The [decode][Engine::decode()] and [encode][Engine::encode()] engine methods +//! allocate memory for their results – `decode` returns a `Vec<u8>` and +//! `encode` returns a `String`. To instead decode or encode into a buffer that +//! 
you allocated, use one of the alternative methods: +//! +//! #### Decoding +//! +//! | Method | Output | Allocates memory | +//! | -------------------------- | ----------------------------- | ----------------------------- | +//! | [`Engine::decode`] | returns a new `Vec<u8>` | always | +//! | [`Engine::decode_vec`] | appends to provided `Vec<u8>` | if `Vec` lacks capacity | +//! | [`Engine::decode_slice`] | writes to provided `&[u8]` | never +//! +//! #### Encoding +//! +//! | Method | Output | Allocates memory | +//! | -------------------------- | ---------------------------- | ------------------------------ | +//! | [`Engine::encode`] | returns a new `String` | always | +//! | [`Engine::encode_string`] | appends to provided `String` | if `String` lacks capacity | +//! | [`Engine::encode_slice`] | writes to provided `&[u8]` | never | +//! +//! ## Input and output +//! +//! The `base64` crate can [decode][Engine::decode()] and +//! [encode][Engine::encode()] values in memory, or +//! [`DecoderReader`][read::DecoderReader] and +//! [`EncoderWriter`][write::EncoderWriter] provide streaming decoding and +//! encoding for any [readable][std::io::Read] or [writable][std::io::Write] +//! byte stream. +//! +//! #### Decoding +//! +#![cfg_attr(feature = "std", doc = "```")] +#![cfg_attr(not(feature = "std"), doc = "```ignore")] +//! # use std::io; +//! use base64::{engine::general_purpose::STANDARD, read::DecoderReader}; +//! +//! # fn main() -> Result<(), Box<dyn std::error::Error>> { +//! let mut input = io::stdin(); +//! let mut decoder = DecoderReader::new(&mut input, &STANDARD); +//! io::copy(&mut decoder, &mut io::stdout())?; +//! # Ok(()) +//! # } +//! ``` +//! +//! #### Encoding +//! +#![cfg_attr(feature = "std", doc = "```")] +#![cfg_attr(not(feature = "std"), doc = "```ignore")] +//! # use std::io; +//! use base64::{engine::general_purpose::STANDARD, write::EncoderWriter}; +//! +//! # fn main() -> Result<(), Box<dyn std::error::Error>> { +//! 
let mut output = io::stdout(); +//! let mut encoder = EncoderWriter::new(&mut output, &STANDARD); +//! io::copy(&mut io::stdin(), &mut encoder)?; +//! # Ok(()) +//! # } +//! ``` +//! +//! #### Display +//! +//! If you only need a base64 representation for implementing the +//! [`Display`][std::fmt::Display] trait, use +//! [`Base64Display`][display::Base64Display]: +//! +//! ``` +//! use base64::{display::Base64Display, engine::general_purpose::STANDARD}; +//! +//! let value = Base64Display::new(b"\0\x01\x02\x03", &STANDARD); +//! assert_eq!("base64: AAECAw==", format!("base64: {}", value)); +//! ``` +//! //! # Panics //! //! If length calculations result in overflowing `usize`, a panic will result. @@ -136,10 +237,8 @@ #![allow(clippy::single_component_path_imports)] #![cfg_attr(not(any(feature = "std", test)), no_std)] -#[cfg(all(feature = "alloc", not(any(feature = "std", test))))] +#[cfg(any(feature = "alloc", test))] extern crate alloc; -#[cfg(any(feature = "std", test))] -extern crate std as alloc; // has to be included at top level because of the way rstest_reuse defines its macros #[cfg(test)] @@ -159,14 +258,14 @@ pub mod alphabet; mod encode; #[allow(deprecated)] -#[cfg(any(feature = "alloc", feature = "std", test))] +#[cfg(any(feature = "alloc", test))] pub use crate::encode::{encode, encode_engine, encode_engine_string}; #[allow(deprecated)] pub use crate::encode::{encode_engine_slice, encoded_len, EncodeSliceError}; mod decode; #[allow(deprecated)] -#[cfg(any(feature = "alloc", feature = "std", test))] +#[cfg(any(feature = "alloc", test))] pub use crate::decode::{decode, decode_engine, decode_engine_vec}; #[allow(deprecated)] pub use crate::decode::{decode_engine_slice, decoded_len_estimate, DecodeError, DecodeSliceError}; diff --git a/src/prelude.rs b/src/prelude.rs index fbeb5ba..df5fdb4 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -5,7 +5,8 @@ //! //! # Examples //! -//! 
``` +#![cfg_attr(feature = "alloc", doc = "```")] +#![cfg_attr(not(feature = "alloc"), doc = "```ignore")] //! use base64::prelude::{Engine as _, BASE64_STANDARD_NO_PAD}; //! //! assert_eq!("c29tZSBieXRlcw", &BASE64_STANDARD_NO_PAD.encode(b"some bytes")); diff --git a/src/read/decoder.rs b/src/read/decoder.rs index 4888c9c..b656ae3 100644 --- a/src/read/decoder.rs +++ b/src/read/decoder.rs @@ -1,4 +1,4 @@ -use crate::{engine::Engine, DecodeError}; +use crate::{engine::Engine, DecodeError, PAD_BYTE}; use std::{cmp, fmt, io}; // This should be large, but it has to fit on the stack. @@ -46,13 +46,15 @@ pub struct DecoderReader<'e, E: Engine, R: io::Read> { // Technically we only need to hold 2 bytes but then we'd need a separate temporary buffer to // decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest // into here, which seems like a lot of complexity for 1 extra byte of storage. - decoded_buffer: [u8; 3], + decoded_buffer: [u8; DECODED_CHUNK_SIZE], // index of start of decoded data decoded_offset: usize, // length of decoded data decoded_len: usize, // used to provide accurate offsets in errors total_b64_decoded: usize, + // offset of previously seen padding, if any + padding_offset: Option<usize>, } impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> { @@ -64,6 +66,7 @@ impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> { .field("decoded_offset", &self.decoded_offset) .field("decoded_len", &self.decoded_len) .field("total_b64_decoded", &self.total_b64_decoded) + .field("padding_offset", &self.padding_offset) .finish() } } @@ -81,6 +84,7 @@ impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> { decoded_offset: 0, decoded_len: 0, total_b64_decoded: 0, + padding_offset: None, } } @@ -127,20 +131,28 @@ impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> { /// caller's responsibility to choose the number of b64 bytes to decode correctly. 
/// /// Returns a Result with the number of decoded bytes written to `buf`. - fn decode_to_buf(&mut self, num_bytes: usize, buf: &mut [u8]) -> io::Result<usize> { - debug_assert!(self.b64_len >= num_bytes); + fn decode_to_buf(&mut self, b64_len_to_decode: usize, buf: &mut [u8]) -> io::Result<usize> { + debug_assert!(self.b64_len >= b64_len_to_decode); debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); debug_assert!(!buf.is_empty()); - let decoded = self + let b64_to_decode = &self.b64_buffer[self.b64_offset..self.b64_offset + b64_len_to_decode]; + let decode_metadata = self .engine .internal_decode( - &self.b64_buffer[self.b64_offset..self.b64_offset + num_bytes], + b64_to_decode, buf, - self.engine.internal_decoded_len_estimate(num_bytes), + self.engine.internal_decoded_len_estimate(b64_len_to_decode), ) .map_err(|e| match e { DecodeError::InvalidByte(offset, byte) => { + // This can be incorrect, but not in a way that probably matters to anyone: + // if there was padding handled in a previous decode, and we are now getting + // InvalidByte due to more padding, we should arguably report InvalidByte with + // PAD_BYTE at the original padding position (`self.padding_offset`), but we + // don't have a good way to tie those two cases together, so instead we + // just report the invalid byte as if the previous padding, and its possibly + // related downgrade to a now invalid byte, didn't happen. 
DecodeError::InvalidByte(self.total_b64_decoded + offset, byte) } DecodeError::InvalidLength => DecodeError::InvalidLength, @@ -151,13 +163,27 @@ impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> { }) .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; - self.total_b64_decoded += num_bytes; - self.b64_offset += num_bytes; - self.b64_len -= num_bytes; + if let Some(offset) = self.padding_offset { + // we've already seen padding + if decode_metadata.decoded_len > 0 { + // we read more after already finding padding; report error at first padding byte + return Err(io::Error::new( + io::ErrorKind::InvalidData, + DecodeError::InvalidByte(offset, PAD_BYTE), + )); + } + } + + self.padding_offset = self.padding_offset.or(decode_metadata + .padding_offset + .map(|offset| self.total_b64_decoded + offset)); + self.total_b64_decoded += b64_len_to_decode; + self.b64_offset += b64_len_to_decode; + self.b64_len -= b64_len_to_decode; debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); - Ok(decoded) + Ok(decode_metadata.decoded_len) } /// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded @@ -205,9 +231,9 @@ impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> { self.decoded_offset < DECODED_CHUNK_SIZE }); - // We shouldn't ever decode into here when we can't immediately write at least one byte into - // the provided buf, so the effective length should only be 3 momentarily between when we - // decode and when we copy into the target buffer. + // We shouldn't ever decode into decoded_buffer when we can't immediately write at least one + // byte into the provided buf, so the effective length should only be 3 momentarily between + // when we decode and when we copy into the target buffer. 
debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE); debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE); @@ -217,20 +243,15 @@ impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> { } else { let mut at_eof = false; while self.b64_len < BASE64_CHUNK_SIZE { - // Work around lack of copy_within, which is only present in 1.37 // Copy any bytes we have to the start of the buffer. - // We know we have < 1 chunk, so we can use a tiny tmp buffer. - let mut memmove_buf = [0_u8; BASE64_CHUNK_SIZE]; - memmove_buf[..self.b64_len].copy_from_slice( - &self.b64_buffer[self.b64_offset..self.b64_offset + self.b64_len], - ); - self.b64_buffer[0..self.b64_len].copy_from_slice(&memmove_buf[..self.b64_len]); + self.b64_buffer + .copy_within(self.b64_offset..self.b64_offset + self.b64_len, 0); self.b64_offset = 0; // then fill in more data let read = self.read_from_delegate()?; if read == 0 { - // we never pass in an empty buf, so 0 => we've hit EOF + // we never read into an empty buf, so 0 => we've hit EOF at_eof = true; break; } diff --git a/src/read/decoder_tests.rs b/src/read/decoder_tests.rs index 65d58d8..099dd63 100644 --- a/src/read/decoder_tests.rs +++ b/src/read/decoder_tests.rs @@ -8,9 +8,10 @@ use rand::{Rng as _, RngCore as _}; use super::decoder::{DecoderReader, BUF_SIZE}; use crate::{ + alphabet, engine::{general_purpose::STANDARD, Engine, GeneralPurpose}, tests::{random_alphabet, random_config, random_engine}, - DecodeError, + DecodeError, PAD_BYTE, }; #[test] @@ -75,7 +76,7 @@ fn trailing_junk() { saw_error = true; break; } - Ok(read) if read == 0 => break, + Ok(0) => break, Ok(_) => (), } } @@ -247,19 +248,21 @@ fn reports_invalid_byte_correctly() { let mut rng = rand::thread_rng(); let mut bytes = Vec::new(); let mut b64 = String::new(); - let mut decoded = Vec::new(); + let mut stream_decoded = Vec::new(); + let mut bulk_decoded = Vec::new(); for _ in 0..10_000 { bytes.clear(); b64.clear(); - decoded.clear(); + 
stream_decoded.clear(); + bulk_decoded.clear(); let size = rng.gen_range(1..(10 * BUF_SIZE)); bytes.extend(iter::repeat(0).take(size)); rng.fill_bytes(&mut bytes[..size]); assert_eq!(size, bytes.len()); - let engine = random_engine(&mut rng); + let engine = GeneralPurpose::new(&alphabet::STANDARD, random_config(&mut rng)); engine.encode_string(&bytes[..], &mut b64); // replace one byte, somewhere, with '*', which is invalid @@ -270,9 +273,8 @@ fn reports_invalid_byte_correctly() { let mut wrapped_reader = io::Cursor::new(b64_bytes.clone()); let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine); - // some gymnastics to avoid double-moving the io::Error, which is not Copy let read_decode_err = decoder - .read_to_end(&mut decoded) + .read_to_end(&mut stream_decoded) .map_err(|e| { let kind = e.kind(); let inner = e @@ -283,8 +285,7 @@ fn reports_invalid_byte_correctly() { .err() .and_then(|o| o); - let mut bulk_buf = Vec::new(); - let bulk_decode_err = engine.decode_vec(&b64_bytes[..], &mut bulk_buf).err(); + let bulk_decode_err = engine.decode_vec(&b64_bytes[..], &mut bulk_decoded).err(); // it's tricky to predict where the invalid data's offset will be since if it's in the last // chunk it will be reported at the first padding location because it's treated as invalid @@ -296,6 +297,134 @@ fn reports_invalid_byte_correctly() { } } +#[test] +fn internal_padding_error_with_short_read_concatenated_texts_invalid_byte_error() { + let mut rng = rand::thread_rng(); + let mut bytes = Vec::new(); + let mut b64 = String::new(); + let mut reader_decoded = Vec::new(); + let mut bulk_decoded = Vec::new(); + + // encodes with padding, requires that padding be present so we don't get InvalidPadding + // just because padding is there at all + let engine = STANDARD; + + for _ in 0..10_000 { + bytes.clear(); + b64.clear(); + reader_decoded.clear(); + bulk_decoded.clear(); + + // at least 2 bytes so there can be a split point between bytes + let size = rng.gen_range(2..(10 
* BUF_SIZE)); + bytes.resize(size, 0); + rng.fill_bytes(&mut bytes[..size]); + + // Concatenate two valid b64s, yielding padding in the middle. + // This avoids scenarios that are challenging to assert on, like random padding location + // that might be InvalidLastSymbol when decoded at certain buffer sizes but InvalidByte + // when done all at once. + let split = loop { + // find a split point that will produce padding on the first part + let s = rng.gen_range(1..size); + if s % 3 != 0 { + // short enough to need padding + break s; + }; + }; + + engine.encode_string(&bytes[..split], &mut b64); + assert!(b64.contains('='), "split: {}, b64: {}", split, b64); + let bad_byte_pos = b64.find('=').unwrap(); + engine.encode_string(&bytes[split..], &mut b64); + let b64_bytes = b64.as_bytes(); + + // short read to make it plausible for padding to happen on a read boundary + let read_len = rng.gen_range(1..10); + let mut wrapped_reader = ShortRead { + max_read_len: read_len, + delegate: io::Cursor::new(&b64_bytes), + }; + + let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine); + + let read_decode_err = decoder + .read_to_end(&mut reader_decoded) + .map_err(|e| { + *e.into_inner() + .and_then(|e| e.downcast::<DecodeError>().ok()) + .unwrap() + }) + .unwrap_err(); + + let bulk_decode_err = engine.decode_vec(b64_bytes, &mut bulk_decoded).unwrap_err(); + + assert_eq!( + bulk_decode_err, + read_decode_err, + "read len: {}, bad byte pos: {}, b64: {}", + read_len, + bad_byte_pos, + std::str::from_utf8(b64_bytes).unwrap() + ); + assert_eq!( + DecodeError::InvalidByte( + split / 3 * 4 + + match split % 3 { + 1 => 2, + 2 => 3, + _ => unreachable!(), + }, + PAD_BYTE + ), + read_decode_err + ); + } +} + +#[test] +fn internal_padding_anywhere_error() { + let mut rng = rand::thread_rng(); + let mut bytes = Vec::new(); + let mut b64 = String::new(); + let mut reader_decoded = Vec::new(); + + // encodes with padding, requires that padding be present so we don't get 
InvalidPadding + // just because padding is there at all + let engine = STANDARD; + + for _ in 0..10_000 { + bytes.clear(); + b64.clear(); + reader_decoded.clear(); + + bytes.resize(10 * BUF_SIZE, 0); + rng.fill_bytes(&mut bytes[..]); + + // Just shove a padding byte in there somewhere. + // The specific error to expect is challenging to predict precisely because it + // will vary based on the position of the padding in the quad and the read buffer + // length, but SOMETHING should go wrong. + + engine.encode_string(&bytes[..], &mut b64); + let mut b64_bytes = b64.as_bytes().to_vec(); + // put padding somewhere other than the last quad + b64_bytes[rng.gen_range(0..bytes.len() - 4)] = PAD_BYTE; + + // short read to make it plausible for padding to happen on a read boundary + let read_len = rng.gen_range(1..10); + let mut wrapped_reader = ShortRead { + max_read_len: read_len, + delegate: io::Cursor::new(&b64_bytes), + }; + + let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine); + + let result = decoder.read_to_end(&mut reader_decoded); + assert!(result.is_err()); + } +} + fn consume_with_short_reads_and_validate<R: io::Read>( rng: &mut rand::rngs::ThreadRng, expected_bytes: &[u8], @@ -344,3 +473,15 @@ impl<'a, 'b, R: io::Read, N: rand::Rng> io::Read for RandomShortRead<'a, 'b, R, self.delegate.read(&mut buf[..effective_len]) } } + +struct ShortRead<R: io::Read> { + delegate: R, + max_read_len: usize, +} + +impl<R: io::Read> io::Read for ShortRead<R> { + fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { + let len = self.max_read_len.max(buf.len()); + self.delegate.read(&mut buf[..len]) + } +} diff --git a/src/write/encoder_string_writer.rs b/src/write/encoder_string_writer.rs index 9394dc9..9c02bcd 100644 --- a/src/write/encoder_string_writer.rs +++ b/src/write/encoder_string_writer.rs @@ -44,11 +44,6 @@ use std::io; /// assert_eq!("base64: YXNkZg==", &buf); /// ``` /// -/// # Panics -/// -/// Calling `write()` (or related methods) or `finish()` 
after `finish()` has completed without -/// error is invalid and will panic. -/// /// # Performance /// /// Because it has to validate that the base64 is UTF-8, it is about 80% as fast as writing plain @@ -144,6 +139,7 @@ mod tests { engine::Engine, tests::random_engine, write::encoder_string_writer::EncoderStringWriter, }; use rand::Rng; + use std::cmp; use std::io::Write; #[test] @@ -158,9 +154,8 @@ mod tests { orig_data.clear(); normal_encoded.clear(); - for _ in 0..size { - orig_data.push(rng.gen()); - } + orig_data.resize(size, 0); + rng.fill(&mut orig_data[..]); let engine = random_engine(&mut rng); engine.encode_string(&orig_data, &mut normal_encoded); @@ -175,4 +170,38 @@ mod tests { assert_eq!(normal_encoded, stream_encoded); } } + #[test] + fn incremental_writes() { + let mut rng = rand::thread_rng(); + let mut orig_data = Vec::<u8>::new(); + let mut normal_encoded = String::new(); + + let size = 5_000; + + for _ in 0..size { + orig_data.clear(); + normal_encoded.clear(); + + orig_data.resize(size, 0); + rng.fill(&mut orig_data[..]); + + let engine = random_engine(&mut rng); + engine.encode_string(&orig_data, &mut normal_encoded); + + let mut stream_encoder = EncoderStringWriter::new(&engine); + // write small nibbles of data + let mut offset = 0; + while offset < size { + let nibble_size = cmp::min(rng.gen_range(0..=64), size - offset); + let len = stream_encoder + .write(&orig_data[offset..offset + nibble_size]) + .unwrap(); + offset += len; + } + + let stream_encoded = stream_encoder.into_inner(); + + assert_eq!(normal_encoded, stream_encoded); + } + } } diff --git a/src/write/encoder_tests.rs b/src/write/encoder_tests.rs index ce76d63..1f1a165 100644 --- a/src/write/encoder_tests.rs +++ b/src/write/encoder_tests.rs @@ -358,7 +358,7 @@ fn retrying_writes_that_error_with_interrupted_works() { Ok(_) => break, Err(e) => match e.kind() { io::ErrorKind::Interrupted => continue, - _ => Err(e).unwrap(), // bail + _ => panic!("{:?}", e), // bail }, } } diff 
--git a/tests/encode.rs b/tests/encode.rs index 2e1f893..9d69447 100644 --- a/tests/encode.rs +++ b/tests/encode.rs @@ -8,11 +8,7 @@ fn compare_encode(expected: &str, target: &[u8]) { #[test] fn encode_all_ascii() { - let mut ascii = Vec::<u8>::with_capacity(128); - - for i in 0..128 { - ascii.push(i); - } + let ascii: Vec<u8> = (0..=127).collect(); compare_encode( "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7P\ @@ -24,12 +20,7 @@ fn encode_all_ascii() { #[test] fn encode_all_bytes() { - let mut bytes = Vec::<u8>::with_capacity(256); - - for i in 0..255 { - bytes.push(i); - } - bytes.push(255); //bug with "overflowing" ranges? + let bytes: Vec<u8> = (0..=255).collect(); compare_encode( "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7P\ @@ -42,12 +33,7 @@ fn encode_all_bytes() { #[test] fn encode_all_bytes_url() { - let mut bytes = Vec::<u8>::with_capacity(256); - - for i in 0..255 { - bytes.push(i); - } - bytes.push(255); //bug with "overflowing" ranges? 
+ let bytes: Vec<u8> = (0..=255).collect(); assert_eq!( "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0\ @@ -55,6 +41,37 @@ fn encode_all_bytes_url() { -AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq\ -wsbKztLW2t7i5uru8vb6_wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t_g4eLj5OXm5-jp6uvs7e7v8PHy\ 8_T19vf4-fr7_P3-_w==", - &engine::GeneralPurpose::new(&URL_SAFE, PAD).encode(&bytes) + &engine::GeneralPurpose::new(&URL_SAFE, PAD).encode(bytes) ); } + +#[test] +fn encoded_len_unpadded() { + assert_eq!(0, encoded_len(0, false).unwrap()); + assert_eq!(2, encoded_len(1, false).unwrap()); + assert_eq!(3, encoded_len(2, false).unwrap()); + assert_eq!(4, encoded_len(3, false).unwrap()); + assert_eq!(6, encoded_len(4, false).unwrap()); + assert_eq!(7, encoded_len(5, false).unwrap()); + assert_eq!(8, encoded_len(6, false).unwrap()); + assert_eq!(10, encoded_len(7, false).unwrap()); +} + +#[test] +fn encoded_len_padded() { + assert_eq!(0, encoded_len(0, true).unwrap()); + assert_eq!(4, encoded_len(1, true).unwrap()); + assert_eq!(4, encoded_len(2, true).unwrap()); + assert_eq!(4, encoded_len(3, true).unwrap()); + assert_eq!(8, encoded_len(4, true).unwrap()); + assert_eq!(8, encoded_len(5, true).unwrap()); + assert_eq!(8, encoded_len(6, true).unwrap()); + assert_eq!(12, encoded_len(7, true).unwrap()); +} +#[test] +fn encoded_len_overflow() { + let max_size = usize::MAX / 4 * 3 + 2; + assert_eq!(2, max_size % 3); + assert_eq!(Some(usize::MAX), encoded_len(max_size, false)); + assert_eq!(None, encoded_len(max_size + 1, false)); +} |