aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2024-02-02 23:52:03 +0000
committerAndroid Build Coastguard Worker <android-build-coastguard-worker@google.com>2024-02-02 23:52:03 +0000
commit7d653f3bd3f888d745509de507a5b69f01055597 (patch)
treef926c571cbd4fd2e9116e5ee98efcbe17dc354ae
parent833562ca05db671f59197ee5672363af2e2e8fbc (diff)
parent86d8e5b747bb44b483754be095afebfe03ad0875 (diff)
downloadbase64-simpleperf-release.tar.gz
Snap for 11400057 from 86d8e5b747bb44b483754be095afebfe03ad0875 to simpleperf-releasesimpleperf-release
Change-Id: I60dfcdb501724b51ab706d5dba1175fa519eb285
-rw-r--r--.cargo_vcs_info.json2
-rw-r--r--.circleci/config.yml38
-rw-r--r--.github/ISSUE_TEMPLATE/general-purpose-issue.md21
-rw-r--r--Android.bp5
-rw-r--r--Cargo.toml42
-rw-r--r--Cargo.toml.orig37
-rw-r--r--METADATA25
-rw-r--r--README.md10
-rw-r--r--RELEASE-NOTES.md42
-rw-r--r--benches/benchmarks.rs9
-rw-r--r--clippy.toml2
-rw-r--r--examples/base64.rs50
-rw-r--r--patches/doc-string-fix.patch41
-rw-r--r--src/alphabet.rs62
-rw-r--r--src/chunked_encoder.rs107
-rw-r--r--src/decode.rs21
-rw-r--r--src/encode.rs66
-rw-r--r--src/engine/general_purpose/decode.rs59
-rw-r--r--src/engine/general_purpose/decode_suffix.rs17
-rw-r--r--src/engine/general_purpose/mod.rs7
-rw-r--r--src/engine/mod.rs234
-rw-r--r--src/engine/naive.rs7
-rw-r--r--src/engine/tests.rs393
-rw-r--r--src/lib.rs241
-rw-r--r--src/prelude.rs3
-rw-r--r--src/read/decoder.rs65
-rw-r--r--src/read/decoder_tests.rs159
-rw-r--r--src/write/encoder_string_writer.rs45
-rw-r--r--src/write/encoder_tests.rs2
-rw-r--r--tests/encode.rs53
30 files changed, 1308 insertions, 557 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index 7b32cf5..d61e543 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,6 +1,6 @@
{
"git": {
- "sha1": "d7fb31c4ada4ca45df5ae80ec691fa3a050d9c3e"
+ "sha1": "9652c787730e58515ce7b44fcafd2430ab424628"
},
"path_in_vcs": ""
} \ No newline at end of file
diff --git a/.circleci/config.yml b/.circleci/config.yml
index fa98f9c..ac0fae1 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -14,13 +14,15 @@ workflows:
# be easier on the CI hosts since presumably those fat lower layers will already be cached, and
# therefore faster than a minimal, customized alpine.
# MSRV
- 'rust:1.57.0'
+ 'rust:1.48.0'
]
# a hacky scheme to work around CircleCI's inability to deal with mutable docker tags, forcing us to
# get a nightly or stable toolchain via rustup instead of a mutable docker tag
toolchain_override: [
'__msrv__', # won't add any other toolchains, just uses what's in the docker image
+ '1.65.0', # minimum needed to build dev-dependencies
'stable',
+ 'beta',
'nightly'
]
@@ -49,6 +51,12 @@ jobs:
name: Log rustc version
command: rustc --version
- run:
+ name: Build main target
+ # update first to select dependencies appropriate for this toolchain
+ command: |
+ cargo update
+ cargo build
+ - run:
name: Check formatting
command: |
rustup component add rustfmt
@@ -64,13 +72,27 @@ jobs:
fi
- run:
name: Build all targets
- command: cargo build --all-targets
+ command: |
+ if [[ '<< parameters.toolchain_override >>' != '__msrv__' ]]
+ then
+ cargo build --all-targets
+ fi
- run:
name: Build without default features
- command: cargo build --no-default-features
+ command: |
+ cargo build --no-default-features
+ if [[ '<< parameters.toolchain_override >>' != '__msrv__' ]]
+ then
+ cargo build --no-default-features --all-targets
+ fi
- run:
name: Build with only alloc
- command: cargo build --no-default-features --features alloc
+ command: |
+ cargo build --no-default-features --features alloc
+ if [[ '<< parameters.toolchain_override >>' != '__msrv__' ]]
+ then
+ cargo build --no-default-features --features alloc --all-targets
+ fi
- run:
name: Add arm toolchain
command: rustup target add thumbv6m-none-eabi
@@ -81,8 +103,14 @@ jobs:
name: Build ARM with only alloc feature
command: cargo build --target thumbv6m-none-eabi --no-default-features --features alloc
- run:
+ # dev dependencies can't build on 1.48.0
name: Run tests
- command: cargo test --verbose
+ command: |
+ if [[ '<< parameters.toolchain_override >>' != '__msrv__' ]]
+ then
+ cargo test --no-default-features
+ cargo test
+ fi
- run:
name: Build docs
command: cargo doc --verbose
diff --git a/.github/ISSUE_TEMPLATE/general-purpose-issue.md b/.github/ISSUE_TEMPLATE/general-purpose-issue.md
new file mode 100644
index 0000000..b35b2f3
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/general-purpose-issue.md
@@ -0,0 +1,21 @@
+---
+name: General purpose issue
+about: General purpose issue
+title: Default issue
+labels: ''
+assignees: ''
+
+---
+
+# Before you file an issue
+
+- Did you read the docs?
+- Did you read the README?
+
+# The problem
+
+-
+
+# How I, the issue filer, am going to help solve it
+
+-
diff --git a/Android.bp b/Android.bp
index a08937a..22a37c0 100644
--- a/Android.bp
+++ b/Android.bp
@@ -42,10 +42,11 @@ rust_library {
host_supported: true,
crate_name: "base64",
cargo_env_compat: true,
- cargo_pkg_version: "0.21.0",
+ cargo_pkg_version: "0.21.7",
srcs: ["src/lib.rs"],
- edition: "2021",
+ edition: "2018",
features: [
+ "alloc",
"default",
"std",
],
diff --git a/Cargo.toml b/Cargo.toml
index 83f3da1..e508297 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,10 +10,10 @@
# See Cargo.toml.orig for the original contents.
[package]
-edition = "2021"
-rust-version = "1.57.0"
+edition = "2018"
+rust-version = "1.48.0"
name = "base64"
-version = "0.21.0"
+version = "0.21.7"
authors = [
"Alice Maz <alice@alicemaz.com>",
"Marshall Pierce <marshall@mpierce.org>",
@@ -32,33 +32,57 @@ categories = ["encoding"]
license = "MIT OR Apache-2.0"
repository = "https://github.com/marshallpierce/rust-base64"
+[package.metadata.docs.rs]
+rustdoc-args = ["--generate-link-to-definition"]
+
[profile.bench]
-debug = true
+debug = 2
[profile.test]
opt-level = 3
+[[example]]
+name = "base64"
+required-features = ["std"]
+
+[[test]]
+name = "tests"
+required-features = ["alloc"]
+
+[[test]]
+name = "encode"
+required-features = ["alloc"]
+
[[bench]]
name = "benchmarks"
harness = false
+required-features = ["std"]
+
+[dev-dependencies.clap]
+version = "3.2.25"
+features = ["derive"]
[dev-dependencies.criterion]
version = "0.4.0"
+[dev-dependencies.once_cell]
+version = "1"
+
[dev-dependencies.rand]
version = "0.8.5"
features = ["small_rng"]
[dev-dependencies.rstest]
-version = "0.12.0"
+version = "0.13.0"
[dev-dependencies.rstest_reuse]
-version = "0.3.0"
+version = "0.6.0"
-[dev-dependencies.structopt]
-version = "0.3.26"
+[dev-dependencies.strum]
+version = "0.25"
+features = ["derive"]
[features]
alloc = []
default = ["std"]
-std = []
+std = ["alloc"]
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 33847db..4db5d26 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -1,6 +1,6 @@
[package]
name = "base64"
-version = "0.21.0"
+version = "0.21.7"
authors = ["Alice Maz <alice@alicemaz.com>", "Marshall Pierce <marshall@mpierce.org>"]
description = "encodes and decodes base64 as bytes or utf8"
repository = "https://github.com/marshallpierce/rust-base64"
@@ -9,26 +9,47 @@ readme = "README.md"
keywords = ["base64", "utf8", "encode", "decode", "no_std"]
categories = ["encoding"]
license = "MIT OR Apache-2.0"
-edition = "2021"
-rust-version = "1.57.0"
+edition = "2018"
+# dev-dependencies require 1.65, but the main code doesn't
+# This option was added in 1.56, keep it for when we bump MSRV.
+rust-version = "1.48.0"
[[bench]]
name = "benchmarks"
harness = false
+required-features = ["std"]
+
+[[example]]
+name = "base64"
+required-features = ["std"]
+
+[[test]]
+name = "tests"
+required-features = ["alloc"]
+
+[[test]]
+name = "encode"
+required-features = ["alloc"]
+
+[package.metadata.docs.rs]
+rustdoc-args = ["--generate-link-to-definition"]
[dev-dependencies]
criterion = "0.4.0"
rand = { version = "0.8.5", features = ["small_rng"] }
-# clap 4 would require 1.60
-structopt = "0.3.26"
+# Latest is 4.4.13 but specifies MSRV in Cargo.toml which means we can't depend
+# on it (even though we won't compile it in MSRV CI).
+clap = { version = "3.2.25", features = ["derive"] }
+strum = { version = "0.25", features = ["derive"] }
# test fixtures for engine tests
-rstest = "0.12.0"
-rstest_reuse = "0.3.0"
+rstest = "0.13.0"
+rstest_reuse = "0.6.0"
+once_cell = "1"
[features]
default = ["std"]
alloc = []
-std = []
+std = ["alloc"]
[profile.bench]
# Useful for better disassembly when using `perf record` and `perf annotate`
diff --git a/METADATA b/METADATA
index b85053b..b4a6927 100644
--- a/METADATA
+++ b/METADATA
@@ -1,23 +1,20 @@
# This project was upgraded with external_updater.
-# Usage: tools/external_updater/updater.sh update rust/crates/base64
-# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md
+# Usage: tools/external_updater/updater.sh update external/rust/crates/base64
+# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md
name: "base64"
description: "encodes and decodes base64 as bytes or utf8"
third_party {
- url {
- type: HOMEPAGE
- value: "https://crates.io/crates/base64"
- }
- url {
- type: ARCHIVE
- value: "https://static.crates.io/crates/base64/base64-0.21.0.crate"
- }
- version: "0.21.0"
license_type: NOTICE
last_upgrade_date {
- year: 2023
- month: 2
- day: 1
+ year: 2024
+ month: 1
+ day: 31
+ }
+ homepage: "https://crates.io/crates/base64"
+ identifier {
+ type: "Archive"
+ value: "https://static.crates.io/crates/base64/base64-0.21.7.crate"
+ version: "0.21.7"
}
}
diff --git a/README.md b/README.md
index d7b0885..f566756 100644
--- a/README.md
+++ b/README.md
@@ -63,7 +63,7 @@ optionally may allow other behaviors.
## Rust version compatibility
-The minimum supported Rust version is 1.57.0.
+The minimum supported Rust version is 1.48.0.
# Contributing
@@ -76,10 +76,10 @@ free time to give each PR the attention it deserves. I will get to everyone even
## Developing
-Benchmarks are in `benches/`. Running them requires nightly rust, but `rustup` makes it easy:
+Benchmarks are in `benches/`.
```bash
-rustup run nightly cargo bench
+cargo bench
```
## no_std
@@ -92,12 +92,12 @@ to bring back the support for heap allocations.
## Profiling
On Linux, you can use [perf](https://perf.wiki.kernel.org/index.php/Main_Page) for profiling. Then compile the
-benchmarks with `rustup nightly run cargo bench --no-run`.
+benchmarks with `cargo bench --no-run`.
Run the benchmark binary with `perf` (shown here filtering to one particular benchmark, which will make the results
easier to read). `perf` is only available to the root user on most systems as it fiddles with event counters in your
CPU, so use `sudo`. We need to run the actual benchmark binary, hence the path into `target`. You can see the actual
-full path with `rustup run nightly cargo bench -v`; it will print out the commands it runs. If you use the exact path
+full path with `cargo bench -v`; it will print out the commands it runs. If you use the exact path
that `bench` outputs, make sure you get the one that's for the benchmarks, not the tests. You may also want
to `cargo clean` so you have only one `benchmarks-` binary (they tend to accumulate).
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 4fcadda..0031215 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -1,7 +1,42 @@
-# 0.21.0
+# 0.21.7
+
+- Support getting an alphabet's contents as a str via `Alphabet::as_str()`
+
+# 0.21.6
+
+- Improved introductory documentation and example
+
+# 0.21.5
+
+- Add `Debug` and `Clone` impls for the general purpose Engine
+
+# 0.21.4
+
+- Make `encoded_len` `const`, allowing the creation of arrays sized to encode compile-time-known data lengths
+
+# 0.21.3
-(not yet released)
+- Implement `source` instead of `cause` on Error types
+- Roll back MSRV to 1.48.0 so Debian can continue to live in a time warp
+- Slightly faster chunked encoding for short inputs
+- Decrease binary size
+# 0.21.2
+
+- Rollback MSRV to 1.57.0 -- only dev dependencies need 1.60, not the main code
+
+# 0.21.1
+
+- Remove the possibility of panicking during decoded length calculations
+- `DecoderReader` no longer sometimes erroneously ignores
+ padding [#226](https://github.com/marshallpierce/rust-base64/issues/226)
+
+## Breaking changes
+
+- `Engine.internal_decode` return type changed
+- Update MSRV to 1.60.0
+
+# 0.21.0
## Migration
@@ -46,7 +81,8 @@ precisely, see the following table.
## Breaking changes
-- Re-exports of preconfigured engines in `engine` are removed in favor of `base64::prelude::...` that are better suited to those who wish to `use` the entire path to a name.
+- Re-exports of preconfigured engines in `engine` are removed in favor of `base64::prelude::...` that are better suited
+ to those who wish to `use` the entire path to a name.
# 0.21.0-beta.1
diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs
index 61d542f..802c8cc 100644
--- a/benches/benchmarks.rs
+++ b/benches/benchmarks.rs
@@ -39,8 +39,7 @@ fn do_decode_bench_slice(b: &mut Bencher, &size: &usize) {
fill(&mut v);
let encoded = STANDARD.encode(&v);
- let mut buf = Vec::new();
- buf.resize(size, 0);
+ let mut buf = vec![0; size];
b.iter(|| {
STANDARD.decode_slice(&encoded, &mut buf).unwrap();
black_box(&buf);
@@ -52,8 +51,7 @@ fn do_decode_bench_stream(b: &mut Bencher, &size: &usize) {
fill(&mut v);
let encoded = STANDARD.encode(&v);
- let mut buf = Vec::new();
- buf.resize(size, 0);
+ let mut buf = vec![0; size];
buf.truncate(0);
b.iter(|| {
@@ -96,9 +94,8 @@ fn do_encode_bench_reuse_buf(b: &mut Bencher, &size: &usize) {
fn do_encode_bench_slice(b: &mut Bencher, &size: &usize) {
let mut v: Vec<u8> = Vec::with_capacity(size);
fill(&mut v);
- let mut buf = Vec::new();
// conservative estimate of encoded size
- buf.resize(v.len() * 2, 0);
+ let mut buf = vec![0; v.len() * 2];
b.iter(|| STANDARD.encode_slice(&v, &mut buf).unwrap());
}
diff --git a/clippy.toml b/clippy.toml
index 23b32c1..11d46a7 100644
--- a/clippy.toml
+++ b/clippy.toml
@@ -1 +1 @@
-msrv = "1.57.0"
+msrv = "1.48.0"
diff --git a/examples/base64.rs b/examples/base64.rs
index 0a214d2..0c8aa3f 100644
--- a/examples/base64.rs
+++ b/examples/base64.rs
@@ -2,51 +2,40 @@ use std::fs::File;
use std::io::{self, Read};
use std::path::PathBuf;
use std::process;
-use std::str::FromStr;
use base64::{alphabet, engine, read, write};
-use structopt::StructOpt;
+use clap::Parser;
-#[derive(Debug, StructOpt)]
+#[derive(Clone, Debug, Parser, strum::EnumString, Default)]
+#[strum(serialize_all = "kebab-case")]
enum Alphabet {
+ #[default]
Standard,
UrlSafe,
}
-impl Default for Alphabet {
- fn default() -> Self {
- Self::Standard
- }
-}
-
-impl FromStr for Alphabet {
- type Err = String;
- fn from_str(s: &str) -> Result<Self, String> {
- match s {
- "standard" => Ok(Self::Standard),
- "urlsafe" => Ok(Self::UrlSafe),
- _ => Err(format!("alphabet '{}' unrecognized", s)),
- }
- }
-}
-
/// Base64 encode or decode FILE (or standard input), to standard output.
-#[derive(Debug, StructOpt)]
+#[derive(Debug, Parser)]
struct Opt {
- /// decode data
- #[structopt(short = "d", long = "decode")]
+ /// Decode the base64-encoded input (default: encode the input as base64).
+ #[structopt(short = 'd', long = "decode")]
decode: bool,
- /// The alphabet to choose. Defaults to the standard base64 alphabet.
- /// Supported alphabets include "standard" and "urlsafe".
+
+ /// The encoding alphabet: "standard" (default) or "url-safe".
#[structopt(long = "alphabet")]
alphabet: Option<Alphabet>,
- /// The file to encode/decode.
- #[structopt(parse(from_os_str))]
+
+ /// Omit padding characters while encoding, and reject them while decoding.
+ #[structopt(short = 'p', long = "no-padding")]
+ no_padding: bool,
+
+ /// The file to encode or decode.
+ #[structopt(name = "FILE", parse(from_os_str))]
file: Option<PathBuf>,
}
fn main() {
- let opt = Opt::from_args();
+ let opt = Opt::parse();
let stdin;
let mut input: Box<dyn Read> = match opt.file {
None => {
@@ -66,7 +55,10 @@ fn main() {
Alphabet::Standard => alphabet::STANDARD,
Alphabet::UrlSafe => alphabet::URL_SAFE,
},
- engine::general_purpose::PAD,
+ match opt.no_padding {
+ true => engine::general_purpose::NO_PAD,
+ false => engine::general_purpose::PAD,
+ },
);
let stdout = io::stdout();
diff --git a/patches/doc-string-fix.patch b/patches/doc-string-fix.patch
deleted file mode 100644
index 15370bf..0000000
--- a/patches/doc-string-fix.patch
+++ /dev/null
@@ -1,41 +0,0 @@
-From 27fc4ecc69aab7b31e23aefbeed10b252b176d5a Mon Sep 17 00:00:00 2001
-From: Chris Wailes <chriswailes@google.com>
-Date: Thu, 9 Feb 2023 23:15:26 -0800
-Subject: [PATCH] Fix an error in the documentation strings
-
-This file explicitly turns on this warning meaning that it can't be
-overridden by command line lint arguments. The rustdoc from version
-1.66.1 will fail without this patch.
-
-Test: m rustdoc
-Bug: 263153841
-Change-Id: Idcf3779cbd46300691232302bba10c46143a2dbc
----
- src/decode.rs | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/src/decode.rs b/src/decode.rs
-index e349240..3f627c7 100644
---- a/src/decode.rs
-+++ b/src/decode.rs
-@@ -71,7 +71,7 @@ impl error::Error for DecodeError {
- }
-
- ///Decode from string reference as octets.
--///Returns a Result containing a Vec<u8>.
-+///Returns a Result containing a `Vec<u8>`.
- ///Convenience `decode_config(input, base64::STANDARD);`.
- ///
- ///# Example
-@@ -90,7 +90,7 @@ pub fn decode<T: AsRef<[u8]>>(input: T) -> Result<Vec<u8>, DecodeError> {
- }
-
- ///Decode from string reference as octets.
--///Returns a Result containing a Vec<u8>.
-+///Returns a Result containing a `Vec<u8>`.
- ///
- ///# Example
- ///
---
-2.39.1.581.gbfd45094c4-goog
-
diff --git a/src/alphabet.rs b/src/alphabet.rs
index 7cd1b57..7895914 100644
--- a/src/alphabet.rs
+++ b/src/alphabet.rs
@@ -1,7 +1,7 @@
//! Provides [Alphabet] and constants for alphabets commonly used in the wild.
use crate::PAD_BYTE;
-use core::fmt;
+use core::{convert, fmt};
#[cfg(any(feature = "std", test))]
use std::error;
@@ -12,6 +12,10 @@ const ALPHABET_SIZE: usize = 64;
/// Common alphabets are provided as constants, and custom alphabets
/// can be made via `from_str` or the `TryFrom<str>` implementation.
///
+/// # Examples
+///
+/// Building and using a custom Alphabet:
+///
/// ```
/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
///
@@ -19,6 +23,34 @@ const ALPHABET_SIZE: usize = 64;
/// &custom,
/// base64::engine::general_purpose::PAD);
/// ```
+///
+/// Building a const:
+///
+/// ```
+/// use base64::alphabet::Alphabet;
+///
+/// static CUSTOM: Alphabet = {
+/// // Result::unwrap() isn't const yet, but panic!() is OK
+/// match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") {
+/// Ok(x) => x,
+/// Err(_) => panic!("creation of alphabet failed"),
+/// }
+/// };
+/// ```
+///
+/// Building lazily:
+///
+/// ```
+/// use base64::{
+/// alphabet::Alphabet,
+/// engine::{general_purpose::GeneralPurpose, GeneralPurposeConfig},
+/// };
+/// use once_cell::sync::Lazy;
+///
+/// static CUSTOM: Lazy<Alphabet> = Lazy::new(||
+/// Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap()
+/// );
+/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Alphabet {
pub(crate) symbols: [u8; ALPHABET_SIZE],
@@ -91,9 +123,14 @@ impl Alphabet {
Ok(Self::from_str_unchecked(alphabet))
}
+
+ /// Create a `&str` from the symbols in the `Alphabet`
+ pub fn as_str(&self) -> &str {
+ core::str::from_utf8(&self.symbols).unwrap()
+ }
}
-impl TryFrom<&str> for Alphabet {
+impl convert::TryFrom<&str> for Alphabet {
type Error = ParseAlphabetError;
fn try_from(value: &str) -> Result<Self, Self::Error> {
@@ -128,21 +165,21 @@ impl fmt::Display for ParseAlphabetError {
#[cfg(any(feature = "std", test))]
impl error::Error for ParseAlphabetError {}
-/// The standard alphabet (uses `+` and `/`).
+/// The standard alphabet (with `+` and `/`) specified in [RFC 4648][].
///
-/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3).
+/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
);
-/// The URL safe alphabet (uses `-` and `_`).
+/// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][].
///
-/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4).
+/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5
pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
);
-/// The `crypt(3)` alphabet (uses `.` and `/` as the first two values).
+/// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters).
///
/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
@@ -154,7 +191,7 @@ pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
);
-/// The alphabet used in IMAP-modified UTF-7 (uses `+` and `,`).
+/// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`).
///
/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
@@ -171,7 +208,7 @@ pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
#[cfg(test)]
mod tests {
use crate::alphabet::*;
- use std::convert::TryFrom as _;
+ use core::convert::TryFrom as _;
#[test]
fn detects_duplicate_start() {
@@ -238,4 +275,11 @@ mod tests {
.unwrap()
);
}
+
+ #[test]
+ fn str_same_as_input() {
+ let alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+ let a = Alphabet::try_from(alphabet).unwrap();
+ assert_eq!(alphabet, a.as_str())
+ }
}
diff --git a/src/chunked_encoder.rs b/src/chunked_encoder.rs
index 0457259..817b339 100644
--- a/src/chunked_encoder.rs
+++ b/src/chunked_encoder.rs
@@ -1,12 +1,12 @@
-#[cfg(any(feature = "alloc", feature = "std", test))]
+use crate::{
+ encode::add_padding,
+ engine::{Config, Engine},
+};
+#[cfg(any(feature = "alloc", test))]
use alloc::string::String;
-use core::cmp;
-#[cfg(any(feature = "alloc", feature = "std", test))]
+#[cfg(any(feature = "alloc", test))]
use core::str;
-use crate::encode::add_padding;
-use crate::engine::{Config, Engine};
-
/// The output mechanism for ChunkedEncoder's encoded bytes.
pub trait Sink {
type Error;
@@ -15,85 +15,51 @@ pub trait Sink {
fn write_encoded_bytes(&mut self, encoded: &[u8]) -> Result<(), Self::Error>;
}
-const BUF_SIZE: usize = 1024;
-
/// A base64 encoder that emits encoded bytes in chunks without heap allocation.
pub struct ChunkedEncoder<'e, E: Engine + ?Sized> {
engine: &'e E,
- max_input_chunk_len: usize,
}
impl<'e, E: Engine + ?Sized> ChunkedEncoder<'e, E> {
pub fn new(engine: &'e E) -> ChunkedEncoder<'e, E> {
- ChunkedEncoder {
- engine,
- max_input_chunk_len: max_input_length(BUF_SIZE, engine.config().encode_padding()),
- }
+ ChunkedEncoder { engine }
}
pub fn encode<S: Sink>(&self, bytes: &[u8], sink: &mut S) -> Result<(), S::Error> {
- let mut encode_buf: [u8; BUF_SIZE] = [0; BUF_SIZE];
- let mut input_index = 0;
-
- while input_index < bytes.len() {
- // either the full input chunk size, or it's the last iteration
- let input_chunk_len = cmp::min(self.max_input_chunk_len, bytes.len() - input_index);
-
- let chunk = &bytes[input_index..(input_index + input_chunk_len)];
-
- let mut b64_bytes_written = self.engine.internal_encode(chunk, &mut encode_buf);
-
- input_index += input_chunk_len;
- let more_input_left = input_index < bytes.len();
-
- if self.engine.config().encode_padding() && !more_input_left {
- // no more input, add padding if needed. Buffer will have room because
- // max_input_length leaves room for it.
- b64_bytes_written += add_padding(bytes.len(), &mut encode_buf[b64_bytes_written..]);
+ const BUF_SIZE: usize = 1024;
+ const CHUNK_SIZE: usize = BUF_SIZE / 4 * 3;
+
+ let mut buf = [0; BUF_SIZE];
+ for chunk in bytes.chunks(CHUNK_SIZE) {
+ let mut len = self.engine.internal_encode(chunk, &mut buf);
+ if chunk.len() != CHUNK_SIZE && self.engine.config().encode_padding() {
+ // Final, potentially partial, chunk.
+ // Only need to consider if padding is needed on a partial chunk since full chunk
+ // is a multiple of 3, which therefore won't be padded.
+ // Pad output to multiple of four bytes if required by config.
+ len += add_padding(len, &mut buf[len..]);
}
-
- sink.write_encoded_bytes(&encode_buf[0..b64_bytes_written])?;
+ sink.write_encoded_bytes(&buf[..len])?;
}
Ok(())
}
}
-/// Calculate the longest input that can be encoded for the given output buffer size.
-///
-/// If the config requires padding, two bytes of buffer space will be set aside so that the last
-/// chunk of input can be encoded safely.
-///
-/// The input length will always be a multiple of 3 so that no encoding state has to be carried over
-/// between chunks.
-fn max_input_length(encoded_buf_len: usize, padded: bool) -> usize {
- let effective_buf_len = if padded {
- // make room for padding
- encoded_buf_len
- .checked_sub(2)
- .expect("Don't use a tiny buffer")
- } else {
- encoded_buf_len
- };
-
- // No padding, so just normal base64 expansion.
- (effective_buf_len / 4) * 3
-}
-
// A really simple sink that just appends to a string
-#[cfg(any(feature = "alloc", feature = "std", test))]
+#[cfg(any(feature = "alloc", test))]
pub(crate) struct StringSink<'a> {
string: &'a mut String,
}
-#[cfg(any(feature = "alloc", feature = "std", test))]
+#[cfg(any(feature = "alloc", test))]
impl<'a> StringSink<'a> {
pub(crate) fn new(s: &mut String) -> StringSink {
StringSink { string: s }
}
}
-#[cfg(any(feature = "alloc", feature = "std", test))]
+#[cfg(any(feature = "alloc", test))]
impl<'a> Sink for StringSink<'a> {
type Error = ();
@@ -151,38 +117,13 @@ pub mod tests {
chunked_encode_matches_normal_encode_random(&helper);
}
- #[test]
- fn max_input_length_no_pad() {
- assert_eq!(768, max_input_length(1024, false));
- }
-
- #[test]
- fn max_input_length_with_pad_decrements_one_triple() {
- assert_eq!(765, max_input_length(1024, true));
- }
-
- #[test]
- fn max_input_length_with_pad_one_byte_short() {
- assert_eq!(765, max_input_length(1025, true));
- }
-
- #[test]
- fn max_input_length_with_pad_fits_exactly() {
- assert_eq!(768, max_input_length(1026, true));
- }
-
- #[test]
- fn max_input_length_cant_use_extra_single_encoded_byte() {
- assert_eq!(300, max_input_length(401, false));
- }
-
pub fn chunked_encode_matches_normal_encode_random<S: SinkTestHelper>(sink_test_helper: &S) {
let mut input_buf: Vec<u8> = Vec::new();
let mut output_buf = String::new();
let mut rng = rand::rngs::SmallRng::from_entropy();
let input_len_range = Uniform::new(1, 10_000);
- for _ in 0..5_000 {
+ for _ in 0..20_000 {
input_buf.clear();
output_buf.clear();
diff --git a/src/decode.rs b/src/decode.rs
index 0471518..5230fd3 100644
--- a/src/decode.rs
+++ b/src/decode.rs
@@ -1,5 +1,5 @@
use crate::engine::{general_purpose::STANDARD, DecodeEstimate, Engine};
-#[cfg(any(feature = "alloc", feature = "std", test))]
+#[cfg(any(feature = "alloc", test))]
use alloc::vec::Vec;
use core::fmt;
#[cfg(any(feature = "std", test))]
@@ -41,11 +41,7 @@ impl fmt::Display for DecodeError {
}
#[cfg(any(feature = "std", test))]
-impl error::Error for DecodeError {
- fn cause(&self) -> Option<&dyn error::Error> {
- None
- }
-}
+impl error::Error for DecodeError {}
/// Errors that can occur while decoding into a slice.
#[derive(Clone, Debug, PartialEq, Eq)]
@@ -69,7 +65,7 @@ impl fmt::Display for DecodeSliceError {
#[cfg(any(feature = "std", test))]
impl error::Error for DecodeSliceError {
- fn cause(&self) -> Option<&dyn error::Error> {
+ fn source(&self) -> Option<&(dyn error::Error + 'static)> {
match self {
DecodeSliceError::DecodeError(e) => Some(e),
DecodeSliceError::OutputSliceTooSmall => None,
@@ -87,7 +83,7 @@ impl From<DecodeError> for DecodeSliceError {
///
/// See [Engine::decode].
#[deprecated(since = "0.21.0", note = "Use Engine::decode")]
-#[cfg(any(feature = "alloc", feature = "std", test))]
+#[cfg(any(feature = "alloc", test))]
pub fn decode<T: AsRef<[u8]>>(input: T) -> Result<Vec<u8>, DecodeError> {
STANDARD.decode(input)
}
@@ -97,7 +93,7 @@ pub fn decode<T: AsRef<[u8]>>(input: T) -> Result<Vec<u8>, DecodeError> {
/// See [Engine::decode].
///Returns a `Result` containing a `Vec<u8>`.
#[deprecated(since = "0.21.0", note = "Use Engine::decode")]
-#[cfg(any(feature = "alloc", feature = "std", test))]
+#[cfg(any(feature = "alloc", test))]
pub fn decode_engine<E: Engine, T: AsRef<[u8]>>(
input: T,
engine: &E,
@@ -108,7 +104,7 @@ pub fn decode_engine<E: Engine, T: AsRef<[u8]>>(
/// Decode from string reference as octets.
///
/// See [Engine::decode_vec].
-#[cfg(any(feature = "alloc", feature = "std", test))]
+#[cfg(any(feature = "alloc", test))]
#[deprecated(since = "0.21.0", note = "Use Engine::decode_vec")]
pub fn decode_engine_vec<E: Engine, T: AsRef<[u8]>>(
input: T,
@@ -148,11 +144,6 @@ pub fn decode_engine_slice<E: Engine, T: AsRef<[u8]>>(
/// // start of the next quad of encoded symbols
/// assert_eq!(6, decoded_len_estimate(5));
/// ```
-///
-/// # Panics
-///
-/// Panics if decoded length estimation overflows.
-/// This would happen for sizes within a few bytes of the maximum value of `usize`.
pub fn decoded_len_estimate(encoded_len: usize) -> usize {
STANDARD
.internal_decoded_len_estimate(encoded_len)
diff --git a/src/encode.rs b/src/encode.rs
index cb17650..ae6d790 100644
--- a/src/encode.rs
+++ b/src/encode.rs
@@ -1,10 +1,10 @@
-#[cfg(any(feature = "alloc", feature = "std", test))]
+#[cfg(any(feature = "alloc", test))]
use alloc::string::String;
use core::fmt;
#[cfg(any(feature = "std", test))]
use std::error;
-#[cfg(any(feature = "alloc", feature = "std", test))]
+#[cfg(any(feature = "alloc", test))]
use crate::engine::general_purpose::STANDARD;
use crate::engine::{Config, Engine};
use crate::PAD_BYTE;
@@ -14,7 +14,7 @@ use crate::PAD_BYTE;
/// See [Engine::encode].
#[allow(unused)]
#[deprecated(since = "0.21.0", note = "Use Engine::encode")]
-#[cfg(any(feature = "alloc", feature = "std", test))]
+#[cfg(any(feature = "alloc", test))]
pub fn encode<T: AsRef<[u8]>>(input: T) -> String {
STANDARD.encode(input)
}
@@ -24,7 +24,7 @@ pub fn encode<T: AsRef<[u8]>>(input: T) -> String {
/// See [Engine::encode].
#[allow(unused)]
#[deprecated(since = "0.21.0", note = "Use Engine::encode")]
-#[cfg(any(feature = "alloc", feature = "std", test))]
+#[cfg(any(feature = "alloc", test))]
pub fn encode_engine<E: Engine, T: AsRef<[u8]>>(input: T, engine: &E) -> String {
engine.encode(input)
}
@@ -34,7 +34,7 @@ pub fn encode_engine<E: Engine, T: AsRef<[u8]>>(input: T, engine: &E) -> String
/// See [Engine::encode_string].
#[allow(unused)]
#[deprecated(since = "0.21.0", note = "Use Engine::encode_string")]
-#[cfg(any(feature = "alloc", feature = "std", test))]
+#[cfg(any(feature = "alloc", test))]
pub fn encode_engine_string<E: Engine, T: AsRef<[u8]>>(
input: T,
output_buf: &mut String,
@@ -77,7 +77,7 @@ pub(crate) fn encode_with_padding<E: Engine + ?Sized>(
let b64_bytes_written = engine.internal_encode(input, output);
let padding_bytes = if engine.config().encode_padding() {
- add_padding(input.len(), &mut output[b64_bytes_written..])
+ add_padding(b64_bytes_written, &mut output[b64_bytes_written..])
} else {
0
};
@@ -94,43 +94,51 @@ pub(crate) fn encode_with_padding<E: Engine + ?Sized>(
///
/// Returns `None` if the encoded length can't be represented in `usize`. This will happen for
/// input lengths in approximately the top quarter of the range of `usize`.
-pub fn encoded_len(bytes_len: usize, padding: bool) -> Option<usize> {
+pub const fn encoded_len(bytes_len: usize, padding: bool) -> Option<usize> {
let rem = bytes_len % 3;
let complete_input_chunks = bytes_len / 3;
- let complete_chunk_output = complete_input_chunks.checked_mul(4);
+ // `?` is disallowed in const, and `let Some(_) = _ else` requires 1.65.0, whereas this
+ // messier syntax works on 1.48
+ let complete_chunk_output =
+ if let Some(complete_chunk_output) = complete_input_chunks.checked_mul(4) {
+ complete_chunk_output
+ } else {
+ return None;
+ };
if rem > 0 {
if padding {
- complete_chunk_output.and_then(|c| c.checked_add(4))
+ complete_chunk_output.checked_add(4)
} else {
let encoded_rem = match rem {
1 => 2,
- 2 => 3,
- _ => unreachable!("Impossible remainder"),
+ // only other possible remainder is 2
+ // can't use a separate _ => unreachable!() in const fns in ancient rust versions
+ _ => 3,
};
- complete_chunk_output.and_then(|c| c.checked_add(encoded_rem))
+ complete_chunk_output.checked_add(encoded_rem)
}
} else {
- complete_chunk_output
+ Some(complete_chunk_output)
}
}
/// Write padding characters.
-/// `input_len` is the size of the original, not encoded, input.
+/// `unpadded_output_len` is the size of the unpadded but base64 encoded data.
/// `output` is the slice where padding should be written, of length at least 2.
///
/// Returns the number of padding bytes written.
-pub(crate) fn add_padding(input_len: usize, output: &mut [u8]) -> usize {
- // TODO base on encoded len to use cheaper mod by 4 (aka & 7)
- let rem = input_len % 3;
- let mut bytes_written = 0;
- for _ in 0..((3 - rem) % 3) {
- output[bytes_written] = PAD_BYTE;
- bytes_written += 1;
+pub(crate) fn add_padding(unpadded_output_len: usize, output: &mut [u8]) -> usize {
+ let pad_bytes = (4 - (unpadded_output_len % 4)) % 4;
+ // for just a couple bytes, this has better performance than using
+ // .fill(), or iterating over mutable refs, which call memset()
+ #[allow(clippy::needless_range_loop)]
+ for i in 0..pad_bytes {
+ output[i] = PAD_BYTE;
}
- bytes_written
+ pad_bytes
}
/// Errors that can occur while encoding into a slice.
@@ -149,11 +157,7 @@ impl fmt::Display for EncodeSliceError {
}
#[cfg(any(feature = "std", test))]
-impl error::Error for EncodeSliceError {
- fn cause(&self) -> Option<&dyn error::Error> {
- None
- }
-}
+impl error::Error for EncodeSliceError {}
#[cfg(test)]
mod tests {
@@ -434,18 +438,18 @@ mod tests {
let mut rng = rand::rngs::SmallRng::from_entropy();
- // cover our bases for length % 3
- for input_len in 0..10 {
+ // cover our bases for length % 4
+ for unpadded_output_len in 0..20 {
output.clear();
// fill output with random
- for _ in 0..10 {
+ for _ in 0..100 {
output.push(rng.gen());
}
let orig_output_buf = output.clone();
- let bytes_written = add_padding(input_len, &mut output);
+ let bytes_written = add_padding(unpadded_output_len, &mut output);
// make sure the part beyond bytes_written is the same garbage it was before
assert_eq!(orig_output_buf[bytes_written..], output[bytes_written..]);
diff --git a/src/engine/general_purpose/decode.rs b/src/engine/general_purpose/decode.rs
index e9fd788..21a386f 100644
--- a/src/engine/general_purpose/decode.rs
+++ b/src/engine/general_purpose/decode.rs
@@ -1,5 +1,5 @@
use crate::{
- engine::{general_purpose::INVALID_VALUE, DecodeEstimate, DecodePaddingMode},
+ engine::{general_purpose::INVALID_VALUE, DecodeEstimate, DecodeMetadata, DecodePaddingMode},
DecodeError, PAD_BYTE,
};
@@ -30,16 +30,11 @@ pub struct GeneralPurposeEstimate {
impl GeneralPurposeEstimate {
pub(crate) fn new(encoded_len: usize) -> Self {
+ // Formulas that won't overflow
Self {
- num_chunks: encoded_len
- .checked_add(INPUT_CHUNK_LEN - 1)
- .expect("Overflow when calculating number of chunks in input")
- / INPUT_CHUNK_LEN,
- decoded_len_estimate: encoded_len
- .checked_add(3)
- .expect("Overflow when calculating decoded len estimate")
- / 4
- * 3,
+ num_chunks: encoded_len / INPUT_CHUNK_LEN
+ + (encoded_len % INPUT_CHUNK_LEN > 0) as usize,
+ decoded_len_estimate: (encoded_len / 4 + (encoded_len % 4 > 0) as usize) * 3,
}
}
}
@@ -51,7 +46,7 @@ impl DecodeEstimate for GeneralPurposeEstimate {
}
/// Helper to avoid duplicating num_chunks calculation, which is costly on short inputs.
-/// Returns the number of bytes written, or an error.
+/// Returns the decode metadata, or an error.
// We're on the fragile edge of compiler heuristics here. If this is not inlined, slow. If this is
// inlined(always), a different slow. plain ol' inline makes the benchmarks happiest at the moment,
// but this is fragile and the best setting changes with only minor code modifications.
@@ -63,7 +58,7 @@ pub(crate) fn decode_helper(
decode_table: &[u8; 256],
decode_allow_trailing_bits: bool,
padding_mode: DecodePaddingMode,
-) -> Result<usize, DecodeError> {
+) -> Result<DecodeMetadata, DecodeError> {
let remainder_len = input.len() % INPUT_CHUNK_LEN;
// Because the fast decode loop writes in groups of 8 bytes (unrolled to
@@ -345,4 +340,44 @@ mod tests {
decode_chunk(&input[..], 0, &STANDARD.decode_table, &mut output).unwrap();
assert_eq!(&vec![b'f', b'o', b'o', b'b', b'a', b'r', 0, 0], &output);
}
+
+ #[test]
+ fn estimate_short_lengths() {
+ for (range, (num_chunks, decoded_len_estimate)) in [
+ (0..=0, (0, 0)),
+ (1..=4, (1, 3)),
+ (5..=8, (1, 6)),
+ (9..=12, (2, 9)),
+ (13..=16, (2, 12)),
+ (17..=20, (3, 15)),
+ ] {
+ for encoded_len in range {
+ let estimate = GeneralPurposeEstimate::new(encoded_len);
+ assert_eq!(num_chunks, estimate.num_chunks);
+ assert_eq!(decoded_len_estimate, estimate.decoded_len_estimate);
+ }
+ }
+ }
+
+ #[test]
+ fn estimate_via_u128_inflation() {
+ // cover both ends of usize
+ (0..1000)
+ .chain(usize::MAX - 1000..=usize::MAX)
+ .for_each(|encoded_len| {
+ // inflate to 128 bit type to be able to safely use the easy formulas
+ let len_128 = encoded_len as u128;
+
+ let estimate = GeneralPurposeEstimate::new(encoded_len);
+ assert_eq!(
+ ((len_128 + (INPUT_CHUNK_LEN - 1) as u128) / (INPUT_CHUNK_LEN as u128))
+ as usize,
+ estimate.num_chunks
+ );
+ assert_eq!(
+ ((len_128 + 3) / 4 * 3) as usize,
+ estimate.decoded_len_estimate
+ );
+ })
+ }
}
diff --git a/src/engine/general_purpose/decode_suffix.rs b/src/engine/general_purpose/decode_suffix.rs
index 5652035..e1e005d 100644
--- a/src/engine/general_purpose/decode_suffix.rs
+++ b/src/engine/general_purpose/decode_suffix.rs
@@ -1,13 +1,13 @@
use crate::{
- engine::{general_purpose::INVALID_VALUE, DecodePaddingMode},
+ engine::{general_purpose::INVALID_VALUE, DecodeMetadata, DecodePaddingMode},
DecodeError, PAD_BYTE,
};
/// Decode the last 1-8 bytes, checking for trailing set bits and padding per the provided
/// parameters.
///
-/// Returns the total number of bytes decoded, including the ones indicated as already written by
-/// `output_index`.
+/// Returns the decode metadata representing the total number of bytes decoded, including the ones
+/// indicated as already written by `output_index`.
pub(crate) fn decode_suffix(
input: &[u8],
input_index: usize,
@@ -16,7 +16,7 @@ pub(crate) fn decode_suffix(
decode_table: &[u8; 256],
decode_allow_trailing_bits: bool,
padding_mode: DecodePaddingMode,
-) -> Result<usize, DecodeError> {
+) -> Result<DecodeMetadata, DecodeError> {
// Decode any leftovers that aren't a complete input block of 8 bytes.
// Use a u64 as a stack-resident 8 byte buffer.
let mut leftover_bits: u64 = 0;
@@ -157,5 +157,12 @@ pub(crate) fn decode_suffix(
leftover_bits_appended_to_buf += 8;
}
- Ok(output_index)
+ Ok(DecodeMetadata::new(
+ output_index,
+ if padding_bytes > 0 {
+ Some(input_index + first_padding_index)
+ } else {
+ None
+ },
+ ))
}
diff --git a/src/engine/general_purpose/mod.rs b/src/engine/general_purpose/mod.rs
index af8897b..e0227f3 100644
--- a/src/engine/general_purpose/mod.rs
+++ b/src/engine/general_purpose/mod.rs
@@ -2,13 +2,14 @@
use crate::{
alphabet,
alphabet::Alphabet,
- engine::{Config, DecodePaddingMode},
+ engine::{Config, DecodeMetadata, DecodePaddingMode},
DecodeError,
};
use core::convert::TryInto;
mod decode;
pub(crate) mod decode_suffix;
+
pub use decode::GeneralPurposeEstimate;
pub(crate) const INVALID_VALUE: u8 = 255;
@@ -18,6 +19,8 @@ pub(crate) const INVALID_VALUE: u8 = 255;
/// - It uses no vector CPU instructions, so it will work on any system.
/// - It is reasonably fast (~2-3GiB/s).
/// - It is not constant-time, though, so it is vulnerable to timing side-channel attacks. For loading cryptographic keys, etc, it is suggested to use the forthcoming constant-time implementation.
+
+#[derive(Debug, Clone)]
pub struct GeneralPurpose {
encode_table: [u8; 64],
decode_table: [u8; 256],
@@ -170,7 +173,7 @@ impl super::Engine for GeneralPurpose {
input: &[u8],
output: &mut [u8],
estimate: Self::DecodeEstimate,
- ) -> Result<usize, DecodeError> {
+ ) -> Result<DecodeMetadata, DecodeError> {
decode::decode_helper(
input,
estimate,
diff --git a/src/engine/mod.rs b/src/engine/mod.rs
index 12dfaa8..16c05d7 100644
--- a/src/engine/mod.rs
+++ b/src/engine/mod.rs
@@ -1,14 +1,14 @@
//! Provides the [Engine] abstraction and out of the box implementations.
-#[cfg(any(feature = "alloc", feature = "std", test))]
+#[cfg(any(feature = "alloc", test))]
use crate::chunked_encoder;
use crate::{
encode::{encode_with_padding, EncodeSliceError},
encoded_len, DecodeError, DecodeSliceError,
};
-#[cfg(any(feature = "alloc", feature = "std", test))]
+#[cfg(any(feature = "alloc", test))]
use alloc::vec::Vec;
-#[cfg(any(feature = "alloc", feature = "std", test))]
+#[cfg(any(feature = "alloc", test))]
use alloc::{string::String, vec};
pub mod general_purpose;
@@ -62,10 +62,6 @@ pub trait Engine: Send + Sync {
/// As an optimization to prevent the decoded length from being calculated twice, it is
/// sometimes helpful to have a conservative estimate of the decoded size before doing the
/// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed.
- ///
- /// # Panics
- ///
- /// Panics if decoded length estimation overflows.
#[doc(hidden)]
fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate;
@@ -77,8 +73,6 @@ pub trait Engine: Send + Sync {
/// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid
/// calculating it again (expensive on short inputs).`
///
- /// Returns the number of bytes written to `output`.
- ///
/// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this
/// function must also handle the final possibly partial chunk.
/// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4,
@@ -99,7 +93,7 @@ pub trait Engine: Send + Sync {
input: &[u8],
output: &mut [u8],
decode_estimate: Self::DecodeEstimate,
- ) -> Result<usize, DecodeError>;
+ ) -> Result<DecodeMetadata, DecodeError>;
/// Returns the config for this engine.
fn config(&self) -> &Self::Config;
@@ -119,15 +113,24 @@ pub trait Engine: Send + Sync {
/// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
///
/// let b64_url = CUSTOM_ENGINE.encode(b"hello internet~");
- #[cfg(any(feature = "alloc", feature = "std", test))]
+ #[cfg(any(feature = "alloc", test))]
+ #[inline]
fn encode<T: AsRef<[u8]>>(&self, input: T) -> String {
- let encoded_size = encoded_len(input.as_ref().len(), self.config().encode_padding())
- .expect("integer overflow when calculating buffer size");
- let mut buf = vec![0; encoded_size];
+ fn inner<E>(engine: &E, input_bytes: &[u8]) -> String
+ where
+ E: Engine + ?Sized,
+ {
+ let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding())
+ .expect("integer overflow when calculating buffer size");
+
+ let mut buf = vec![0; encoded_size];
- encode_with_padding(input.as_ref(), &mut buf[..], self, encoded_size);
+ encode_with_padding(input_bytes, &mut buf[..], engine, encoded_size);
- String::from_utf8(buf).expect("Invalid UTF8")
+ String::from_utf8(buf).expect("Invalid UTF8")
+ }
+
+ inner(self, input.as_ref())
}
/// Encode arbitrary octets as base64 into a supplied `String`.
@@ -150,17 +153,21 @@ pub trait Engine: Send + Sync {
/// println!("{}", buf);
/// }
/// ```
- #[cfg(any(feature = "alloc", feature = "std", test))]
+ #[cfg(any(feature = "alloc", test))]
+ #[inline]
fn encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String) {
- let input_bytes = input.as_ref();
-
+ fn inner<E>(engine: &E, input_bytes: &[u8], output_buf: &mut String)
+ where
+ E: Engine + ?Sized,
{
let mut sink = chunked_encoder::StringSink::new(output_buf);
- chunked_encoder::ChunkedEncoder::new(self)
+ chunked_encoder::ChunkedEncoder::new(engine)
.encode(input_bytes, &mut sink)
.expect("Writing to a String shouldn't fail");
}
+
+ inner(self, input.as_ref(), output_buf)
}
/// Encode arbitrary octets as base64 into a supplied slice.
@@ -171,7 +178,8 @@ pub trait Engine: Send + Sync {
///
/// # Example
///
- /// ```rust
+ #[cfg_attr(feature = "alloc", doc = "```")]
+ #[cfg_attr(not(feature = "alloc"), doc = "```ignore")]
/// use base64::{Engine as _, engine::general_purpose};
/// let s = b"hello internet!";
/// let mut buf = Vec::new();
@@ -185,29 +193,38 @@ pub trait Engine: Send + Sync {
///
/// assert_eq!(s, general_purpose::STANDARD.decode(&buf).unwrap().as_slice());
/// ```
+ #[inline]
fn encode_slice<T: AsRef<[u8]>>(
&self,
input: T,
output_buf: &mut [u8],
) -> Result<usize, EncodeSliceError> {
- let input_bytes = input.as_ref();
+ fn inner<E>(
+ engine: &E,
+ input_bytes: &[u8],
+ output_buf: &mut [u8],
+ ) -> Result<usize, EncodeSliceError>
+ where
+ E: Engine + ?Sized,
+ {
+ let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding())
+ .expect("usize overflow when calculating buffer size");
- let encoded_size = encoded_len(input_bytes.len(), self.config().encode_padding())
- .expect("usize overflow when calculating buffer size");
+ if output_buf.len() < encoded_size {
+ return Err(EncodeSliceError::OutputSliceTooSmall);
+ }
- if output_buf.len() < encoded_size {
- return Err(EncodeSliceError::OutputSliceTooSmall);
- }
+ let b64_output = &mut output_buf[0..encoded_size];
- let b64_output = &mut output_buf[0..encoded_size];
+ encode_with_padding(input_bytes, b64_output, engine, encoded_size);
- encode_with_padding(input_bytes, b64_output, self, encoded_size);
+ Ok(encoded_size)
+ }
- Ok(encoded_size)
+ inner(self, input.as_ref(), output_buf)
}
- /// Decode from string reference as octets using the specified [Engine].
- /// Returns a `Result` containing a `Vec<u8>`.
+ /// Decode the input into a new `Vec`.
///
/// # Example
///
@@ -225,25 +242,30 @@ pub trait Engine: Send + Sync {
/// .decode("aGVsbG8gaW50ZXJuZXR-Cg").unwrap();
/// println!("{:?}", bytes_url);
/// ```
- ///
- /// # Panics
- ///
- /// Panics if decoded length estimation overflows.
- /// This would happen for sizes within a few bytes of the maximum value of `usize`.
- #[cfg(any(feature = "alloc", feature = "std", test))]
+ #[cfg(any(feature = "alloc", test))]
+ #[inline]
fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> {
- let input_bytes = input.as_ref();
+ fn inner<E>(engine: &E, input_bytes: &[u8]) -> Result<Vec<u8>, DecodeError>
+ where
+ E: Engine + ?Sized,
+ {
+ let estimate = engine.internal_decoded_len_estimate(input_bytes.len());
+ let mut buffer = vec![0; estimate.decoded_len_estimate()];
- let estimate = self.internal_decoded_len_estimate(input_bytes.len());
- let mut buffer = vec![0; estimate.decoded_len_estimate()];
+ let bytes_written = engine
+ .internal_decode(input_bytes, &mut buffer, estimate)?
+ .decoded_len;
- let bytes_written = self.internal_decode(input_bytes, &mut buffer, estimate)?;
- buffer.truncate(bytes_written);
+ buffer.truncate(bytes_written);
- Ok(buffer)
+ Ok(buffer)
+ }
+
+ inner(self, input.as_ref())
}
- /// Decode from string reference as octets.
+ /// Decode the `input` into the supplied `buffer`.
+ ///
/// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough.
/// Returns a `Result` containing an empty tuple, aka `()`.
///
@@ -272,39 +294,45 @@ pub trait Engine: Send + Sync {
/// println!("{:?}", buffer);
/// }
/// ```
- ///
- /// # Panics
- ///
- /// Panics if decoded length estimation overflows.
- /// This would happen for sizes within a few bytes of the maximum value of `usize`.
- #[cfg(any(feature = "alloc", feature = "std", test))]
+ #[cfg(any(feature = "alloc", test))]
+ #[inline]
fn decode_vec<T: AsRef<[u8]>>(
&self,
input: T,
buffer: &mut Vec<u8>,
) -> Result<(), DecodeError> {
- let input_bytes = input.as_ref();
+ fn inner<E>(engine: &E, input_bytes: &[u8], buffer: &mut Vec<u8>) -> Result<(), DecodeError>
+ where
+ E: Engine + ?Sized,
+ {
+ let starting_output_len = buffer.len();
+ let estimate = engine.internal_decoded_len_estimate(input_bytes.len());
+
+ let total_len_estimate = estimate
+ .decoded_len_estimate()
+ .checked_add(starting_output_len)
+ .expect("Overflow when calculating output buffer length");
- let starting_output_len = buffer.len();
+ buffer.resize(total_len_estimate, 0);
- let estimate = self.internal_decoded_len_estimate(input_bytes.len());
- let total_len_estimate = estimate
- .decoded_len_estimate()
- .checked_add(starting_output_len)
- .expect("Overflow when calculating output buffer length");
- buffer.resize(total_len_estimate, 0);
+ let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..];
- let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..];
- let bytes_written = self.internal_decode(input_bytes, buffer_slice, estimate)?;
+ let bytes_written = engine
+ .internal_decode(input_bytes, buffer_slice, estimate)?
+ .decoded_len;
- buffer.truncate(starting_output_len + bytes_written);
+ buffer.truncate(starting_output_len + bytes_written);
- Ok(())
+ Ok(())
+ }
+
+ inner(self, input.as_ref(), buffer)
}
/// Decode the input into the provided output slice.
///
- /// Returns an error if `output` is smaller than the estimated decoded length.
+ /// Returns the number of bytes written to the slice, or an error if `output` is smaller than
+ /// the estimated decoded length.
///
/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
///
@@ -312,29 +340,39 @@ pub trait Engine: Send + Sync {
///
/// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error
/// if the output buffer is too small.
- ///
- /// # Panics
- ///
- /// Panics if decoded length estimation overflows.
- /// This would happen for sizes within a few bytes of the maximum value of `usize`.
+ #[inline]
fn decode_slice<T: AsRef<[u8]>>(
&self,
input: T,
output: &mut [u8],
) -> Result<usize, DecodeSliceError> {
- let input_bytes = input.as_ref();
+ fn inner<E>(
+ engine: &E,
+ input_bytes: &[u8],
+ output: &mut [u8],
+ ) -> Result<usize, DecodeSliceError>
+ where
+ E: Engine + ?Sized,
+ {
+ let estimate = engine.internal_decoded_len_estimate(input_bytes.len());
- let estimate = self.internal_decoded_len_estimate(input_bytes.len());
- if output.len() < estimate.decoded_len_estimate() {
- return Err(DecodeSliceError::OutputSliceTooSmall);
+ if output.len() < estimate.decoded_len_estimate() {
+ return Err(DecodeSliceError::OutputSliceTooSmall);
+ }
+
+ engine
+ .internal_decode(input_bytes, output, estimate)
+ .map_err(|e| e.into())
+ .map(|dm| dm.decoded_len)
}
- self.internal_decode(input_bytes, output, estimate)
- .map_err(|e| e.into())
+ inner(self, input.as_ref(), output)
}
/// Decode the input into the provided output slice.
///
+ /// Returns the number of bytes written to the slice.
+ ///
/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
///
/// See [crate::decoded_len_estimate] for calculating buffer sizes.
@@ -344,22 +382,27 @@ pub trait Engine: Send + Sync {
///
/// # Panics
///
- /// Panics if decoded length estimation overflows.
- /// This would happen for sizes within a few bytes of the maximum value of `usize`.
- ///
/// Panics if the provided output buffer is too small for the decoded data.
+ #[inline]
fn decode_slice_unchecked<T: AsRef<[u8]>>(
&self,
input: T,
output: &mut [u8],
) -> Result<usize, DecodeError> {
- let input_bytes = input.as_ref();
+ fn inner<E>(engine: &E, input_bytes: &[u8], output: &mut [u8]) -> Result<usize, DecodeError>
+ where
+ E: Engine + ?Sized,
+ {
+ engine
+ .internal_decode(
+ input_bytes,
+ output,
+ engine.internal_decoded_len_estimate(input_bytes.len()),
+ )
+ .map(|dm| dm.decoded_len)
+ }
- self.internal_decode(
- input_bytes,
- output,
- self.internal_decoded_len_estimate(input_bytes.len()),
- )
+ inner(self, input.as_ref(), output)
}
}
@@ -387,11 +430,6 @@ pub trait DecodeEstimate {
///
/// The estimate must be no larger than the next largest complete triple of decoded bytes.
/// That is, the final quad of tokens to decode may be assumed to be complete with no padding.
- ///
- /// # Panics
- ///
- /// Panics if decoded length estimation overflows.
- /// This would happen for sizes within a few bytes of the maximum value of `usize`.
fn decoded_len_estimate(&self) -> usize;
}
@@ -408,3 +446,21 @@ pub enum DecodePaddingMode {
/// Padding must be absent -- for when you want predictable padding, without any wasted bytes.
RequireNone,
}
+
+/// Metadata about the result of a decode operation
+#[derive(PartialEq, Eq, Debug)]
+pub struct DecodeMetadata {
+ /// Number of decoded bytes output
+ pub(crate) decoded_len: usize,
+ /// Offset of the first padding byte in the input, if any
+ pub(crate) padding_offset: Option<usize>,
+}
+
+impl DecodeMetadata {
+ pub(crate) fn new(decoded_bytes: usize, padding_index: Option<usize>) -> Self {
+ Self {
+ decoded_len: decoded_bytes,
+ padding_offset: padding_index,
+ }
+ }
+}
diff --git a/src/engine/naive.rs b/src/engine/naive.rs
index 6665c5e..6a50cbe 100644
--- a/src/engine/naive.rs
+++ b/src/engine/naive.rs
@@ -2,12 +2,11 @@ use crate::{
alphabet::Alphabet,
engine::{
general_purpose::{self, decode_table, encode_table},
- Config, DecodeEstimate, DecodePaddingMode, Engine,
+ Config, DecodeEstimate, DecodeMetadata, DecodePaddingMode, Engine,
},
DecodeError, PAD_BYTE,
};
-use alloc::ops::BitOr;
-use std::ops::{BitAnd, Shl, Shr};
+use std::ops::{BitAnd, BitOr, Shl, Shr};
/// Comparatively simple implementation that can be used as something to compare against in tests
pub struct Naive {
@@ -112,7 +111,7 @@ impl Engine for Naive {
input: &[u8],
output: &mut [u8],
estimate: Self::DecodeEstimate,
- ) -> Result<usize, DecodeError> {
+ ) -> Result<DecodeMetadata, DecodeError> {
if estimate.rem == 1 {
// trailing whitespace is so common that it's worth it to check the last byte to
// possibly return a better error message
diff --git a/src/engine/tests.rs b/src/engine/tests.rs
index 906bba0..b048005 100644
--- a/src/engine/tests.rs
+++ b/src/engine/tests.rs
@@ -8,13 +8,16 @@ use rand::{
};
use rstest::rstest;
use rstest_reuse::{apply, template};
-use std::{collections, fmt};
+use std::{collections, fmt, io::Read as _};
use crate::{
alphabet::{Alphabet, STANDARD},
encode::add_padding,
encoded_len,
- engine::{general_purpose, naive, Config, DecodeEstimate, DecodePaddingMode, Engine},
+ engine::{
+ general_purpose, naive, Config, DecodeEstimate, DecodeMetadata, DecodePaddingMode, Engine,
+ },
+ read::DecoderReader,
tests::{assert_encode_sanity, random_alphabet, random_config},
DecodeError, PAD_BYTE,
};
@@ -24,9 +27,20 @@ use crate::{
#[rstest(engine_wrapper,
case::general_purpose(GeneralPurposeWrapper {}),
case::naive(NaiveWrapper {}),
+case::decoder_reader(DecoderReaderEngineWrapper {}),
)]
fn all_engines<E: EngineWrapper>(engine_wrapper: E) {}
+/// Some decode tests don't make sense for use with `DecoderReader` as they are difficult to
+/// reason about or otherwise inapplicable given how DecoderReader slice up its input along
+/// chunk boundaries.
+#[template]
+#[rstest(engine_wrapper,
+case::general_purpose(GeneralPurposeWrapper {}),
+case::naive(NaiveWrapper {}),
+)]
+fn all_engines_except_decoder_reader<E: EngineWrapper>(engine_wrapper: E) {}
+
#[apply(all_engines)]
fn rfc_test_vectors_std_alphabet<E: EngineWrapper>(engine_wrapper: E) {
let data = vec![
@@ -86,7 +100,7 @@ fn rfc_test_vectors_std_alphabet<E: EngineWrapper>(engine_wrapper: E) {
&encoded_without_padding,
&std::str::from_utf8(&encode_buf[0..encode_len]).unwrap()
);
- let pad_len = add_padding(orig.len(), &mut encode_buf[encode_len..]);
+ let pad_len = add_padding(encode_len, &mut encode_buf[encode_len..]);
assert_eq!(encoded.as_bytes(), &encode_buf[..encode_len + pad_len]);
let decode_len = engine
@@ -195,7 +209,10 @@ fn encode_doesnt_write_extra_bytes<E: EngineWrapper>(engine_wrapper: E) {
// pad so we can decode it in case our random engine requires padding
let pad_len = if padded {
- add_padding(orig_len, &mut encode_buf[prefix_len + encoded_len_no_pad..])
+ add_padding(
+ encoded_len_no_pad,
+ &mut encode_buf[prefix_len + encoded_len_no_pad..],
+ )
} else {
0
};
@@ -382,7 +399,7 @@ fn decode_detect_invalid_last_symbol_every_possible_two_symbols<E: EngineWrapper
for b in 0_u8..=255 {
let mut b64 = vec![0_u8; 4];
assert_eq!(2, engine.internal_encode(&[b], &mut b64[..]));
- let _ = add_padding(1, &mut b64[2..]);
+ let _ = add_padding(2, &mut b64[2..]);
assert!(base64_to_bytes.insert(b64, vec![b]).is_none());
}
@@ -442,7 +459,7 @@ fn decode_detect_invalid_last_symbol_every_possible_three_symbols<E: EngineWrapp
bytes[1] = b2;
let mut b64 = vec![0_u8; 4];
assert_eq!(3, engine.internal_encode(&bytes, &mut b64[..]));
- let _ = add_padding(2, &mut b64[3..]);
+ let _ = add_padding(3, &mut b64[3..]);
let mut v = Vec::with_capacity(2);
v.extend_from_slice(&bytes[..]);
@@ -549,7 +566,7 @@ fn decode_invalid_byte_error<E: EngineWrapper>(engine_wrapper: E) {
let len_range = distributions::Uniform::new(1, 1_000);
- for _ in 0..10_000 {
+ for _ in 0..100_000 {
let alphabet = random_alphabet(&mut rng);
let engine = E::random_alphabet(&mut rng, alphabet);
@@ -573,7 +590,7 @@ fn decode_invalid_byte_error<E: EngineWrapper>(engine_wrapper: E) {
let invalid_byte: u8 = loop {
let byte: u8 = rng.gen();
- if alphabet.symbols.contains(&byte) {
+ if alphabet.symbols.contains(&byte) || byte == PAD_BYTE {
continue;
} else {
break byte;
@@ -597,14 +614,16 @@ fn decode_invalid_byte_error<E: EngineWrapper>(engine_wrapper: E) {
/// Any amount of padding anywhere before the final non padding character = invalid byte at first
/// pad byte.
/// From this, we know padding must extend to the end of the input.
-#[apply(all_engines)]
+// DecoderReader pseudo-engine detects InvalidLastSymbol instead of InvalidLength because it
+// can end a decode on the quad that happens to contain the start of the padding
+#[apply(all_engines_except_decoder_reader)]
fn decode_padding_before_final_non_padding_char_error_invalid_byte<E: EngineWrapper>(
engine_wrapper: E,
) {
let mut rng = seeded_rng();
// the different amounts of proper padding, w/ offset from end for the last non-padding char
- let suffixes = vec![("/w==", 2), ("iYu=", 1), ("zzzz", 0)];
+ let suffixes = [("/w==", 2), ("iYu=", 1), ("zzzz", 0)];
let prefix_quads_range = distributions::Uniform::from(0..=256);
@@ -641,10 +660,13 @@ fn decode_padding_before_final_non_padding_char_error_invalid_byte<E: EngineWrap
}
}
-/// Any amount of padding before final chunk that crosses over into final chunk with 1-4 bytes =
-/// invalid byte at first pad byte (except for 1 byte suffix = invalid length).
-/// From this we know the padding must start in the final chunk.
-#[apply(all_engines)]
+/// Any amount of padding before final chunk that crosses over into final chunk with 2-4 bytes =
+/// invalid byte at first pad byte.
+/// From this and [decode_padding_starts_before_final_chunk_error_invalid_length] we know the
+/// padding must start in the final chunk.
+// DecoderReader pseudo-engine detects InvalidLastSymbol instead of InvalidLength because it
+// can end a decode on the quad that happens to contain the start of the padding
+#[apply(all_engines_except_decoder_reader)]
fn decode_padding_starts_before_final_chunk_error_invalid_byte<E: EngineWrapper>(
engine_wrapper: E,
) {
@@ -652,8 +674,8 @@ fn decode_padding_starts_before_final_chunk_error_invalid_byte<E: EngineWrapper>
// must have at least one prefix quad
let prefix_quads_range = distributions::Uniform::from(1..256);
- // including 1 just to make sure that it really does produce invalid length
- let suffix_pad_len_range = distributions::Uniform::from(1..=4);
+ // excluding 1 since we don't care about invalid length in this test
+ let suffix_pad_len_range = distributions::Uniform::from(2..=4);
for mode in all_pad_modes() {
// we don't encode so we don't care about encode padding
let engine = E::standard_with_pad_mode(true, mode);
@@ -671,14 +693,48 @@ fn decode_padding_starts_before_final_chunk_error_invalid_byte<E: EngineWrapper>
let padding_start = encoded.len() - padding_len;
encoded[padding_start..].fill(PAD_BYTE);
- if suffix_len == 1 {
- assert_eq!(Err(DecodeError::InvalidLength), engine.decode(&encoded),);
- } else {
- assert_eq!(
- Err(DecodeError::InvalidByte(padding_start, PAD_BYTE)),
- engine.decode(&encoded),
- );
- }
+ assert_eq!(
+ Err(DecodeError::InvalidByte(padding_start, PAD_BYTE)),
+ engine.decode(&encoded),
+ "suffix_len: {}, padding_len: {}, b64: {}",
+ suffix_len,
+ padding_len,
+ std::str::from_utf8(&encoded).unwrap()
+ );
+ }
+ }
+}
+
+/// Any amount of padding before final chunk that crosses over into final chunk with 1 byte =
+/// invalid length.
+/// From this we know the padding must start in the final chunk.
+// DecoderReader pseudo-engine detects InvalidByte instead of InvalidLength because it starts by
+// decoding only the available complete quads
+#[apply(all_engines_except_decoder_reader)]
+fn decode_padding_starts_before_final_chunk_error_invalid_length<E: EngineWrapper>(
+ engine_wrapper: E,
+) {
+ let mut rng = seeded_rng();
+
+ // must have at least one prefix quad
+ let prefix_quads_range = distributions::Uniform::from(1..256);
+ for mode in all_pad_modes() {
+ // we don't encode so we don't care about encode padding
+ let engine = E::standard_with_pad_mode(true, mode);
+ for _ in 0..100_000 {
+ let mut encoded = "ABCD"
+ .repeat(prefix_quads_range.sample(&mut rng))
+ .into_bytes();
+ encoded.resize(encoded.len() + 1, PAD_BYTE);
+
+ // amount of padding must be long enough to extend back from suffix into previous
+ // quads
+ let padding_len = rng.gen_range(1 + 1..encoded.len());
+ // no non-padding after padding in this test, so padding goes to the end
+ let padding_start = encoded.len() - padding_len;
+ encoded[padding_start..].fill(PAD_BYTE);
+
+ assert_eq!(Err(DecodeError::InvalidLength), engine.decode(&encoded),);
}
}
}
@@ -787,7 +843,9 @@ fn decode_malleability_test_case_2_byte_suffix_no_padding<E: EngineWrapper>(engi
}
// https://eprint.iacr.org/2022/361.pdf table 2, test 7
-#[apply(all_engines)]
+// DecoderReader pseudo-engine gets InvalidByte at 8 (extra padding) since it decodes the first
+// two complete quads correctly.
+#[apply(all_engines_except_decoder_reader)]
fn decode_malleability_test_case_2_byte_suffix_too_much_padding<E: EngineWrapper>(
engine_wrapper: E,
) {
@@ -811,7 +869,7 @@ fn decode_pad_mode_requires_canonical_accepts_canonical<E: EngineWrapper>(engine
fn decode_pad_mode_requires_canonical_rejects_non_canonical<E: EngineWrapper>(engine_wrapper: E) {
let engine = E::standard_with_pad_mode(true, DecodePaddingMode::RequireCanonical);
- let suffixes = vec!["/w", "/w=", "iYU"];
+ let suffixes = ["/w", "/w=", "iYU"];
for num_prefix_quads in 0..256 {
for &suffix in suffixes.iter() {
let mut encoded = "AAAA".repeat(num_prefix_quads);
@@ -838,7 +896,7 @@ fn decode_pad_mode_requires_no_padding_accepts_no_padding<E: EngineWrapper>(engi
fn decode_pad_mode_requires_no_padding_rejects_any_padding<E: EngineWrapper>(engine_wrapper: E) {
let engine = E::standard_with_pad_mode(true, DecodePaddingMode::RequireNone);
- let suffixes = vec!["/w=", "/w==", "iYU="];
+ let suffixes = ["/w=", "/w==", "iYU="];
for num_prefix_quads in 0..256 {
for &suffix in suffixes.iter() {
let mut encoded = "AAAA".repeat(num_prefix_quads);
@@ -861,7 +919,11 @@ fn decode_pad_mode_indifferent_padding_accepts_anything<E: EngineWrapper>(engine
}
//this is a MAY in the rfc: https://tools.ietf.org/html/rfc4648#section-3.3
-#[apply(all_engines)]
+// DecoderReader pseudo-engine finds the first padding, but doesn't report it as an error,
+// because in the next decode it finds more padding, which is reported as InvalidByte, just
+// with an offset at its position in the second decode, rather than being linked to the start
+// of the padding that was first seen in the previous decode.
+#[apply(all_engines_except_decoder_reader)]
fn decode_pad_byte_in_penultimate_quad_error<E: EngineWrapper>(engine_wrapper: E) {
for mode in all_pad_modes() {
// we don't encode so we don't care about encode padding
@@ -895,7 +957,7 @@ fn decode_pad_byte_in_penultimate_quad_error<E: EngineWrapper>(engine_wrapper: E
num_prefix_quads * 4 + num_valid_bytes_penultimate_quad,
b'=',
),
- engine.decode(&s).unwrap_err()
+ engine.decode(&s).unwrap_err(),
);
}
}
@@ -955,7 +1017,9 @@ fn decode_absurd_pad_error<E: EngineWrapper>(engine_wrapper: E) {
}
}
-#[apply(all_engines)]
+// DecoderReader pseudo-engine detects InvalidByte instead of InvalidLength because it starts by
+// decoding only the available complete quads
+#[apply(all_engines_except_decoder_reader)]
fn decode_too_much_padding_returns_error<E: EngineWrapper>(engine_wrapper: E) {
for mode in all_pad_modes() {
// we don't encode so we don't care about encode padding
@@ -981,7 +1045,9 @@ fn decode_too_much_padding_returns_error<E: EngineWrapper>(engine_wrapper: E) {
}
}
-#[apply(all_engines)]
+// DecoderReader pseudo-engine detects InvalidByte instead of InvalidLength because it starts by
+// decoding only the available complete quads
+#[apply(all_engines_except_decoder_reader)]
fn decode_padding_followed_by_non_padding_returns_error<E: EngineWrapper>(engine_wrapper: E) {
for mode in all_pad_modes() {
// we don't encode so we don't care about encode padding
@@ -1079,27 +1145,43 @@ fn decode_too_few_symbols_in_final_quad_error<E: EngineWrapper>(engine_wrapper:
}
}
-#[apply(all_engines)]
+// DecoderReader pseudo-engine can't handle DecodePaddingMode::RequireNone since it will decode
+// a complete quad with padding in it before encountering the stray byte that makes it an invalid
+// length
+#[apply(all_engines_except_decoder_reader)]
fn decode_invalid_trailing_bytes<E: EngineWrapper>(engine_wrapper: E) {
for mode in all_pad_modes() {
- // we don't encode so we don't care about encode padding
- let engine = E::standard_with_pad_mode(true, mode);
+ do_invalid_trailing_byte(E::standard_with_pad_mode(true, mode), mode);
+ }
+}
- for num_prefix_quads in 0..256 {
- let mut s: String = "ABCD".repeat(num_prefix_quads);
- s.push_str("Cg==\n");
+#[apply(all_engines)]
+fn decode_invalid_trailing_bytes_all_modes<E: EngineWrapper>(engine_wrapper: E) {
+ // excluding no padding mode because the DecoderWrapper pseudo-engine will fail with
+ // InvalidPadding because it will decode the last complete quad with padding first
+ for mode in pad_modes_allowing_padding() {
+ do_invalid_trailing_byte(E::standard_with_pad_mode(true, mode), mode);
+ }
+}
- // The case of trailing newlines is common enough to warrant a test for a good error
- // message.
- assert_eq!(
- Err(DecodeError::InvalidByte(num_prefix_quads * 4 + 4, b'\n')),
- engine.decode(&s)
- );
+#[apply(all_engines)]
+fn decode_invalid_trailing_padding_as_invalid_length<E: EngineWrapper>(engine_wrapper: E) {
+ // excluding no padding mode because the DecoderWrapper pseudo-engine will fail with
+ // InvalidPadding because it will decode the last complete quad with padding first
+ for mode in pad_modes_allowing_padding() {
+ do_invalid_trailing_padding_as_invalid_length(E::standard_with_pad_mode(true, mode), mode);
+ }
+}
- // extra padding, however, is still InvalidLength
- let s = s.replace('\n', "=");
- assert_eq!(Err(DecodeError::InvalidLength), engine.decode(s));
- }
+// DecoderReader pseudo-engine can't handle DecodePaddingMode::RequireNone since it will decode
+// a complete quad with padding in it before encountering the stray byte that makes it an invalid
+// length
+#[apply(all_engines_except_decoder_reader)]
+fn decode_invalid_trailing_padding_as_invalid_length_all_modes<E: EngineWrapper>(
+ engine_wrapper: E,
+) {
+ for mode in all_pad_modes() {
+ do_invalid_trailing_padding_as_invalid_length(E::standard_with_pad_mode(true, mode), mode);
}
}
@@ -1178,6 +1260,53 @@ fn decode_into_slice_fits_in_precisely_sized_slice<E: EngineWrapper>(engine_wrap
}
#[apply(all_engines)]
+fn inner_decode_reports_padding_position<E: EngineWrapper>(engine_wrapper: E) {
+ let mut b64 = String::new();
+ let mut decoded = Vec::new();
+ let engine = E::standard();
+
+ for pad_position in 1..10_000 {
+ b64.clear();
+ decoded.clear();
+ // plenty of room for original data
+ decoded.resize(pad_position, 0);
+
+ for _ in 0..pad_position {
+ b64.push('A');
+ }
+ // finish the quad with padding
+ for _ in 0..(4 - (pad_position % 4)) {
+ b64.push('=');
+ }
+
+ let decode_res = engine.internal_decode(
+ b64.as_bytes(),
+ &mut decoded[..],
+ engine.internal_decoded_len_estimate(b64.len()),
+ );
+ if pad_position % 4 < 2 {
+ // impossible padding
+ assert_eq!(
+ Err(DecodeError::InvalidByte(pad_position, PAD_BYTE)),
+ decode_res
+ );
+ } else {
+ let decoded_bytes = pad_position / 4 * 3
+ + match pad_position % 4 {
+ 0 => 0,
+ 2 => 1,
+ 3 => 2,
+ _ => unreachable!(),
+ };
+ assert_eq!(
+ Ok(DecodeMetadata::new(decoded_bytes, Some(pad_position))),
+ decode_res
+ );
+ }
+ }
+}
+
+#[apply(all_engines)]
fn decode_length_estimate_delta<E: EngineWrapper>(engine_wrapper: E) {
for engine in [E::standard(), E::standard_unpadded()] {
for &padding in &[true, false] {
@@ -1200,6 +1329,64 @@ fn decode_length_estimate_delta<E: EngineWrapper>(engine_wrapper: E) {
}
}
+#[apply(all_engines)]
+fn estimate_via_u128_inflation<E: EngineWrapper>(engine_wrapper: E) {
+ // cover both ends of usize
+ (0..1000)
+ .chain(usize::MAX - 1000..=usize::MAX)
+ .for_each(|encoded_len| {
+ // inflate to 128 bit type to be able to safely use the easy formulas
+ let len_128 = encoded_len as u128;
+
+ let estimate = E::standard()
+ .internal_decoded_len_estimate(encoded_len)
+ .decoded_len_estimate();
+
+ // This check is a little too strict: it requires using the (len + 3) / 4 * 3 formula
+ // or equivalent, but until other engines come along that use a different formula
+ // requiring that we think more carefully about what the allowable criteria are, this
+ // will do.
+ assert_eq!(
+ ((len_128 + 3) / 4 * 3) as usize,
+ estimate,
+ "enc len {}",
+ encoded_len
+ );
+ })
+}
+
+fn do_invalid_trailing_byte(engine: impl Engine, mode: DecodePaddingMode) {
+ for num_prefix_quads in 0..256 {
+ let mut s: String = "ABCD".repeat(num_prefix_quads);
+ s.push_str("Cg==\n");
+
+ // The case of trailing newlines is common enough to warrant a test for a good error
+ // message.
+ assert_eq!(
+ Err(DecodeError::InvalidByte(num_prefix_quads * 4 + 4, b'\n')),
+ engine.decode(&s),
+ "mode: {:?}, input: {}",
+ mode,
+ s
+ );
+ }
+}
+
+fn do_invalid_trailing_padding_as_invalid_length(engine: impl Engine, mode: DecodePaddingMode) {
+ for num_prefix_quads in 0..256 {
+ let mut s: String = "ABCD".repeat(num_prefix_quads);
+ s.push_str("Cg===");
+
+ assert_eq!(
+ Err(DecodeError::InvalidLength),
+ engine.decode(&s),
+ "mode: {:?}, input: {}",
+ mode,
+ s
+ );
+ }
+}
+
/// Returns a tuple of the original data length, the encoded data length (just data), and the length including padding.
///
/// Vecs provided should be empty.
@@ -1219,7 +1406,7 @@ fn generate_random_encoded_data<E: Engine, R: rand::Rng, D: distributions::Distr
let base_encoded_len = engine.internal_encode(&orig_data[..], &mut encode_buf[..]);
let enc_len_with_padding = if padding {
- base_encoded_len + add_padding(orig_len, &mut encode_buf[base_encoded_len..])
+ base_encoded_len + add_padding(base_encoded_len, &mut encode_buf[base_encoded_len..])
} else {
base_encoded_len
};
@@ -1249,11 +1436,7 @@ fn fill_rand_len<R: rand::Rng>(vec: &mut Vec<u8>, rng: &mut R, len: usize) {
}
}
-fn prefixed_data<'i, 'd>(
- input_with_prefix: &'i mut String,
- prefix_len: usize,
- data: &'d str,
-) -> &'i str {
+fn prefixed_data<'i>(input_with_prefix: &'i mut String, prefix_len: usize, data: &str) -> &'i str {
input_with_prefix.truncate(prefix_len);
input_with_prefix.push_str(data);
input_with_prefix.as_str()
@@ -1405,6 +1588,103 @@ impl EngineWrapper for NaiveWrapper {
}
}
+/// A pseudo-Engine that routes all decoding through [DecoderReader]
+struct DecoderReaderEngine<E: Engine> {
+ engine: E,
+}
+
+impl<E: Engine> From<E> for DecoderReaderEngine<E> {
+ fn from(value: E) -> Self {
+ Self { engine: value }
+ }
+}
+
+impl<E: Engine> Engine for DecoderReaderEngine<E> {
+ type Config = E::Config;
+ type DecodeEstimate = E::DecodeEstimate;
+
+ fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize {
+ self.engine.internal_encode(input, output)
+ }
+
+ fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate {
+ self.engine.internal_decoded_len_estimate(input_len)
+ }
+
+ fn internal_decode(
+ &self,
+ input: &[u8],
+ output: &mut [u8],
+ decode_estimate: Self::DecodeEstimate,
+ ) -> Result<DecodeMetadata, DecodeError> {
+ let mut reader = DecoderReader::new(input, &self.engine);
+ let mut buf = vec![0; input.len()];
+ // to avoid effects like not detecting invalid length due to progressively growing
+ // the output buffer in read_to_end etc, read into a big enough buffer in one go
+ // to make behavior more consistent with normal engines
+ let _ = reader
+ .read(&mut buf)
+ .and_then(|len| {
+ buf.truncate(len);
+ // make sure we got everything
+ reader.read_to_end(&mut buf)
+ })
+ .map_err(|io_error| {
+ *io_error
+ .into_inner()
+ .and_then(|inner| inner.downcast::<DecodeError>().ok())
+ .unwrap()
+ })?;
+ output[..buf.len()].copy_from_slice(&buf);
+ Ok(DecodeMetadata::new(
+ buf.len(),
+ input
+ .iter()
+ .enumerate()
+ .filter(|(_offset, byte)| **byte == PAD_BYTE)
+ .map(|(offset, _byte)| offset)
+ .next(),
+ ))
+ }
+
+ fn config(&self) -> &Self::Config {
+ self.engine.config()
+ }
+}
+
+struct DecoderReaderEngineWrapper {}
+
+impl EngineWrapper for DecoderReaderEngineWrapper {
+ type Engine = DecoderReaderEngine<general_purpose::GeneralPurpose>;
+
+ fn standard() -> Self::Engine {
+ GeneralPurposeWrapper::standard().into()
+ }
+
+ fn standard_unpadded() -> Self::Engine {
+ GeneralPurposeWrapper::standard_unpadded().into()
+ }
+
+ fn standard_with_pad_mode(
+ encode_pad: bool,
+ decode_pad_mode: DecodePaddingMode,
+ ) -> Self::Engine {
+ GeneralPurposeWrapper::standard_with_pad_mode(encode_pad, decode_pad_mode).into()
+ }
+
+ fn standard_allow_trailing_bits() -> Self::Engine {
+ GeneralPurposeWrapper::standard_allow_trailing_bits().into()
+ }
+
+ fn random<R: rand::Rng>(rng: &mut R) -> Self::Engine {
+ GeneralPurposeWrapper::random(rng).into()
+ }
+
+ fn random_alphabet<R: rand::Rng>(rng: &mut R, alphabet: &Alphabet) -> Self::Engine {
+ GeneralPurposeWrapper::random_alphabet(rng, alphabet).into()
+ }
+}
+
fn seeded_rng() -> impl rand::Rng {
rngs::SmallRng::from_entropy()
}
@@ -1417,6 +1697,13 @@ fn all_pad_modes() -> Vec<DecodePaddingMode> {
]
}
+fn pad_modes_allowing_padding() -> Vec<DecodePaddingMode> {
+ vec![
+ DecodePaddingMode::Indifferent,
+ DecodePaddingMode::RequireCanonical,
+ ]
+}
+
fn assert_all_suffixes_ok<E: Engine>(engine: E, suffixes: Vec<&str>) {
for num_prefix_quads in 0..256 {
for &suffix in suffixes.iter() {
diff --git a/src/lib.rs b/src/lib.rs
index cc9d628..6ec3c12 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,101 +1,127 @@
-//! # Getting started
+//! Correct, fast, and configurable [base64][] decoding and encoding. Base64
+//! transports binary data efficiently in contexts where only plain text is
+//! allowed.
//!
-//! 1. Perhaps one of the preconfigured engines in [engine::general_purpose] will suit, e.g.
-//! [engine::general_purpose::STANDARD_NO_PAD].
-//! - These are re-exported in [prelude] with a `BASE64_` prefix for those who prefer to
-//! `use base64::prelude::*` or equivalent, e.g. [prelude::BASE64_STANDARD_NO_PAD]
-//! 1. If not, choose which alphabet you want. Most usage will want [alphabet::STANDARD] or [alphabet::URL_SAFE].
-//! 1. Choose which [Engine] implementation you want. For the moment there is only one: [engine::GeneralPurpose].
-//! 1. Configure the engine appropriately using the engine's `Config` type.
-//! - This is where you'll select whether to add padding (when encoding) or expect it (when
-//! decoding). If given the choice, prefer no padding.
-//! 1. Build the engine using the selected alphabet and config.
+//! [base64]: https://developer.mozilla.org/en-US/docs/Glossary/Base64
//!
-//! For more detail, see below.
+//! # Usage
//!
-//! ## Alphabets
+//! Use an [`Engine`] to decode or encode base64, configured with the base64
+//! alphabet and padding behavior best suited to your application.
//!
-//! An [alphabet::Alphabet] defines what ASCII symbols are used to encode to or decode from.
+//! ## Engine setup
//!
-//! Constants in [alphabet] like [alphabet::STANDARD] or [alphabet::URL_SAFE] provide commonly used
-//! alphabets, but you can also build your own custom [alphabet::Alphabet] if needed.
+//! There is more than one way to encode a stream of bytes as “base64”.
+//! Different applications use different encoding
+//! [alphabets][alphabet::Alphabet] and
+//! [padding behaviors][engine::general_purpose::GeneralPurposeConfig].
//!
-//! ## Engines
+//! ### Encoding alphabet
//!
-//! Once you have an `Alphabet`, you can pick which `Engine` you want. A few parts of the public
-//! API provide a default, but otherwise the user must provide an `Engine` to use.
+//! Almost all base64 [alphabets][alphabet::Alphabet] use `A-Z`, `a-z`, and
+//! `0-9`, which gives nearly 64 characters (26 + 26 + 10 = 62), but they differ
+//! in their choice of their final 2.
//!
-//! See [Engine] for more.
+//! Most applications use the [standard][alphabet::STANDARD] alphabet specified
+//! in [RFC 4648][rfc-alphabet]. If that’s all you need, you can get started
+//! quickly by using the pre-configured
+//! [`STANDARD`][engine::general_purpose::STANDARD] engine, which is also available
+//! in the [`prelude`] module as shown here, if you prefer a minimal `use`
+//! footprint.
//!
-//! ## Config
+#![cfg_attr(feature = "alloc", doc = "```")]
+#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
+//! use base64::prelude::*;
//!
-//! In addition to an `Alphabet`, constructing an `Engine` also requires an [engine::Config]. Each
-//! `Engine` has a corresponding `Config` implementation since different `Engine`s may offer different
-//! levels of configurability.
-//!
-//! # Encoding
-//!
-//! Several different encoding methods on [Engine] are available to you depending on your desire for
-//! convenience vs performance.
+//! # fn main() -> Result<(), base64::DecodeError> {
+//! assert_eq!(BASE64_STANDARD.decode(b"+uwgVQA=")?, b"\xFA\xEC\x20\x55\0");
+//! assert_eq!(BASE64_STANDARD.encode(b"\xFF\xEC\x20\x55\0"), "/+wgVQA=");
+//! # Ok(())
+//! # }
+//! ```
//!
-//! | Method | Output | Allocates |
-//! | ------------------------ | ---------------------------- | ------------------------------ |
-//! | [Engine::encode] | Returns a new `String` | Always |
-//! | [Engine::encode_string] | Appends to provided `String` | Only if `String` needs to grow |
-//! | [Engine::encode_slice] | Writes to provided `&[u8]` | Never - fastest |
+//! [rfc-alphabet]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
//!
-//! All of the encoding methods will pad as per the engine's config.
+//! Other common alphabets are available in the [`alphabet`] module.
//!
-//! # Decoding
+//! #### URL-safe alphabet
//!
-//! Just as for encoding, there are different decoding methods available.
+//! The standard alphabet uses `+` and `/` as its two non-alphanumeric tokens,
+//! which cannot be safely used in URL’s without encoding them as `%2B` and
+//! `%2F`.
//!
-//! | Method | Output | Allocates |
-//! | ------------------------ | ----------------------------- | ------------------------------ |
-//! | [Engine::decode] | Returns a new `Vec<u8>` | Always |
-//! | [Engine::decode_vec] | Appends to provided `Vec<u8>` | Only if `Vec` needs to grow |
-//! | [Engine::decode_slice] | Writes to provided `&[u8]` | Never - fastest |
+//! To avoid that, some applications use a [“URL-safe” alphabet][alphabet::URL_SAFE],
+//! which uses `-` and `_` instead. To use that alternative alphabet, use the
+//! [`URL_SAFE`][engine::general_purpose::URL_SAFE] engine. This example doesn't
+//! use [`prelude`] to show what a more explicit `use` would look like.
//!
-//! Unlike encoding, where all possible input is valid, decoding can fail (see [DecodeError]).
+#![cfg_attr(feature = "alloc", doc = "```")]
+#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
+//! use base64::{engine::general_purpose::URL_SAFE, Engine as _};
//!
-//! Input can be invalid because it has invalid characters or invalid padding. The nature of how
-//! padding is checked depends on the engine's config.
-//! Whitespace in the input is invalid, just like any other non-base64 byte.
+//! # fn main() -> Result<(), base64::DecodeError> {
+//! assert_eq!(URL_SAFE.decode(b"-uwgVQA=")?, b"\xFA\xEC\x20\x55\0");
+//! assert_eq!(URL_SAFE.encode(b"\xFF\xEC\x20\x55\0"), "_-wgVQA=");
+//! # Ok(())
+//! # }
+//! ```
//!
-//! # `Read` and `Write`
+//! ### Padding characters
//!
-//! To decode a [std::io::Read] of b64 bytes, wrap a reader (file, network socket, etc) with
-//! [read::DecoderReader].
+//! Each base64 character represents 6 bits (2⁶ = 64) of the original binary
+//! data, and every 3 bytes of input binary data will encode to 4 base64
+//! characters (8 bits × 3 = 6 bits × 4 = 24 bits).
//!
-//! To write raw bytes and have them b64 encoded on the fly, wrap a [std::io::Write] with
-//! [write::EncoderWriter].
+//! When the input is not an even multiple of 3 bytes in length, [canonical][]
+//! base64 encoders insert padding characters at the end, so that the output
+//! length is always a multiple of 4:
//!
-//! There is some performance overhead (15% or so) because of the necessary buffer shuffling --
-//! still fast enough that almost nobody cares. Also, these implementations do not heap allocate.
+//! [canonical]: https://datatracker.ietf.org/doc/html/rfc4648#section-3.5
//!
-//! # `Display`
+#![cfg_attr(feature = "alloc", doc = "```")]
+#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
+//! use base64::{engine::general_purpose::STANDARD, Engine as _};
//!
-//! See [display] for how to transparently base64 data via a `Display` implementation.
+//! assert_eq!(STANDARD.encode(b""), "");
+//! assert_eq!(STANDARD.encode(b"f"), "Zg==");
+//! assert_eq!(STANDARD.encode(b"fo"), "Zm8=");
+//! assert_eq!(STANDARD.encode(b"foo"), "Zm9v");
+//! ```
//!
-//! # Examples
+//! Canonical encoding ensures that base64 encodings will be exactly the same,
+//! byte-for-byte, regardless of input length. But the `=` padding characters
+//! aren’t necessary for decoding, and they may be omitted by using a
+//! [`NO_PAD`][engine::general_purpose::NO_PAD] configuration:
//!
-//! ## Using predefined engines
+#![cfg_attr(feature = "alloc", doc = "```")]
+#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
+//! use base64::{engine::general_purpose::STANDARD_NO_PAD, Engine as _};
//!
+//! assert_eq!(STANDARD_NO_PAD.encode(b""), "");
+//! assert_eq!(STANDARD_NO_PAD.encode(b"f"), "Zg");
+//! assert_eq!(STANDARD_NO_PAD.encode(b"fo"), "Zm8");
+//! assert_eq!(STANDARD_NO_PAD.encode(b"foo"), "Zm9v");
//! ```
-//! use base64::{Engine as _, engine::general_purpose};
//!
-//! let orig = b"data";
-//! let encoded: String = general_purpose::STANDARD_NO_PAD.encode(orig);
-//! assert_eq!("ZGF0YQ", encoded);
-//! assert_eq!(orig.as_slice(), &general_purpose::STANDARD_NO_PAD.decode(encoded).unwrap());
+//! The pre-configured `NO_PAD` engines will reject inputs containing padding
+//! `=` characters. To encode without padding and still accept padding while
+//! decoding, create an [engine][engine::general_purpose::GeneralPurpose] with
+//! that [padding mode][engine::DecodePaddingMode].
//!
-//! // or, URL-safe
-//! let encoded_url = general_purpose::URL_SAFE_NO_PAD.encode(orig);
+#![cfg_attr(feature = "alloc", doc = "```")]
+#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
+//! # use base64::{engine::general_purpose::STANDARD_NO_PAD, Engine as _};
+//! assert_eq!(STANDARD_NO_PAD.decode(b"Zm8="), Err(base64::DecodeError::InvalidPadding));
//! ```
//!
-//! ## Custom alphabet, config, and engine
+//! ### Further customization
//!
-//! ```
+//! Decoding and encoding behavior can be customized by creating an
+//! [engine][engine::GeneralPurpose] with an [alphabet][alphabet::Alphabet] and
+//! [padding configuration][engine::GeneralPurposeConfig]:
+//!
+#![cfg_attr(feature = "alloc", doc = "```")]
+#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
//! use base64::{engine, alphabet, Engine as _};
//!
//! // bizarro-world base64: +/ as the first symbols instead of the last
@@ -115,6 +141,81 @@
//!
//! ```
//!
+//! ## Memory allocation
+//!
+//! The [decode][Engine::decode()] and [encode][Engine::encode()] engine methods
+//! allocate memory for their results – `decode` returns a `Vec<u8>` and
+//! `encode` returns a `String`. To instead decode or encode into a buffer that
+//! you allocated, use one of the alternative methods:
+//!
+//! #### Decoding
+//!
+//! | Method | Output | Allocates memory |
+//! | -------------------------- | ----------------------------- | ----------------------------- |
+//! | [`Engine::decode`] | returns a new `Vec<u8>` | always |
+//! | [`Engine::decode_vec`] | appends to provided `Vec<u8>` | if `Vec` lacks capacity |
+//! | [`Engine::decode_slice`] | writes to provided `&[u8]` | never
+//!
+//! #### Encoding
+//!
+//! | Method | Output | Allocates memory |
+//! | -------------------------- | ---------------------------- | ------------------------------ |
+//! | [`Engine::encode`] | returns a new `String` | always |
+//! | [`Engine::encode_string`] | appends to provided `String` | if `String` lacks capacity |
+//! | [`Engine::encode_slice`] | writes to provided `&[u8]` | never |
+//!
+//! ## Input and output
+//!
+//! The `base64` crate can [decode][Engine::decode()] and
+//! [encode][Engine::encode()] values in memory, or
+//! [`DecoderReader`][read::DecoderReader] and
+//! [`EncoderWriter`][write::EncoderWriter] provide streaming decoding and
+//! encoding for any [readable][std::io::Read] or [writable][std::io::Write]
+//! byte stream.
+//!
+//! #### Decoding
+//!
+#![cfg_attr(feature = "std", doc = "```")]
+#![cfg_attr(not(feature = "std"), doc = "```ignore")]
+//! # use std::io;
+//! use base64::{engine::general_purpose::STANDARD, read::DecoderReader};
+//!
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! let mut input = io::stdin();
+//! let mut decoder = DecoderReader::new(&mut input, &STANDARD);
+//! io::copy(&mut decoder, &mut io::stdout())?;
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! #### Encoding
+//!
+#![cfg_attr(feature = "std", doc = "```")]
+#![cfg_attr(not(feature = "std"), doc = "```ignore")]
+//! # use std::io;
+//! use base64::{engine::general_purpose::STANDARD, write::EncoderWriter};
+//!
+//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
+//! let mut output = io::stdout();
+//! let mut encoder = EncoderWriter::new(&mut output, &STANDARD);
+//! io::copy(&mut io::stdin(), &mut encoder)?;
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! #### Display
+//!
+//! If you only need a base64 representation for implementing the
+//! [`Display`][std::fmt::Display] trait, use
+//! [`Base64Display`][display::Base64Display]:
+//!
+//! ```
+//! use base64::{display::Base64Display, engine::general_purpose::STANDARD};
+//!
+//! let value = Base64Display::new(b"\0\x01\x02\x03", &STANDARD);
+//! assert_eq!("base64: AAECAw==", format!("base64: {}", value));
+//! ```
+//!
//! # Panics
//!
//! If length calculations result in overflowing `usize`, a panic will result.
@@ -136,10 +237,8 @@
#![allow(clippy::single_component_path_imports)]
#![cfg_attr(not(any(feature = "std", test)), no_std)]
-#[cfg(all(feature = "alloc", not(any(feature = "std", test))))]
+#[cfg(any(feature = "alloc", test))]
extern crate alloc;
-#[cfg(any(feature = "std", test))]
-extern crate std as alloc;
// has to be included at top level because of the way rstest_reuse defines its macros
#[cfg(test)]
@@ -159,14 +258,14 @@ pub mod alphabet;
mod encode;
#[allow(deprecated)]
-#[cfg(any(feature = "alloc", feature = "std", test))]
+#[cfg(any(feature = "alloc", test))]
pub use crate::encode::{encode, encode_engine, encode_engine_string};
#[allow(deprecated)]
pub use crate::encode::{encode_engine_slice, encoded_len, EncodeSliceError};
mod decode;
#[allow(deprecated)]
-#[cfg(any(feature = "alloc", feature = "std", test))]
+#[cfg(any(feature = "alloc", test))]
pub use crate::decode::{decode, decode_engine, decode_engine_vec};
#[allow(deprecated)]
pub use crate::decode::{decode_engine_slice, decoded_len_estimate, DecodeError, DecodeSliceError};
diff --git a/src/prelude.rs b/src/prelude.rs
index fbeb5ba..df5fdb4 100644
--- a/src/prelude.rs
+++ b/src/prelude.rs
@@ -5,7 +5,8 @@
//!
//! # Examples
//!
-//! ```
+#![cfg_attr(feature = "alloc", doc = "```")]
+#![cfg_attr(not(feature = "alloc"), doc = "```ignore")]
//! use base64::prelude::{Engine as _, BASE64_STANDARD_NO_PAD};
//!
//! assert_eq!("c29tZSBieXRlcw", &BASE64_STANDARD_NO_PAD.encode(b"some bytes"));
diff --git a/src/read/decoder.rs b/src/read/decoder.rs
index 4888c9c..b656ae3 100644
--- a/src/read/decoder.rs
+++ b/src/read/decoder.rs
@@ -1,4 +1,4 @@
-use crate::{engine::Engine, DecodeError};
+use crate::{engine::Engine, DecodeError, PAD_BYTE};
use std::{cmp, fmt, io};
// This should be large, but it has to fit on the stack.
@@ -46,13 +46,15 @@ pub struct DecoderReader<'e, E: Engine, R: io::Read> {
// Technically we only need to hold 2 bytes but then we'd need a separate temporary buffer to
// decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest
// into here, which seems like a lot of complexity for 1 extra byte of storage.
- decoded_buffer: [u8; 3],
+ decoded_buffer: [u8; DECODED_CHUNK_SIZE],
// index of start of decoded data
decoded_offset: usize,
// length of decoded data
decoded_len: usize,
// used to provide accurate offsets in errors
total_b64_decoded: usize,
+ // offset of previously seen padding, if any
+ padding_offset: Option<usize>,
}
impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> {
@@ -64,6 +66,7 @@ impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> {
.field("decoded_offset", &self.decoded_offset)
.field("decoded_len", &self.decoded_len)
.field("total_b64_decoded", &self.total_b64_decoded)
+ .field("padding_offset", &self.padding_offset)
.finish()
}
}
@@ -81,6 +84,7 @@ impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
decoded_offset: 0,
decoded_len: 0,
total_b64_decoded: 0,
+ padding_offset: None,
}
}
@@ -127,20 +131,28 @@ impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
/// caller's responsibility to choose the number of b64 bytes to decode correctly.
///
/// Returns a Result with the number of decoded bytes written to `buf`.
- fn decode_to_buf(&mut self, num_bytes: usize, buf: &mut [u8]) -> io::Result<usize> {
- debug_assert!(self.b64_len >= num_bytes);
+ fn decode_to_buf(&mut self, b64_len_to_decode: usize, buf: &mut [u8]) -> io::Result<usize> {
+ debug_assert!(self.b64_len >= b64_len_to_decode);
debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
debug_assert!(!buf.is_empty());
- let decoded = self
+ let b64_to_decode = &self.b64_buffer[self.b64_offset..self.b64_offset + b64_len_to_decode];
+ let decode_metadata = self
.engine
.internal_decode(
- &self.b64_buffer[self.b64_offset..self.b64_offset + num_bytes],
+ b64_to_decode,
buf,
- self.engine.internal_decoded_len_estimate(num_bytes),
+ self.engine.internal_decoded_len_estimate(b64_len_to_decode),
)
.map_err(|e| match e {
DecodeError::InvalidByte(offset, byte) => {
+ // This can be incorrect, but not in a way that probably matters to anyone:
+ // if there was padding handled in a previous decode, and we are now getting
+ // InvalidByte due to more padding, we should arguably report InvalidByte with
+ // PAD_BYTE at the original padding position (`self.padding_offset`), but we
+ // don't have a good way to tie those two cases together, so instead we
+ // just report the invalid byte as if the previous padding, and its possibly
+ // related downgrade to a now invalid byte, didn't happen.
DecodeError::InvalidByte(self.total_b64_decoded + offset, byte)
}
DecodeError::InvalidLength => DecodeError::InvalidLength,
@@ -151,13 +163,27 @@ impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
})
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
- self.total_b64_decoded += num_bytes;
- self.b64_offset += num_bytes;
- self.b64_len -= num_bytes;
+ if let Some(offset) = self.padding_offset {
+ // we've already seen padding
+ if decode_metadata.decoded_len > 0 {
+ // we read more after already finding padding; report error at first padding byte
+ return Err(io::Error::new(
+ io::ErrorKind::InvalidData,
+ DecodeError::InvalidByte(offset, PAD_BYTE),
+ ));
+ }
+ }
+
+ self.padding_offset = self.padding_offset.or(decode_metadata
+ .padding_offset
+ .map(|offset| self.total_b64_decoded + offset));
+ self.total_b64_decoded += b64_len_to_decode;
+ self.b64_offset += b64_len_to_decode;
+ self.b64_len -= b64_len_to_decode;
debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
- Ok(decoded)
+ Ok(decode_metadata.decoded_len)
}
/// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded
@@ -205,9 +231,9 @@ impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> {
self.decoded_offset < DECODED_CHUNK_SIZE
});
- // We shouldn't ever decode into here when we can't immediately write at least one byte into
- // the provided buf, so the effective length should only be 3 momentarily between when we
- // decode and when we copy into the target buffer.
+ // We shouldn't ever decode into decoded_buffer when we can't immediately write at least one
+ // byte into the provided buf, so the effective length should only be 3 momentarily between
+ // when we decode and when we copy into the target buffer.
debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE);
@@ -217,20 +243,15 @@ impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> {
} else {
let mut at_eof = false;
while self.b64_len < BASE64_CHUNK_SIZE {
- // Work around lack of copy_within, which is only present in 1.37
// Copy any bytes we have to the start of the buffer.
- // We know we have < 1 chunk, so we can use a tiny tmp buffer.
- let mut memmove_buf = [0_u8; BASE64_CHUNK_SIZE];
- memmove_buf[..self.b64_len].copy_from_slice(
- &self.b64_buffer[self.b64_offset..self.b64_offset + self.b64_len],
- );
- self.b64_buffer[0..self.b64_len].copy_from_slice(&memmove_buf[..self.b64_len]);
+ self.b64_buffer
+ .copy_within(self.b64_offset..self.b64_offset + self.b64_len, 0);
self.b64_offset = 0;
// then fill in more data
let read = self.read_from_delegate()?;
if read == 0 {
- // we never pass in an empty buf, so 0 => we've hit EOF
+ // we never read into an empty buf, so 0 => we've hit EOF
at_eof = true;
break;
}
diff --git a/src/read/decoder_tests.rs b/src/read/decoder_tests.rs
index 65d58d8..099dd63 100644
--- a/src/read/decoder_tests.rs
+++ b/src/read/decoder_tests.rs
@@ -8,9 +8,10 @@ use rand::{Rng as _, RngCore as _};
use super::decoder::{DecoderReader, BUF_SIZE};
use crate::{
+ alphabet,
engine::{general_purpose::STANDARD, Engine, GeneralPurpose},
tests::{random_alphabet, random_config, random_engine},
- DecodeError,
+ DecodeError, PAD_BYTE,
};
#[test]
@@ -75,7 +76,7 @@ fn trailing_junk() {
saw_error = true;
break;
}
- Ok(read) if read == 0 => break,
+ Ok(0) => break,
Ok(_) => (),
}
}
@@ -247,19 +248,21 @@ fn reports_invalid_byte_correctly() {
let mut rng = rand::thread_rng();
let mut bytes = Vec::new();
let mut b64 = String::new();
- let mut decoded = Vec::new();
+ let mut stream_decoded = Vec::new();
+ let mut bulk_decoded = Vec::new();
for _ in 0..10_000 {
bytes.clear();
b64.clear();
- decoded.clear();
+ stream_decoded.clear();
+ bulk_decoded.clear();
let size = rng.gen_range(1..(10 * BUF_SIZE));
bytes.extend(iter::repeat(0).take(size));
rng.fill_bytes(&mut bytes[..size]);
assert_eq!(size, bytes.len());
- let engine = random_engine(&mut rng);
+ let engine = GeneralPurpose::new(&alphabet::STANDARD, random_config(&mut rng));
engine.encode_string(&bytes[..], &mut b64);
// replace one byte, somewhere, with '*', which is invalid
@@ -270,9 +273,8 @@ fn reports_invalid_byte_correctly() {
let mut wrapped_reader = io::Cursor::new(b64_bytes.clone());
let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine);
- // some gymnastics to avoid double-moving the io::Error, which is not Copy
let read_decode_err = decoder
- .read_to_end(&mut decoded)
+ .read_to_end(&mut stream_decoded)
.map_err(|e| {
let kind = e.kind();
let inner = e
@@ -283,8 +285,7 @@ fn reports_invalid_byte_correctly() {
.err()
.and_then(|o| o);
- let mut bulk_buf = Vec::new();
- let bulk_decode_err = engine.decode_vec(&b64_bytes[..], &mut bulk_buf).err();
+ let bulk_decode_err = engine.decode_vec(&b64_bytes[..], &mut bulk_decoded).err();
// it's tricky to predict where the invalid data's offset will be since if it's in the last
// chunk it will be reported at the first padding location because it's treated as invalid
@@ -296,6 +297,134 @@ fn reports_invalid_byte_correctly() {
}
}
+#[test]
+fn internal_padding_error_with_short_read_concatenated_texts_invalid_byte_error() {
+ let mut rng = rand::thread_rng();
+ let mut bytes = Vec::new();
+ let mut b64 = String::new();
+ let mut reader_decoded = Vec::new();
+ let mut bulk_decoded = Vec::new();
+
+ // encodes with padding, requires that padding be present so we don't get InvalidPadding
+ // just because padding is there at all
+ let engine = STANDARD;
+
+ for _ in 0..10_000 {
+ bytes.clear();
+ b64.clear();
+ reader_decoded.clear();
+ bulk_decoded.clear();
+
+ // at least 2 bytes so there can be a split point between bytes
+ let size = rng.gen_range(2..(10 * BUF_SIZE));
+ bytes.resize(size, 0);
+ rng.fill_bytes(&mut bytes[..size]);
+
+ // Concatenate two valid b64s, yielding padding in the middle.
+ // This avoids scenarios that are challenging to assert on, like random padding location
+ // that might be InvalidLastSymbol when decoded at certain buffer sizes but InvalidByte
+ // when done all at once.
+ let split = loop {
+ // find a split point that will produce padding on the first part
+ let s = rng.gen_range(1..size);
+ if s % 3 != 0 {
+ // short enough to need padding
+ break s;
+ };
+ };
+
+ engine.encode_string(&bytes[..split], &mut b64);
+ assert!(b64.contains('='), "split: {}, b64: {}", split, b64);
+ let bad_byte_pos = b64.find('=').unwrap();
+ engine.encode_string(&bytes[split..], &mut b64);
+ let b64_bytes = b64.as_bytes();
+
+ // short read to make it plausible for padding to happen on a read boundary
+ let read_len = rng.gen_range(1..10);
+ let mut wrapped_reader = ShortRead {
+ max_read_len: read_len,
+ delegate: io::Cursor::new(&b64_bytes),
+ };
+
+ let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine);
+
+ let read_decode_err = decoder
+ .read_to_end(&mut reader_decoded)
+ .map_err(|e| {
+ *e.into_inner()
+ .and_then(|e| e.downcast::<DecodeError>().ok())
+ .unwrap()
+ })
+ .unwrap_err();
+
+ let bulk_decode_err = engine.decode_vec(b64_bytes, &mut bulk_decoded).unwrap_err();
+
+ assert_eq!(
+ bulk_decode_err,
+ read_decode_err,
+ "read len: {}, bad byte pos: {}, b64: {}",
+ read_len,
+ bad_byte_pos,
+ std::str::from_utf8(b64_bytes).unwrap()
+ );
+ assert_eq!(
+ DecodeError::InvalidByte(
+ split / 3 * 4
+ + match split % 3 {
+ 1 => 2,
+ 2 => 3,
+ _ => unreachable!(),
+ },
+ PAD_BYTE
+ ),
+ read_decode_err
+ );
+ }
+}
+
+#[test]
+fn internal_padding_anywhere_error() {
+ let mut rng = rand::thread_rng();
+ let mut bytes = Vec::new();
+ let mut b64 = String::new();
+ let mut reader_decoded = Vec::new();
+
+ // encodes with padding, requires that padding be present so we don't get InvalidPadding
+ // just because padding is there at all
+ let engine = STANDARD;
+
+ for _ in 0..10_000 {
+ bytes.clear();
+ b64.clear();
+ reader_decoded.clear();
+
+ bytes.resize(10 * BUF_SIZE, 0);
+ rng.fill_bytes(&mut bytes[..]);
+
+ // Just shove a padding byte in there somewhere.
+ // The specific error to expect is challenging to predict precisely because it
+ // will vary based on the position of the padding in the quad and the read buffer
+ // length, but SOMETHING should go wrong.
+
+ engine.encode_string(&bytes[..], &mut b64);
+ let mut b64_bytes = b64.as_bytes().to_vec();
+ // put padding somewhere other than the last quad
+ b64_bytes[rng.gen_range(0..bytes.len() - 4)] = PAD_BYTE;
+
+ // short read to make it plausible for padding to happen on a read boundary
+ let read_len = rng.gen_range(1..10);
+ let mut wrapped_reader = ShortRead {
+ max_read_len: read_len,
+ delegate: io::Cursor::new(&b64_bytes),
+ };
+
+ let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine);
+
+ let result = decoder.read_to_end(&mut reader_decoded);
+ assert!(result.is_err());
+ }
+}
+
fn consume_with_short_reads_and_validate<R: io::Read>(
rng: &mut rand::rngs::ThreadRng,
expected_bytes: &[u8],
@@ -344,3 +473,15 @@ impl<'a, 'b, R: io::Read, N: rand::Rng> io::Read for RandomShortRead<'a, 'b, R,
self.delegate.read(&mut buf[..effective_len])
}
}
+
+struct ShortRead<R: io::Read> {
+ delegate: R,
+ max_read_len: usize,
+}
+
+impl<R: io::Read> io::Read for ShortRead<R> {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        // Clamp to at most max_read_len bytes: `min`, not `max`. Using `max`
+        // never shortens reads and can index past `buf`'s end.
+        let len = self.max_read_len.min(buf.len());
+        self.delegate.read(&mut buf[..len])
+    }
+}
diff --git a/src/write/encoder_string_writer.rs b/src/write/encoder_string_writer.rs
index 9394dc9..9c02bcd 100644
--- a/src/write/encoder_string_writer.rs
+++ b/src/write/encoder_string_writer.rs
@@ -44,11 +44,6 @@ use std::io;
/// assert_eq!("base64: YXNkZg==", &buf);
/// ```
///
-/// # Panics
-///
-/// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without
-/// error is invalid and will panic.
-///
/// # Performance
///
/// Because it has to validate that the base64 is UTF-8, it is about 80% as fast as writing plain
@@ -144,6 +139,7 @@ mod tests {
engine::Engine, tests::random_engine, write::encoder_string_writer::EncoderStringWriter,
};
use rand::Rng;
+ use std::cmp;
use std::io::Write;
#[test]
@@ -158,9 +154,8 @@ mod tests {
orig_data.clear();
normal_encoded.clear();
- for _ in 0..size {
- orig_data.push(rng.gen());
- }
+ orig_data.resize(size, 0);
+ rng.fill(&mut orig_data[..]);
let engine = random_engine(&mut rng);
engine.encode_string(&orig_data, &mut normal_encoded);
@@ -175,4 +170,38 @@ mod tests {
assert_eq!(normal_encoded, stream_encoded);
}
}
+ #[test]
+ fn incremental_writes() {
+ let mut rng = rand::thread_rng();
+ let mut orig_data = Vec::<u8>::new();
+ let mut normal_encoded = String::new();
+
+ let size = 5_000;
+
+ for _ in 0..size {
+ orig_data.clear();
+ normal_encoded.clear();
+
+ orig_data.resize(size, 0);
+ rng.fill(&mut orig_data[..]);
+
+ let engine = random_engine(&mut rng);
+ engine.encode_string(&orig_data, &mut normal_encoded);
+
+ let mut stream_encoder = EncoderStringWriter::new(&engine);
+ // write small nibbles of data
+ let mut offset = 0;
+ while offset < size {
+ let nibble_size = cmp::min(rng.gen_range(0..=64), size - offset);
+ let len = stream_encoder
+ .write(&orig_data[offset..offset + nibble_size])
+ .unwrap();
+ offset += len;
+ }
+
+ let stream_encoded = stream_encoder.into_inner();
+
+ assert_eq!(normal_encoded, stream_encoded);
+ }
+ }
}
diff --git a/src/write/encoder_tests.rs b/src/write/encoder_tests.rs
index ce76d63..1f1a165 100644
--- a/src/write/encoder_tests.rs
+++ b/src/write/encoder_tests.rs
@@ -358,7 +358,7 @@ fn retrying_writes_that_error_with_interrupted_works() {
Ok(_) => break,
Err(e) => match e.kind() {
io::ErrorKind::Interrupted => continue,
- _ => Err(e).unwrap(), // bail
+ _ => panic!("{:?}", e), // bail
},
}
}
diff --git a/tests/encode.rs b/tests/encode.rs
index 2e1f893..9d69447 100644
--- a/tests/encode.rs
+++ b/tests/encode.rs
@@ -8,11 +8,7 @@ fn compare_encode(expected: &str, target: &[u8]) {
#[test]
fn encode_all_ascii() {
- let mut ascii = Vec::<u8>::with_capacity(128);
-
- for i in 0..128 {
- ascii.push(i);
- }
+ let ascii: Vec<u8> = (0..=127).collect();
compare_encode(
"AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7P\
@@ -24,12 +20,7 @@ fn encode_all_ascii() {
#[test]
fn encode_all_bytes() {
- let mut bytes = Vec::<u8>::with_capacity(256);
-
- for i in 0..255 {
- bytes.push(i);
- }
- bytes.push(255); //bug with "overflowing" ranges?
+ let bytes: Vec<u8> = (0..=255).collect();
compare_encode(
"AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7P\
@@ -42,12 +33,7 @@ fn encode_all_bytes() {
#[test]
fn encode_all_bytes_url() {
- let mut bytes = Vec::<u8>::with_capacity(256);
-
- for i in 0..255 {
- bytes.push(i);
- }
- bytes.push(255); //bug with "overflowing" ranges?
+ let bytes: Vec<u8> = (0..=255).collect();
assert_eq!(
"AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0\
@@ -55,6 +41,37 @@ fn encode_all_bytes_url() {
-AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq\
-wsbKztLW2t7i5uru8vb6_wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t_g4eLj5OXm5-jp6uvs7e7v8PHy\
8_T19vf4-fr7_P3-_w==",
- &engine::GeneralPurpose::new(&URL_SAFE, PAD).encode(&bytes)
+ &engine::GeneralPurpose::new(&URL_SAFE, PAD).encode(bytes)
);
}
+
+#[test]
+fn encoded_len_unpadded() {
+ assert_eq!(0, encoded_len(0, false).unwrap());
+ assert_eq!(2, encoded_len(1, false).unwrap());
+ assert_eq!(3, encoded_len(2, false).unwrap());
+ assert_eq!(4, encoded_len(3, false).unwrap());
+ assert_eq!(6, encoded_len(4, false).unwrap());
+ assert_eq!(7, encoded_len(5, false).unwrap());
+ assert_eq!(8, encoded_len(6, false).unwrap());
+ assert_eq!(10, encoded_len(7, false).unwrap());
+}
+
+#[test]
+fn encoded_len_padded() {
+ assert_eq!(0, encoded_len(0, true).unwrap());
+ assert_eq!(4, encoded_len(1, true).unwrap());
+ assert_eq!(4, encoded_len(2, true).unwrap());
+ assert_eq!(4, encoded_len(3, true).unwrap());
+ assert_eq!(8, encoded_len(4, true).unwrap());
+ assert_eq!(8, encoded_len(5, true).unwrap());
+ assert_eq!(8, encoded_len(6, true).unwrap());
+ assert_eq!(12, encoded_len(7, true).unwrap());
+}
+#[test]
+fn encoded_len_overflow() {
+ let max_size = usize::MAX / 4 * 3 + 2;
+ assert_eq!(2, max_size % 3);
+ assert_eq!(Some(usize::MAX), encoded_len(max_size, false));
+ assert_eq!(None, encoded_len(max_size + 1, false));
+}