diff options
43 files changed, 4615 insertions, 4481 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index f6e5649..7b32cf5 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,5 +1,6 @@ { "git": { - "sha1": "b4fc91325ec985e2a18e83e95a3c08eebd636af4" - } -} + "sha1": "d7fb31c4ada4ca45df5ae80ec691fa3a050d9c3e" + }, + "path_in_vcs": "" +}
\ No newline at end of file diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..fa98f9c --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,107 @@ +version: '2.1' + +workflows: + version: 2 + build: + jobs: + - build: + matrix: + parameters: + rust_img: [ + # Yes, a single-parameter axis, but means it can be referred to as a cache parameter easily without + # duplicating the magic version number throughout this file. + # The default rust images (not -slim or -alpine) are based on buildpack-deps. Hopefully this will + # be easier on the CI hosts since presumably those fat lower layers will already be cached, and + # therefore faster than a minimal, customized alpine. + # MSRV + 'rust:1.57.0' + ] + # a hacky scheme to work around CircleCI's inability to deal with mutable docker tags, forcing us to + # get a nightly or stable toolchain via rustup instead of a mutable docker tag + toolchain_override: [ + '__msrv__', # won't add any other toolchains, just uses what's in the docker image + 'stable', + 'nightly' + ] + +jobs: + build: + parameters: + rust_img: + type: string + toolchain_override: + type: string + docker: + - image: << parameters.rust_img >> + steps: + - checkout + - restore_cache: + key: project-cache-v5-<< parameters.rust_img >>-<< parameters.toolchain_override >>-{{ checksum "Cargo.toml" }} + - run: + name: Setup toolchain + command: | + if [[ '<< parameters.toolchain_override >>' != '__msrv__' ]] + then + rustup toolchain add '<< parameters.toolchain_override >>' + rustup default '<< parameters.toolchain_override >>' + fi + - run: + name: Log rustc version + command: rustc --version + - run: + name: Check formatting + command: | + rustup component add rustfmt + cargo fmt -- --check + - run: + name: Check clippy lints + # we only care about stable clippy -- nightly clippy is a bit wild + command: | + if [[ '<< parameters.toolchain_override >>' == 'stable' ]] + then + rustup component add clippy + cargo clippy 
--all-targets + fi + - run: + name: Build all targets + command: cargo build --all-targets + - run: + name: Build without default features + command: cargo build --no-default-features + - run: + name: Build with only alloc + command: cargo build --no-default-features --features alloc + - run: + name: Add arm toolchain + command: rustup target add thumbv6m-none-eabi + - run: + name: Build ARM without default features (no_std) + command: cargo build --target thumbv6m-none-eabi --no-default-features + - run: + name: Build ARM with only alloc feature + command: cargo build --target thumbv6m-none-eabi --no-default-features --features alloc + - run: + name: Run tests + command: cargo test --verbose + - run: + name: Build docs + command: cargo doc --verbose + - run: + name: Confirm fuzzers can run + # TERM=dumb prevents cargo fuzz list from printing with color + environment: + TERM: dumb + command: | + if [[ '<< parameters.toolchain_override >>' = 'nightly' ]] + then + cargo install cargo-fuzz + cargo fuzz list | xargs -I FUZZER cargo fuzz run FUZZER -- -max_total_time=1 + fi + + - save_cache: + key: project-cache-v5-<< parameters.rust_img >>-<< parameters.toolchain_override >>-{{ checksum "Cargo.toml" }} + paths: + # rust docker img doesn't use $HOME/[.cargo,.rustup] + - /usr/local/cargo + - /usr/local/rustup + - ./target @@ -10,5 +10,5 @@ main.rs *.iml # `perf record` files -perf.data* +/*perf.data* /tmp diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index fc3ec8f..0000000 --- a/.travis.yml +++ /dev/null @@ -1,50 +0,0 @@ ---- -language: rust -dist: trusty -sudo: required - -matrix: - include: - - rust: 1.34.0 - # cfg(doctest) is experimental in 1.39 but ignored with 1.34.0, and that snuck in when 1.39.0 wasn't tested - - rust: 1.39.0 - - rust: stable - - rust: beta - - rust: nightly - addons: - apt: - packages: - # cargo-tarpaulin needs this - - libssl-dev - install: - # For test coverage. In install step so that it can use cache. 
- - cargo tarpaulin --version || RUSTFLAGS="--cfg procmacro2_semver_exempt" cargo install --force cargo-tarpaulin - - cargo +nightly install cargo-fuzz - - # no_std - - rust: stable - env: TARGET="--target thumbv6m-none-eabi" FEATURES="--no-default-features --features alloc" - install: - - rustup target add thumbv6m-none-eabi - -cache: cargo - -env: - # prevent cargo fuzz list from printing with color - - TERM=dumb - -script: - - cargo build --verbose $TARGET --no-default-features - - cargo build --verbose $TARGET $FEATURES - - 'if [[ -z "$TARGET" ]]; then cargo test --verbose; fi' - - 'if [[ -z "$TARGET" ]]; then cargo doc --verbose; fi' - - 'if [[ "$TRAVIS_RUST_VERSION" = nightly ]]; then cargo bench --no-run; fi' - # run for just a second to confirm that it can build and run ok - - 'if [[ "$TRAVIS_RUST_VERSION" = nightly ]]; then cargo fuzz list | xargs -L 1 -I FUZZER cargo fuzz run FUZZER -- -max_total_time=1; fi' - -after_success: | - if [[ "$TRAVIS_RUST_VERSION" = nightly ]]; then - # Calculate test coverage - cargo tarpaulin --out Xml - bash <(curl -s https://codecov.io/bash) - fi @@ -37,55 +37,15 @@ license { ], } -rust_defaults { - name: "base64_test_defaults", - crate_name: "base64", - cargo_env_compat: true, - cargo_pkg_version: "0.13.0", - test_suites: ["general-tests"], - auto_gen_config: true, - edition: "2018", - features: [ - "default", - "std", - ], - rustlibs: [ - "libbase64_rust", - "libcriterion", - "librand", - "libstructopt", - ], -} - -rust_test { - name: "base64_test_tests_decode", - defaults: ["base64_test_defaults"], - host_supported: true, - srcs: ["tests/decode.rs"], - test_options: { - unit_test: true, - }, -} - -rust_test { - name: "base64_test_tests_encode", - defaults: ["base64_test_defaults"], - host_supported: true, - srcs: ["tests/encode.rs"], - test_options: { - unit_test: true, - }, -} - rust_library { name: "libbase64_rust", stem: "libbase64", host_supported: true, crate_name: "base64", cargo_env_compat: true, - 
cargo_pkg_version: "0.13.0", + cargo_pkg_version: "0.21.0", srcs: ["src/lib.rs"], - edition: "2018", + edition: "2021", features: [ "default", "std", @@ -93,6 +53,7 @@ rust_library { apex_available: [ "//apex_available:platform", "com.android.resolv", + "com.android.virt", ], min_sdk_version: "29", } @@ -3,39 +3,60 @@ # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies +# to registry (e.g., crates.io) dependencies. # -# If you believe there's an error in this file please file an -# issue against the rust-lang/cargo repository. If you're -# editing this file be aware that the upstream Cargo.toml -# will likely look very different (and much more reasonable) +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
[package] -edition = "2018" +edition = "2021" +rust-version = "1.57.0" name = "base64" -version = "0.13.0" -authors = ["Alice Maz <alice@alicemaz.com>", "Marshall Pierce <marshall@mpierce.org>"] +version = "0.21.0" +authors = [ + "Alice Maz <alice@alicemaz.com>", + "Marshall Pierce <marshall@mpierce.org>", +] description = "encodes and decodes base64 as bytes or utf8" documentation = "https://docs.rs/base64" readme = "README.md" -keywords = ["base64", "utf8", "encode", "decode", "no_std"] +keywords = [ + "base64", + "utf8", + "encode", + "decode", + "no_std", +] categories = ["encoding"] -license = "MIT/Apache-2.0" +license = "MIT OR Apache-2.0" repository = "https://github.com/marshallpierce/rust-base64" + [profile.bench] debug = true +[profile.test] +opt-level = 3 + [[bench]] name = "benchmarks" harness = false + [dev-dependencies.criterion] -version = "=0.3.2" +version = "0.4.0" [dev-dependencies.rand] -version = "0.6.1" +version = "0.8.5" +features = ["small_rng"] + +[dev-dependencies.rstest] +version = "0.12.0" + +[dev-dependencies.rstest_reuse] +version = "0.3.0" [dev-dependencies.structopt] -version = "0.3" +version = "0.3.26" [features] alloc = [] diff --git a/Cargo.toml.orig b/Cargo.toml.orig index 30e73ee..33847db 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ -1,6 +1,6 @@ [package] name = "base64" -version = "0.13.0" +version = "0.21.0" authors = ["Alice Maz <alice@alicemaz.com>", "Marshall Pierce <marshall@mpierce.org>"] description = "encodes and decodes base64 as bytes or utf8" repository = "https://github.com/marshallpierce/rust-base64" @@ -8,18 +8,22 @@ documentation = "https://docs.rs/base64" readme = "README.md" keywords = ["base64", "utf8", "encode", "decode", "no_std"] categories = ["encoding"] -license = "MIT/Apache-2.0" -edition = "2018" +license = "MIT OR Apache-2.0" +edition = "2021" +rust-version = "1.57.0" [[bench]] name = "benchmarks" harness = false [dev-dependencies] -# 0.3.3 requires rust 1.36.0 for stable copied() -criterion = 
"=0.3.2" -rand = "0.6.1" -structopt = "0.3" +criterion = "0.4.0" +rand = { version = "0.8.5", features = ["small_rng"] } +# clap 4 would require 1.60 +structopt = "0.3.26" +# test fixtures for engine tests +rstest = "0.12.0" +rstest_reuse = "0.3.0" [features] default = ["std"] @@ -29,3 +33,7 @@ std = [] [profile.bench] # Useful for better disassembly when using `perf record` and `perf annotate` debug = true + +[profile.test] +# Faster tests save much more than the increase in compilation time +opt-level = 3 @@ -1,3 +1,7 @@ +# This project was upgraded with external_updater. +# Usage: tools/external_updater/updater.sh update rust/crates/base64 +# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md + name: "base64" description: "encodes and decodes base64 as bytes or utf8" third_party { @@ -7,14 +11,13 @@ third_party { } url { type: ARCHIVE - value: "https://static.crates.io/crates/base64/base64-0.13.0.crate" + value: "https://static.crates.io/crates/base64/base64-0.21.0.crate" } - version: "0.13.0" - # Dual-licensed, using the least restrictive per go/thirdpartylicenses#same. 
+ version: "0.21.0" license_type: NOTICE last_upgrade_date { - year: 2021 - month: 6 - day: 23 + year: 2023 + month: 2 + day: 1 } } @@ -1,7 +1,6 @@ -[base64](https://crates.io/crates/base64) -=== +# [base64](https://crates.io/crates/base64) -[![](https://img.shields.io/crates/v/base64.svg)](https://crates.io/crates/base64) [![Docs](https://docs.rs/base64/badge.svg)](https://docs.rs/base64) [![Build](https://travis-ci.org/marshallpierce/rust-base64.svg?branch=master)](https://travis-ci.org/marshallpierce/rust-base64) [![codecov](https://codecov.io/gh/marshallpierce/rust-base64/branch/master/graph/badge.svg)](https://codecov.io/gh/marshallpierce/rust-base64) [![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/) +[![](https://img.shields.io/crates/v/base64.svg)](https://crates.io/crates/base64) [![Docs](https://docs.rs/base64/badge.svg)](https://docs.rs/base64) [![CircleCI](https://circleci.com/gh/marshallpierce/rust-base64/tree/master.svg?style=shield)](https://circleci.com/gh/marshallpierce/rust-base64/tree/master) [![codecov](https://codecov.io/gh/marshallpierce/rust-base64/branch/master/graph/badge.svg)](https://codecov.io/gh/marshallpierce/rust-base64) [![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/) <a href="https://www.jetbrains.com/?from=rust-base64"><img src="/icon_CLion.svg" height="40px"/></a> @@ -9,58 +8,98 @@ Made with CLion. Thanks to JetBrains for supporting open source! It's base64. What more could anyone want? -This library's goals are to be *correct* and *fast*. It's thoroughly tested and widely used. It exposes functionality at multiple levels of abstraction so you can choose the level of convenience vs performance that you want, e.g. 
`decode_config_slice` decodes into an existing `&mut [u8]` and is pretty fast (2.6GiB/s for a 3 KiB input), whereas `decode_config` allocates a new `Vec<u8>` and returns it, which might be more convenient in some cases, but is slower (although still fast enough for almost any purpose) at 2.1 GiB/s. +This library's goals are to be *correct* and *fast*. It's thoroughly tested and widely used. It exposes functionality at +multiple levels of abstraction so you can choose the level of convenience vs performance that you want, +e.g. `Engine::decode_slice` decodes into an existing `&mut [u8]` and is pretty fast (2.6 GiB/s for a 3 KiB input), +whereas `Engine::decode` allocates a new `Vec<u8>` and returns it, which might be more convenient in some cases, but is +slower (although still fast enough for almost any purpose) at 2.1 GiB/s. -Example ---- +See the [docs](https://docs.rs/base64) for all the details. -```rust -extern crate base64; +## FAQ -use base64::{encode, decode}; +### I need to decode base64 with whitespace/null bytes/other random things interspersed in it. What should I do? -fn main() { - let a = b"hello world"; - let b = "aGVsbG8gd29ybGQ="; +Remove non-base64 characters from your input before decoding. - assert_eq!(encode(a), b); - assert_eq!(a, &decode(b).unwrap()[..]); -} -``` +If you have a `Vec` of base64, [retain](https://doc.rust-lang.org/std/vec/struct.Vec.html#method.retain) can be used to +strip out whatever you need removed. -See the [docs](https://docs.rs/base64) for all the details. +If you have a `Read` (e.g. reading a file or network socket), there are various approaches. -Rust version compatibility ---- +- Use [iter_read](https://crates.io/crates/iter-read) together with `Read`'s `bytes()` to filter out unwanted bytes. +- Implement `Read` with a `read()` impl that delegates to your actual `Read`, and then drops any bytes you don't want. -The minimum required Rust version is 1.34.0. +### I need to line-wrap base64, e.g. for MIME/PEM. 
-Developing ---- +[line-wrap](https://crates.io/crates/line-wrap) does just that. -Benchmarks are in `benches/`. Running them requires nightly rust, but `rustup` makes it easy: +### I want canonical base64 encoding/decoding. -```bash -rustup run nightly cargo bench -``` +First, don't do this. You should no more expect Base64 to be canonical than you should expect compression algorithms to +produce canonical output across all usage in the wild (hint: they don't). +However, [people are drawn to their own destruction like moths to a flame](https://eprint.iacr.org/2022/361), so here we +are. + +There are two opportunities for non-canonical encoding (and thus, detection of the same during decoding): the final bits +of the last encoded token in two or three token suffixes, and the `=` token used to inflate the suffix to a full four +tokens. + +The trailing bits issue is unavoidable: with 6 bits available in each encoded token, 1 input byte takes 2 tokens, +with the second one having some bits unused. Same for two input bytes: 16 bits, but 3 tokens have 18 bits. Unless we +decide to stop shipping whole bytes around, we're stuck with those extra bits that a sneaky or buggy encoder might set +to 1 instead of 0. + +The `=` pad bytes, on the other hand, are entirely a self-own by the Base64 standard. They do not affect decoding other +than to provide an opportunity to say "that padding is incorrect". Exabytes of storage and transfer have no doubt been +wasted on pointless `=` bytes. Somehow we all seem to be quite comfortable with, say, hex-encoded data just stopping +when it's done rather than requiring a confirmation that the author of the encoder could count to four. Anyway, there +are two ways to make pad bytes predictable: require canonical padding to the next multiple of four bytes as per the RFC, +or, if you control all producers and consumers, save a few bytes by requiring no padding (especially applicable to the +url-safe alphabet). 
+ +All `Engine` implementations must at a minimum support treating non-canonical padding of both types as an error, and +optionally may allow other behaviors. -Decoding is aided by some pre-calculated tables, which are generated by: +## Rust version compatibility + +The minimum supported Rust version is 1.57.0. + +# Contributing + +Contributions are very welcome. However, because this library is used widely, and in security-sensitive contexts, all +PRs will be carefully scrutinized. Beyond that, this sort of low level library simply needs to be 100% correct. Nobody +wants to chase bugs in encoding of any sort. + +All this means that it takes me a fair amount of time to review each PR, so it might take quite a while to carve out the +free time to give each PR the attention it deserves. I will get to everyone eventually! + +## Developing + +Benchmarks are in `benches/`. Running them requires nightly rust, but `rustup` makes it easy: ```bash -cargo run --example make_tables > src/tables.rs.tmp && mv src/tables.rs.tmp src/tables.rs +rustup run nightly cargo bench ``` -no_std ---- +## no_std -This crate supports no_std. By default the crate targets std via the `std` feature. You can deactivate the `default-features` to target core instead. In that case you lose out on all the functionality revolving around `std::io`, `std::error::Error` and heap allocations. There is an additional `alloc` feature that you can activate to bring back the support for heap allocations. +This crate supports no_std. By default the crate targets std via the `std` feature. You can deactivate +the `default-features` to target `core` instead. In that case you lose out on all the functionality revolving +around `std::io`, `std::error::Error`, and heap allocations. There is an additional `alloc` feature that you can activate +to bring back the support for heap allocations. -Profiling ---- +## Profiling -On Linux, you can use [perf](https://perf.wiki.kernel.org/index.php/Main_Page) for profiling. 
Then compile the benchmarks with `rustup nightly run cargo bench --no-run`. +On Linux, you can use [perf](https://perf.wiki.kernel.org/index.php/Main_Page) for profiling. Then compile the +benchmarks with `rustup run nightly cargo bench --no-run`. -Run the benchmark binary with `perf` (shown here filtering to one particular benchmark, which will make the results easier to read). `perf` is only available to the root user on most systems as it fiddles with event counters in your CPU, so use `sudo`. We need to run the actual benchmark binary, hence the path into `target`. You can see the actual full path with `rustup run nightly cargo bench -v`; it will print out the commands it runs. If you use the exact path that `bench` outputs, make sure you get the one that's for the benchmarks, not the tests. You may also want to `cargo clean` so you have only one `benchmarks-` binary (they tend to accumulate). +Run the benchmark binary with `perf` (shown here filtering to one particular benchmark, which will make the results +easier to read). `perf` is only available to the root user on most systems as it fiddles with event counters in your +CPU, so use `sudo`. We need to run the actual benchmark binary, hence the path into `target`. You can see the actual +full path with `rustup run nightly cargo bench -v`; it will print out the commands it runs. If you use the exact path +that `bench` outputs, make sure you get the one that's for the benchmarks, not the tests. You may also want +to `cargo clean` so you have only one `benchmarks-` binary (they tend to accumulate). ```bash sudo perf record target/release/deps/benchmarks-* --bench decode_10mib_reuse @@ -72,7 +111,10 @@ Then analyze the results, again with perf: sudo perf annotate -l ``` -You'll see a bunch of interleaved rust source and assembly like this. The section with `lib.rs:327` is telling us that 4.02% of samples saw the `movzbl` aka bit shift as the active instruction. 
However, this percentage is not as exact as it seems due to a phenomenon called *skid*. Basically, a consequence of how fancy modern CPUs are is that this sort of instruction profiling is inherently inaccurate, especially in branch-heavy code. +You'll see a bunch of interleaved rust source and assembly like this. The section with `lib.rs:327` is telling us that +4.02% of samples saw the `movzbl` aka bit shift as the active instruction. However, this percentage is not as exact as +it seems due to a phenomenon called *skid*. Basically, a consequence of how fancy modern CPUs are is that this sort of +instruction profiling is inherently inaccurate, especially in branch-heavy code. ```text lib.rs:322 0.70 : 10698: mov %rdi,%rax @@ -94,11 +136,10 @@ You'll see a bunch of interleaved rust source and assembly like this. The sectio 0.00 : 106ab: je 1090e <base64::decode_config_buf::hbf68a45fefa299c1+0x46e> ``` +## Fuzzing -Fuzzing ---- - -This uses [cargo-fuzz](https://github.com/rust-fuzz/cargo-fuzz). See `fuzz/fuzzers` for the available fuzzing scripts. To run, use an invocation like these: +This uses [cargo-fuzz](https://github.com/rust-fuzz/cargo-fuzz). See `fuzz/fuzzers` for the available fuzzing scripts. +To run, use an invocation like these: ```bash cargo +nightly fuzz run roundtrip @@ -107,8 +148,7 @@ cargo +nightly fuzz run roundtrip_random_config -- -max_len=10240 cargo +nightly fuzz run decode_random ``` - -License ---- +## License This project is dual-licensed under MIT and Apache 2.0. 
+ diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 1048c1e..4fcadda 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -1,10 +1,122 @@ +# 0.21.0 + +(not yet released) + + +## Migration + +### Functions + +| < 0.20 function | 0.21 equivalent | +|-------------------------|-------------------------------------------------------------------------------------| +| `encode()` | `engine::general_purpose::STANDARD.encode()` or `prelude::BASE64_STANDARD.encode()` | +| `encode_config()` | `engine.encode()` | +| `encode_config_buf()` | `engine.encode_string()` | +| `encode_config_slice()` | `engine.encode_slice()` | +| `decode()` | `engine::general_purpose::STANDARD.decode()` or `prelude::BASE64_STANDARD.decode()` | +| `decode_config()` | `engine.decode()` | +| `decode_config_buf()` | `engine.decode_vec()` | +| `decode_config_slice()` | `engine.decode_slice()` | + +The short-lived 0.20 functions were the 0.13 functions with `config` replaced with `engine`. + +### Padding + +If applicable, use the preset engines `engine::general_purpose::STANDARD`, `engine::general_purpose::STANDARD_NO_PAD`, +`engine::general_purpose::URL_SAFE`, or `engine::general_purpose::URL_SAFE_NO_PAD`. +The `NO_PAD` ones require that padding is absent when decoding, and the others require that +canonical padding is present. + +If you need the < 0.20 behavior that did not care about padding, or want to recreate < 0.20.0's predefined `Config`s +precisely, see the following table. + +| 0.13.1 Config | 0.20.0+ alphabet | `encode_padding` | `decode_padding_mode` | +|-----------------|------------------|------------------|-----------------------| +| STANDARD | STANDARD | true | Indifferent | +| STANDARD_NO_PAD | STANDARD | false | Indifferent | +| URL_SAFE | URL_SAFE | true | Indifferent | +| URL_SAFE_NO_PAD | URL_SAFE | false | Indifferent | + +# 0.21.0-rc.1 + +- Restore the ability to decode into a slice of precisely the correct length with `Engine.decode_slice_unchecked`. +- Add `Engine` as a `pub use` in `prelude`. 
+ +# 0.21.0-beta.2 + +## Breaking changes + +- Re-exports of preconfigured engines in `engine` are removed in favor of `base64::prelude::...` that are better suited to those who wish to `use` the entire path to a name. + +# 0.21.0-beta.1 + +## Breaking changes + +- `FastPortable` was only meant to be an interim name, and shouldn't have shipped in 0.20. It is now `GeneralPurpose` to + make its intended usage more clear. +- `GeneralPurpose` and its config are now `pub use`'d in the `engine` module for convenience. +- Change a few `from()` functions to be `new()`. `from()` causes confusing compiler errors because of confusion + with `From::from`, and is a little misleading because some of those invocations are not very cheap as one would + usually expect from a `from` call. +- `encode*` and `decode*` top level functions are now methods on `Engine`. +- `DEFAULT_ENGINE` was replaced by `engine::general_purpose::STANDARD` +- Predefined engine consts `engine::general_purpose::{STANDARD, STANDARD_NO_PAD, URL_SAFE, URL_SAFE_NO_PAD}` + - These are `pub use`d into `engine` as well +- The `*_slice` decode/encode functions now return an error instead of panicking when the output slice is too small + - As part of this, there isn't now a public way to decode into a slice _exactly_ the size needed for inputs that + aren't multiples of 4 tokens. If adding up to 2 bytes to always be a multiple of 3 bytes for the decode buffer is + a problem, file an issue. + +## Other changes + +- `decoded_len_estimate()` is provided to make it easy to size decode buffers correctly. + +# 0.20.0 + +## Breaking changes + +- Update MSRV to 1.57.0 +- Decoding can now either ignore padding, require correct padding, or require no padding. The default is to require + correct padding. + - The `NO_PAD` config now requires that padding be absent when decoding. 
+ +## 0.20.0-alpha.1 + +### Breaking changes + +- Extended the `Config` concept into the `Engine` abstraction, allowing the user to pick different encoding / decoding + implementations. + - What was formerly the only algorithm is now the `FastPortable` engine, so named because it's portable (works on + any CPU) and relatively fast. + - This opens the door to a portable constant-time + implementation ([#153](https://github.com/marshallpierce/rust-base64/pull/153), + presumably `ConstantTimePortable`?) for security-sensitive applications that need side-channel resistance, and + CPU-specific SIMD implementations for more speed. + - Standard base64 per the RFC is available via `DEFAULT_ENGINE`. To use different alphabets or other settings ( + padding, etc), create your own engine instance. +- `CharacterSet` is now `Alphabet` (per the RFC), and allows creating custom alphabets. The corresponding tables that + were previously code-generated are now built dynamically. +- Since there are already multiple breaking changes, various functions are renamed to be more consistent and + discoverable. +- MSRV is now 1.47.0 to allow various things to use `const fn`. +- `DecoderReader` now owns its inner reader, and can expose it via `into_inner()`. For symmetry, `EncoderWriter` can do + the same with its writer. +- `encoded_len` is now public so you can size encode buffers precisely. + +# 0.13.1 + +- More precise decode buffer sizing, avoiding unnecessary allocation in `decode_config`. + # 0.13.0 - Config methods are const - Added `EncoderStringWriter` to allow encoding directly to a String - `EncoderWriter` now owns its delegate writer rather than keeping a reference to it (though refs still work) - - As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()`, which returns `Result<W>` instead of `Result<()>`. 
If you were calling `finish()` explicitly, you will now need to use `let _ = foo.finish()` instead of just `foo.finish()` to avoid a warning about the unused value. -- When decoding input that has both an invalid length and an invalid symbol as the last byte, `InvalidByte` will be emitted instead of `InvalidLength` to make the problem more obvious. + - As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()`, which + returns `Result<W>` instead of `Result<()>`. If you were calling `finish()` explicitly, you will now need to + use `let _ = foo.finish()` instead of just `foo.finish()` to avoid a warning about the unused value. +- When decoding input that has both an invalid length and an invalid symbol as the last byte, `InvalidByte` will be + emitted instead of `InvalidLength` to make the problem more obvious. # 0.12.2 @@ -22,23 +134,31 @@ - A minor performance improvement in encoding # 0.11.0 + - Minimum rust version 1.34.0 - `no_std` is now supported via the two new features `alloc` and `std`. # 0.10.1 - Minimum rust version 1.27.2 -- Fix bug in streaming encoding ([#90](https://github.com/marshallpierce/rust-base64/pull/90)): if the underlying writer didn't write all the bytes given to it, the remaining bytes would not be retried later. See the docs on `EncoderWriter::write`. +- Fix bug in streaming encoding ([#90](https://github.com/marshallpierce/rust-base64/pull/90)): if the underlying writer + didn't write all the bytes given to it, the remaining bytes would not be retried later. See the docs + on `EncoderWriter::write`. - Make it configurable whether or not to return an error when decoding detects excess trailing bits. # 0.10.0 -- Remove line wrapping. Line wrapping was never a great conceptual fit in this library, and other features (streaming encoding, etc) either couldn't support it or could support only special cases of it with a great increase in complexity. 
Line wrapping has been pulled out into a [line-wrap](https://crates.io/crates/line-wrap) crate, so it's still available if you need it. - - `Base64Display` creation no longer uses a `Result` because it can't fail, which means its helper methods for common - configs that `unwrap()` for you are no longer needed +- Remove line wrapping. Line wrapping was never a great conceptual fit in this library, and other features (streaming + encoding, etc) either couldn't support it or could support only special cases of it with a great increase in + complexity. Line wrapping has been pulled out into a [line-wrap](https://crates.io/crates/line-wrap) crate, so it's + still available if you need it. + - `Base64Display` creation no longer uses a `Result` because it can't fail, which means its helper methods for + common + configs that `unwrap()` for you are no longer needed - Add a streaming encoder `Write` impl to transparently base64 as you write. - Remove the remaining `unsafe` code. -- Remove whitespace stripping to simplify `no_std` support. No out of the box configs use it, and it's trivial to do yourself if needed: `filter(|b| !b" \n\t\r\x0b\x0c".contains(b)`. +- Remove whitespace stripping to simplify `no_std` support. No out of the box configs use it, and it's trivial to do + yourself if needed: `filter(|b| !b" \n\t\r\x0b\x0c".contains(b))`. - Detect invalid trailing symbols when decoding and return an error rather than silently ignoring them. 
# 0.9.3 diff --git a/TEST_MAPPING b/TEST_MAPPING index de2964d..54936f7 100644 --- a/TEST_MAPPING +++ b/TEST_MAPPING @@ -3,46 +3,9 @@ "imports": [ { "path": "external/rust/crates/webpki" - } - ], - "presubmit": [ - { - "name": "base64_test_tests_decode" - }, - { - "name": "base64_test_tests_encode" - }, - { - "name": "doh_unit_test" - }, - { - "name": "libapkverify.integration_test" - }, - { - "name": "libapkverify.test" - }, - { - "name": "microdroid_manager_test" - } - ], - "presubmit-rust": [ - { - "name": "base64_test_tests_decode" - }, - { - "name": "base64_test_tests_encode" - }, - { - "name": "doh_unit_test" - }, - { - "name": "libapkverify.integration_test" - }, - { - "name": "libapkverify.test" }, { - "name": "microdroid_manager_test" + "path": "packages/modules/DnsResolver" } ] } diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs index ddcb734..61d542f 100644 --- a/benches/benchmarks.rs +++ b/benches/benchmarks.rs @@ -1,27 +1,22 @@ -extern crate base64; #[macro_use] extern crate criterion; -extern crate rand; -use base64::display; use base64::{ - decode, decode_config_buf, decode_config_slice, encode, encode_config_buf, encode_config_slice, - write, Config, + display, + engine::{general_purpose::STANDARD, Engine}, + write, }; - -use criterion::{black_box, Bencher, Criterion, ParameterizedBenchmark, Throughput}; -use rand::{FromEntropy, Rng}; +use criterion::{black_box, Bencher, BenchmarkId, Criterion, Throughput}; +use rand::{Rng, SeedableRng}; use std::io::{self, Read, Write}; -const TEST_CONFIG: Config = base64::STANDARD; - fn do_decode_bench(b: &mut Bencher, &size: &usize) { let mut v: Vec<u8> = Vec::with_capacity(size * 3 / 4); fill(&mut v); - let encoded = encode(&v); + let encoded = STANDARD.encode(&v); b.iter(|| { - let orig = decode(&encoded); + let orig = STANDARD.decode(&encoded); black_box(&orig); }); } @@ -29,11 +24,11 @@ fn do_decode_bench(b: &mut Bencher, &size: &usize) { fn do_decode_bench_reuse_buf(b: &mut Bencher, &size: &usize) { 
let mut v: Vec<u8> = Vec::with_capacity(size * 3 / 4); fill(&mut v); - let encoded = encode(&v); + let encoded = STANDARD.encode(&v); let mut buf = Vec::new(); b.iter(|| { - decode_config_buf(&encoded, TEST_CONFIG, &mut buf).unwrap(); + STANDARD.decode_vec(&encoded, &mut buf).unwrap(); black_box(&buf); buf.clear(); }); @@ -42,12 +37,12 @@ fn do_decode_bench_reuse_buf(b: &mut Bencher, &size: &usize) { fn do_decode_bench_slice(b: &mut Bencher, &size: &usize) { let mut v: Vec<u8> = Vec::with_capacity(size * 3 / 4); fill(&mut v); - let encoded = encode(&v); + let encoded = STANDARD.encode(&v); let mut buf = Vec::new(); buf.resize(size, 0); b.iter(|| { - decode_config_slice(&encoded, TEST_CONFIG, &mut buf).unwrap(); + STANDARD.decode_slice(&encoded, &mut buf).unwrap(); black_box(&buf); }); } @@ -55,7 +50,7 @@ fn do_decode_bench_slice(b: &mut Bencher, &size: &usize) { fn do_decode_bench_stream(b: &mut Bencher, &size: &usize) { let mut v: Vec<u8> = Vec::with_capacity(size * 3 / 4); fill(&mut v); - let encoded = encode(&v); + let encoded = STANDARD.encode(&v); let mut buf = Vec::new(); buf.resize(size, 0); @@ -63,7 +58,7 @@ fn do_decode_bench_stream(b: &mut Bencher, &size: &usize) { b.iter(|| { let mut cursor = io::Cursor::new(&encoded[..]); - let mut decoder = base64::read::DecoderReader::new(&mut cursor, TEST_CONFIG); + let mut decoder = base64::read::DecoderReader::new(&mut cursor, &STANDARD); decoder.read_to_end(&mut buf).unwrap(); buf.clear(); black_box(&buf); @@ -74,7 +69,7 @@ fn do_encode_bench(b: &mut Bencher, &size: &usize) { let mut v: Vec<u8> = Vec::with_capacity(size); fill(&mut v); b.iter(|| { - let e = encode(&v); + let e = STANDARD.encode(&v); black_box(&e); }); } @@ -83,7 +78,7 @@ fn do_encode_bench_display(b: &mut Bencher, &size: &usize) { let mut v: Vec<u8> = Vec::with_capacity(size); fill(&mut v); b.iter(|| { - let e = format!("{}", display::Base64Display::with_config(&v, TEST_CONFIG)); + let e = format!("{}", display::Base64Display::new(&v, &STANDARD)); 
black_box(&e); }); } @@ -93,7 +88,7 @@ fn do_encode_bench_reuse_buf(b: &mut Bencher, &size: &usize) { fill(&mut v); let mut buf = String::new(); b.iter(|| { - encode_config_buf(&v, TEST_CONFIG, &mut buf); + STANDARD.encode_string(&v, &mut buf); buf.clear(); }); } @@ -104,9 +99,7 @@ fn do_encode_bench_slice(b: &mut Bencher, &size: &usize) { let mut buf = Vec::new(); // conservative estimate of encoded size buf.resize(v.len() * 2, 0); - b.iter(|| { - encode_config_slice(&v, TEST_CONFIG, &mut buf); - }); + b.iter(|| STANDARD.encode_slice(&v, &mut buf).unwrap()); } fn do_encode_bench_stream(b: &mut Bencher, &size: &usize) { @@ -117,7 +110,7 @@ fn do_encode_bench_stream(b: &mut Bencher, &size: &usize) { buf.reserve(size * 2); b.iter(|| { buf.clear(); - let mut stream_enc = write::EncoderWriter::new(&mut buf, TEST_CONFIG); + let mut stream_enc = write::EncoderWriter::new(&mut buf, &STANDARD); stream_enc.write_all(&v).unwrap(); stream_enc.flush().unwrap(); }); @@ -128,7 +121,7 @@ fn do_encode_bench_string_stream(b: &mut Bencher, &size: &usize) { fill(&mut v); b.iter(|| { - let mut stream_enc = write::EncoderStringWriter::new(TEST_CONFIG); + let mut stream_enc = write::EncoderStringWriter::new(&STANDARD); stream_enc.write_all(&v).unwrap(); stream_enc.flush().unwrap(); let _ = stream_enc.into_inner(); @@ -142,7 +135,7 @@ fn do_encode_bench_string_reuse_buf_stream(b: &mut Bencher, &size: &usize) { let mut buf = String::new(); b.iter(|| { buf.clear(); - let mut stream_enc = write::EncoderStringWriter::from(&mut buf, TEST_CONFIG); + let mut stream_enc = write::EncoderStringWriter::from_consumer(&mut buf, &STANDARD); stream_enc.write_all(&v).unwrap(); stream_enc.flush().unwrap(); let _ = stream_enc.into_inner(); @@ -164,46 +157,85 @@ const BYTE_SIZES: [usize; 5] = [3, 50, 100, 500, 3 * 1024]; // keep the benchmark runtime reasonable. 
const LARGE_BYTE_SIZES: [usize; 3] = [3 * 1024 * 1024, 10 * 1024 * 1024, 30 * 1024 * 1024]; -fn encode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark<usize> { - ParameterizedBenchmark::new("encode", do_encode_bench, byte_sizes.iter().cloned()) +fn encode_benchmarks(c: &mut Criterion, label: &str, byte_sizes: &[usize]) { + let mut group = c.benchmark_group(label); + group .warm_up_time(std::time::Duration::from_millis(500)) - .measurement_time(std::time::Duration::from_secs(3)) - .throughput(|s| Throughput::Bytes(*s as u64)) - .with_function("encode_display", do_encode_bench_display) - .with_function("encode_reuse_buf", do_encode_bench_reuse_buf) - .with_function("encode_slice", do_encode_bench_slice) - .with_function("encode_reuse_buf_stream", do_encode_bench_stream) - .with_function("encode_string_stream", do_encode_bench_string_stream) - .with_function( - "encode_string_reuse_buf_stream", - do_encode_bench_string_reuse_buf_stream, - ) -} + .measurement_time(std::time::Duration::from_secs(3)); + + for size in byte_sizes { + group + .throughput(Throughput::Bytes(*size as u64)) + .bench_with_input(BenchmarkId::new("encode", size), size, do_encode_bench) + .bench_with_input( + BenchmarkId::new("encode_display", size), + size, + do_encode_bench_display, + ) + .bench_with_input( + BenchmarkId::new("encode_reuse_buf", size), + size, + do_encode_bench_reuse_buf, + ) + .bench_with_input( + BenchmarkId::new("encode_slice", size), + size, + do_encode_bench_slice, + ) + .bench_with_input( + BenchmarkId::new("encode_reuse_buf_stream", size), + size, + do_encode_bench_stream, + ) + .bench_with_input( + BenchmarkId::new("encode_string_stream", size), + size, + do_encode_bench_string_stream, + ) + .bench_with_input( + BenchmarkId::new("encode_string_reuse_buf_stream", size), + size, + do_encode_bench_string_reuse_buf_stream, + ); + } -fn decode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark<usize> { - ParameterizedBenchmark::new("decode", do_decode_bench, 
byte_sizes.iter().cloned()) - .warm_up_time(std::time::Duration::from_millis(500)) - .measurement_time(std::time::Duration::from_secs(3)) - .throughput(|s| Throughput::Bytes(*s as u64)) - .with_function("decode_reuse_buf", do_decode_bench_reuse_buf) - .with_function("decode_slice", do_decode_bench_slice) - .with_function("decode_stream", do_decode_bench_stream) + group.finish(); } -fn bench(c: &mut Criterion) { - c.bench("bench_small_input", encode_benchmarks(&BYTE_SIZES[..])); - - c.bench( - "bench_large_input", - encode_benchmarks(&LARGE_BYTE_SIZES[..]).sample_size(10), - ); +fn decode_benchmarks(c: &mut Criterion, label: &str, byte_sizes: &[usize]) { + let mut group = c.benchmark_group(label); + + for size in byte_sizes { + group + .warm_up_time(std::time::Duration::from_millis(500)) + .measurement_time(std::time::Duration::from_secs(3)) + .throughput(Throughput::Bytes(*size as u64)) + .bench_with_input(BenchmarkId::new("decode", size), size, do_decode_bench) + .bench_with_input( + BenchmarkId::new("decode_reuse_buf", size), + size, + do_decode_bench_reuse_buf, + ) + .bench_with_input( + BenchmarkId::new("decode_slice", size), + size, + do_decode_bench_slice, + ) + .bench_with_input( + BenchmarkId::new("decode_stream", size), + size, + do_decode_bench_stream, + ); + } - c.bench("bench_small_input", decode_benchmarks(&BYTE_SIZES[..])); + group.finish(); +} - c.bench( - "bench_large_input", - decode_benchmarks(&LARGE_BYTE_SIZES[..]).sample_size(10), - ); +fn bench(c: &mut Criterion) { + encode_benchmarks(c, "encode_small_input", &BYTE_SIZES[..]); + encode_benchmarks(c, "encode_large_input", &LARGE_BYTE_SIZES[..]); + decode_benchmarks(c, "decode_small_input", &BYTE_SIZES[..]); + decode_benchmarks(c, "decode_large_input", &LARGE_BYTE_SIZES[..]); } criterion_group!(benches, bench); diff --git a/cargo2android.json b/cargo2android.json index d2d1ca1..1736c62 100644 --- a/cargo2android.json +++ b/cargo2android.json @@ -1,14 +1,10 @@ { "apex-available": [ 
"//apex_available:platform", - "com.android.resolv" + "com.android.resolv", + "com.android.virt" ], "device": true, "min-sdk-version": "29", - "run": true, - "test-blocklist": [ - "src/lib.rs", - "tests/tests.rs" - ], - "tests": true -}
\ No newline at end of file + "run": true +} diff --git a/clippy.toml b/clippy.toml new file mode 100644 index 0000000..23b32c1 --- /dev/null +++ b/clippy.toml @@ -0,0 +1 @@ +msrv = "1.57.0" diff --git a/examples/base64.rs b/examples/base64.rs index cba745b..0a214d2 100644 --- a/examples/base64.rs +++ b/examples/base64.rs @@ -4,37 +4,28 @@ use std::path::PathBuf; use std::process; use std::str::FromStr; -use base64::{read, write}; +use base64::{alphabet, engine, read, write}; use structopt::StructOpt; #[derive(Debug, StructOpt)] -enum CharacterSet { +enum Alphabet { Standard, UrlSafe, } -impl Default for CharacterSet { +impl Default for Alphabet { fn default() -> Self { - CharacterSet::Standard + Self::Standard } } -impl Into<base64::Config> for CharacterSet { - fn into(self) -> base64::Config { - match self { - CharacterSet::Standard => base64::STANDARD, - CharacterSet::UrlSafe => base64::URL_SAFE, - } - } -} - -impl FromStr for CharacterSet { +impl FromStr for Alphabet { type Err = String; - fn from_str(s: &str) -> Result<CharacterSet, String> { + fn from_str(s: &str) -> Result<Self, String> { match s { - "standard" => Ok(CharacterSet::Standard), - "urlsafe" => Ok(CharacterSet::UrlSafe), - _ => Err(format!("charset '{}' unrecognized", s)), + "standard" => Ok(Self::Standard), + "urlsafe" => Ok(Self::UrlSafe), + _ => Err(format!("alphabet '{}' unrecognized", s)), } } } @@ -45,10 +36,10 @@ struct Opt { /// decode data #[structopt(short = "d", long = "decode")] decode: bool, - /// The character set to choose. Defaults to the standard base64 character set. - /// Supported character sets include "standard" and "urlsafe". - #[structopt(long = "charset")] - charset: Option<CharacterSet>, + /// The alphabet to choose. Defaults to the standard base64 alphabet. + /// Supported alphabets include "standard" and "urlsafe". + #[structopt(long = "alphabet")] + alphabet: Option<Alphabet>, /// The file to encode/decode. 
#[structopt(parse(from_os_str))] file: Option<PathBuf>, @@ -68,14 +59,23 @@ fn main() { } Some(f) => Box::new(File::open(f).unwrap()), }; - let config = opt.charset.unwrap_or_default().into(); + + let alphabet = opt.alphabet.unwrap_or_default(); + let engine = engine::GeneralPurpose::new( + &match alphabet { + Alphabet::Standard => alphabet::STANDARD, + Alphabet::UrlSafe => alphabet::URL_SAFE, + }, + engine::general_purpose::PAD, + ); + let stdout = io::stdout(); let mut stdout = stdout.lock(); let r = if opt.decode { - let mut decoder = read::DecoderReader::new(&mut input, config); + let mut decoder = read::DecoderReader::new(&mut input, &engine); io::copy(&mut decoder, &mut stdout) } else { - let mut encoder = write::EncoderWriter::new(&mut stdout, config); + let mut encoder = write::EncoderWriter::new(&mut stdout, &engine); io::copy(&mut input, &mut encoder) }; if let Err(e) = r { diff --git a/examples/make_tables.rs b/examples/make_tables.rs deleted file mode 100644 index 2f27c0e..0000000 --- a/examples/make_tables.rs +++ /dev/null @@ -1,179 +0,0 @@ -use std::collections::{HashMap, HashSet}; -use std::iter::Iterator; - -fn main() { - println!("pub const INVALID_VALUE: u8 = 255;"); - - // A-Z - let standard_alphabet: Vec<u8> = (0x41..0x5B) - // a-z - .chain(0x61..0x7B) - // 0-9 - .chain(0x30..0x3A) - // + - .chain(0x2B..0x2C) - // / - .chain(0x2F..0x30) - .collect(); - print_encode_table(&standard_alphabet, "STANDARD_ENCODE", 0); - print_decode_table(&standard_alphabet, "STANDARD_DECODE", 0); - - // A-Z - let url_alphabet: Vec<u8> = (0x41..0x5B) - // a-z - .chain(0x61..0x7B) - // 0-9 - .chain(0x30..0x3A) - // - - .chain(0x2D..0x2E) - // _ - .chain(0x5F..0x60) - .collect(); - print_encode_table(&url_alphabet, "URL_SAFE_ENCODE", 0); - print_decode_table(&url_alphabet, "URL_SAFE_DECODE", 0); - - // ./0123456789 - let crypt_alphabet: Vec<u8> = (b'.'..(b'9' + 1)) - // A-Z - .chain(b'A'..(b'Z' + 1)) - // a-z - .chain(b'a'..(b'z' + 1)) - .collect(); - 
print_encode_table(&crypt_alphabet, "CRYPT_ENCODE", 0); - print_decode_table(&crypt_alphabet, "CRYPT_DECODE", 0); - - // ./ - let bcrypt_alphabet: Vec<u8> = (b'.'..(b'/' + 1)) - // A-Z - .chain(b'A'..(b'Z' + 1)) - // a-z - .chain(b'a'..(b'z' + 1)) - // 0-9 - .chain(b'0'..(b'9' + 1)) - .collect(); - print_encode_table(&bcrypt_alphabet, "BCRYPT_ENCODE", 0); - print_decode_table(&bcrypt_alphabet, "BCRYPT_DECODE", 0); - - // A-Z - let imap_alphabet: Vec<u8> = (0x41..0x5B) - // a-z - .chain(0x61..0x7B) - // 0-9 - .chain(0x30..0x3A) - // + - .chain(0x2B..0x2C) - // , - .chain(0x2C..0x2D) - .collect(); - print_encode_table(&imap_alphabet, "IMAP_MUTF7_ENCODE", 0); - print_decode_table(&imap_alphabet, "IMAP_MUTF7_DECODE", 0); - - // '!' - '-' - let binhex_alphabet: Vec<u8> = (0x21..0x2E) - // 0-9 - .chain(0x30..0x3A) - // @-N - .chain(0x40..0x4F) - // P-V - .chain(0x50..0x57) - // X-[ - .chain(0x58..0x5C) - // `-f - .chain(0x60..0x66) - // h-m - .chain(0x68..0x6E) - // p-r - .chain(0x70..0x73) - .collect(); - print_encode_table(&binhex_alphabet, "BINHEX_ENCODE", 0); - print_decode_table(&binhex_alphabet, "BINHEX_DECODE", 0); -} - -fn print_encode_table(alphabet: &[u8], const_name: &str, indent_depth: usize) { - check_alphabet(alphabet); - println!("#[rustfmt::skip]"); - println!( - "{:width$}pub const {}: &[u8; 64] = &[", - "", - const_name, - width = indent_depth - ); - - for (i, b) in alphabet.iter().enumerate() { - println!( - "{:width$}{}, // input {} (0x{:X}) => '{}' (0x{:X})", - "", - b, - i, - i, - String::from_utf8(vec![*b as u8]).unwrap(), - b, - width = indent_depth + 4 - ); - } - - println!("{:width$}];", "", width = indent_depth); -} - -fn print_decode_table(alphabet: &[u8], const_name: &str, indent_depth: usize) { - check_alphabet(alphabet); - // map of alphabet bytes to 6-bit morsels - let mut input_to_morsel = HashMap::<u8, u8>::new(); - - // standard base64 alphabet bytes, in order - for (morsel, ascii_byte) in alphabet.iter().enumerate() { - // truncation 
cast is fine here - let _ = input_to_morsel.insert(*ascii_byte, morsel as u8); - } - - println!("#[rustfmt::skip]"); - println!( - "{:width$}pub const {}: &[u8; 256] = &[", - "", - const_name, - width = indent_depth - ); - for ascii_byte in 0..256 { - let (value, comment) = match input_to_morsel.get(&(ascii_byte as u8)) { - None => ( - "INVALID_VALUE".to_string(), - format!("input {} (0x{:X})", ascii_byte, ascii_byte), - ), - Some(v) => ( - format!("{}", *v), - format!( - "input {} (0x{:X} char '{}') => {} (0x{:X})", - ascii_byte, - ascii_byte, - String::from_utf8(vec![ascii_byte as u8]).unwrap(), - *v, - *v - ), - ), - }; - - println!( - "{:width$}{}, // {}", - "", - value, - comment, - width = indent_depth + 4 - ); - } - println!("{:width$}];", "", width = indent_depth); -} - -fn check_alphabet(alphabet: &[u8]) { - // ensure all characters are distinct - assert_eq!(64, alphabet.len()); - let mut set: HashSet<u8> = HashSet::new(); - set.extend(alphabet); - assert_eq!(64, set.len()); - - // must be ASCII to be valid as single UTF-8 bytes - for &b in alphabet { - assert!(b <= 0x7F_u8); - // = is assumed to be padding, so cannot be used as a symbol - assert_ne!(b'=', b); - } -} diff --git a/patches/doc-string-fix.patch b/patches/doc-string-fix.patch new file mode 100644 index 0000000..15370bf --- /dev/null +++ b/patches/doc-string-fix.patch @@ -0,0 +1,41 @@ +From 27fc4ecc69aab7b31e23aefbeed10b252b176d5a Mon Sep 17 00:00:00 2001 +From: Chris Wailes <chriswailes@google.com> +Date: Thu, 9 Feb 2023 23:15:26 -0800 +Subject: [PATCH] Fix an error in the documentation strings + +This file explicitly turns on this warning meaning that it can't be +overridden by command line lint arguments. The rustdoc from version +1.66.1 will fail without this patch. 
+ +Test: m rustdoc +Bug: 263153841 +Change-Id: Idcf3779cbd46300691232302bba10c46143a2dbc +--- + src/decode.rs | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/decode.rs b/src/decode.rs +index e349240..3f627c7 100644 +--- a/src/decode.rs ++++ b/src/decode.rs +@@ -71,7 +71,7 @@ impl error::Error for DecodeError { + } + + ///Decode from string reference as octets. +-///Returns a Result containing a Vec<u8>. ++///Returns a Result containing a `Vec<u8>`. + ///Convenience `decode_config(input, base64::STANDARD);`. + /// + ///# Example +@@ -90,7 +90,7 @@ pub fn decode<T: AsRef<[u8]>>(input: T) -> Result<Vec<u8>, DecodeError> { + } + + ///Decode from string reference as octets. +-///Returns a Result containing a Vec<u8>. ++///Returns a Result containing a `Vec<u8>`. + /// + ///# Example + /// +-- +2.39.1.581.gbfd45094c4-goog + diff --git a/patches/encoder.patch b/patches/encoder.patch deleted file mode 100644 index ac1cd73..0000000 --- a/patches/encoder.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/src/write/encoder.rs b/src/write/encoder.rs -index 8a48f43..4bb57eb 100644 ---- a/src/write/encoder.rs -+++ b/src/write/encoder.rs -@@ -223,7 +223,7 @@ impl<W: Write> Write for EncoderWriter<W> { - /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes - /// of `input` consumed. The value may be `0`, which interacts poorly with `write_all`, which - /// interprets `Ok(0)` as an error, despite it being allowed by the contract of `write`. See -- /// https://github.com/rust-lang/rust/issues/56889 for more on that. -+ /// <https://github.com/rust-lang/rust/issues/56889> for more on that. - /// - /// If the previous call to `write` provided more (encoded) data than the delegate writer could - /// accept in a single call to its `write`, the remaining data is buffered. 
As long as buffered diff --git a/src/alphabet.rs b/src/alphabet.rs new file mode 100644 index 0000000..7cd1b57 --- /dev/null +++ b/src/alphabet.rs @@ -0,0 +1,241 @@ +//! Provides [Alphabet] and constants for alphabets commonly used in the wild. + +use crate::PAD_BYTE; +use core::fmt; +#[cfg(any(feature = "std", test))] +use std::error; + +const ALPHABET_SIZE: usize = 64; + +/// An alphabet defines the 64 ASCII characters (symbols) used for base64. +/// +/// Common alphabets are provided as constants, and custom alphabets +/// can be made via `from_str` or the `TryFrom<str>` implementation. +/// +/// ``` +/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap(); +/// +/// let engine = base64::engine::GeneralPurpose::new( +/// &custom, +/// base64::engine::general_purpose::PAD); +/// ``` +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Alphabet { + pub(crate) symbols: [u8; ALPHABET_SIZE], +} + +impl Alphabet { + /// Performs no checks so that it can be const. + /// Used only for known-valid strings. + const fn from_str_unchecked(alphabet: &str) -> Self { + let mut symbols = [0_u8; ALPHABET_SIZE]; + let source_bytes = alphabet.as_bytes(); + + // a way to copy that's allowed in const fn + let mut index = 0; + while index < ALPHABET_SIZE { + symbols[index] = source_bytes[index]; + index += 1; + } + + Self { symbols } + } + + /// Create an `Alphabet` from a string of 64 unique printable ASCII bytes. + /// + /// The `=` byte is not allowed as it is used for padding. + pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> { + let bytes = alphabet.as_bytes(); + if bytes.len() != ALPHABET_SIZE { + return Err(ParseAlphabetError::InvalidLength); + } + + { + let mut index = 0; + while index < ALPHABET_SIZE { + let byte = bytes[index]; + + // must be ascii printable. 127 (DEL) is commonly considered printable + // for some reason but clearly unsuitable for base64. 
+ if !(byte >= 32_u8 && byte <= 126_u8) { + return Err(ParseAlphabetError::UnprintableByte(byte)); + } + // = is assumed to be padding, so cannot be used as a symbol + if byte == PAD_BYTE { + return Err(ParseAlphabetError::ReservedByte(byte)); + } + + // Check for duplicates while staying within what const allows. + // It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit + // microsecond range. + + let mut probe_index = 0; + while probe_index < ALPHABET_SIZE { + if probe_index == index { + probe_index += 1; + continue; + } + + let probe_byte = bytes[probe_index]; + + if byte == probe_byte { + return Err(ParseAlphabetError::DuplicatedByte(byte)); + } + + probe_index += 1; + } + + index += 1; + } + } + + Ok(Self::from_str_unchecked(alphabet)) + } +} + +impl TryFrom<&str> for Alphabet { + type Error = ParseAlphabetError; + + fn try_from(value: &str) -> Result<Self, Self::Error> { + Self::new(value) + } +} + +/// Possible errors when constructing an [Alphabet] from a `str`. +#[derive(Debug, Eq, PartialEq)] +pub enum ParseAlphabetError { + /// Alphabets must be 64 ASCII bytes + InvalidLength, + /// All bytes must be unique + DuplicatedByte(u8), + /// All bytes must be printable (in the range `[32, 126]`). + UnprintableByte(u8), + /// `=` cannot be used + ReservedByte(u8), +} + +impl fmt::Display for ParseAlphabetError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"), + Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b), + Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b), + Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b), + } + } +} + +#[cfg(any(feature = "std", test))] +impl error::Error for ParseAlphabetError {} + +/// The standard alphabet (uses `+` and `/`). +/// +/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3). 
+pub const STANDARD: Alphabet = Alphabet::from_str_unchecked( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", +); + +/// The URL safe alphabet (uses `-` and `_`). +/// +/// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4). +pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", +); + +/// The `crypt(3)` alphabet (uses `.` and `/` as the first two values). +/// +/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses. +pub const CRYPT: Alphabet = Alphabet::from_str_unchecked( + "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", +); + +/// The bcrypt alphabet. +pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked( + "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", +); + +/// The alphabet used in IMAP-modified UTF-7 (uses `+` and `,`). +/// +/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3) +pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,", +); + +/// The alphabet used in BinHex 4.0 files. 
+/// +/// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt) +pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked( + "!\"#$%&'()*+,-0123456789@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdehijklmpqr", +); + +#[cfg(test)] +mod tests { + use crate::alphabet::*; + use std::convert::TryFrom as _; + + #[test] + fn detects_duplicate_start() { + assert_eq!( + ParseAlphabetError::DuplicatedByte(b'A'), + Alphabet::new("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") + .unwrap_err() + ); + } + + #[test] + fn detects_duplicate_end() { + assert_eq!( + ParseAlphabetError::DuplicatedByte(b'/'), + Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//") + .unwrap_err() + ); + } + + #[test] + fn detects_duplicate_middle() { + assert_eq!( + ParseAlphabetError::DuplicatedByte(b'Z'), + Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/") + .unwrap_err() + ); + } + + #[test] + fn detects_length() { + assert_eq!( + ParseAlphabetError::InvalidLength, + Alphabet::new( + "xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/", + ) + .unwrap_err() + ); + } + + #[test] + fn detects_padding() { + assert_eq!( + ParseAlphabetError::ReservedByte(b'='), + Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=") + .unwrap_err() + ); + } + + #[test] + fn detects_unprintable() { + // form feed + assert_eq!( + ParseAlphabetError::UnprintableByte(0xc), + Alphabet::new("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") + .unwrap_err() + ); + } + + #[test] + fn same_as_unchecked() { + assert_eq!( + STANDARD, + Alphabet::try_from("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") + .unwrap() + ); + } +} diff --git a/src/chunked_encoder.rs b/src/chunked_encoder.rs index bd45ec9..0457259 100644 --- a/src/chunked_encoder.rs +++ b/src/chunked_encoder.rs @@ -1,13 +1,12 @@ -use crate::{ - encode::{add_padding, encode_to_slice}, - 
Config, -}; #[cfg(any(feature = "alloc", feature = "std", test))] use alloc::string::String; use core::cmp; #[cfg(any(feature = "alloc", feature = "std", test))] use core::str; +use crate::encode::add_padding; +use crate::engine::{Config, Engine}; + /// The output mechanism for ChunkedEncoder's encoded bytes. pub trait Sink { type Error; @@ -19,23 +18,21 @@ pub trait Sink { const BUF_SIZE: usize = 1024; /// A base64 encoder that emits encoded bytes in chunks without heap allocation. -pub struct ChunkedEncoder { - config: Config, +pub struct ChunkedEncoder<'e, E: Engine + ?Sized> { + engine: &'e E, max_input_chunk_len: usize, } -impl ChunkedEncoder { - pub fn new(config: Config) -> ChunkedEncoder { +impl<'e, E: Engine + ?Sized> ChunkedEncoder<'e, E> { + pub fn new(engine: &'e E) -> ChunkedEncoder<'e, E> { ChunkedEncoder { - config, - max_input_chunk_len: max_input_length(BUF_SIZE, config), + engine, + max_input_chunk_len: max_input_length(BUF_SIZE, engine.config().encode_padding()), } } pub fn encode<S: Sink>(&self, bytes: &[u8], sink: &mut S) -> Result<(), S::Error> { let mut encode_buf: [u8; BUF_SIZE] = [0; BUF_SIZE]; - let encode_table = self.config.char_set.encode_table(); - let mut input_index = 0; while input_index < bytes.len() { @@ -44,12 +41,12 @@ impl ChunkedEncoder { let chunk = &bytes[input_index..(input_index + input_chunk_len)]; - let mut b64_bytes_written = encode_to_slice(chunk, &mut encode_buf, encode_table); + let mut b64_bytes_written = self.engine.internal_encode(chunk, &mut encode_buf); input_index += input_chunk_len; let more_input_left = input_index < bytes.len(); - if self.config.pad && !more_input_left { + if self.engine.config().encode_padding() && !more_input_left { // no more input, add padding if needed. Buffer will have room because // max_input_length leaves room for it. 
b64_bytes_written += add_padding(bytes.len(), &mut encode_buf[b64_bytes_written..]); @@ -69,8 +66,8 @@ impl ChunkedEncoder { /// /// The input length will always be a multiple of 3 so that no encoding state has to be carried over /// between chunks. -fn max_input_length(encoded_buf_len: usize, config: Config) -> usize { - let effective_buf_len = if config.pad { +fn max_input_length(encoded_buf_len: usize, padded: bool) -> usize { + let effective_buf_len = if padded { // make room for padding encoded_buf_len .checked_sub(2) @@ -109,26 +106,28 @@ impl<'a> Sink for StringSink<'a> { #[cfg(test)] pub mod tests { - use super::*; - use crate::{encode_config_buf, tests::random_config, CharacterSet, STANDARD}; - use rand::{ distributions::{Distribution, Uniform}, - FromEntropy, Rng, + Rng, SeedableRng, }; + use crate::{ + alphabet::STANDARD, + engine::general_purpose::{GeneralPurpose, GeneralPurposeConfig, PAD}, + tests::random_engine, + }; + + use super::*; + #[test] fn chunked_encode_empty() { - assert_eq!("", chunked_encode_str(&[], STANDARD)); + assert_eq!("", chunked_encode_str(&[], PAD)); } #[test] fn chunked_encode_intermediate_fast_loop() { // > 8 bytes input, will enter the pretty fast loop - assert_eq!( - "Zm9vYmFyYmF6cXV4", - chunked_encode_str(b"foobarbazqux", STANDARD) - ); + assert_eq!("Zm9vYmFyYmF6cXV4", chunked_encode_str(b"foobarbazqux", PAD)); } #[test] @@ -136,14 +135,14 @@ pub mod tests { // > 32 bytes input, will enter the uber fast loop assert_eq!( "Zm9vYmFyYmF6cXV4cXV1eGNvcmdlZ3JhdWx0Z2FycGx5eg==", - chunked_encode_str(b"foobarbazquxquuxcorgegraultgarplyz", STANDARD) + chunked_encode_str(b"foobarbazquxquuxcorgegraultgarplyz", PAD) ); } #[test] fn chunked_encode_slow_loop_only() { // < 8 bytes input, slow loop only - assert_eq!("Zm9vYmFy", chunked_encode_str(b"foobar", STANDARD)); + assert_eq!("Zm9vYmFy", chunked_encode_str(b"foobar", PAD)); } #[test] @@ -154,32 +153,27 @@ pub mod tests { #[test] fn max_input_length_no_pad() { - let config = 
config_with_pad(false); - assert_eq!(768, max_input_length(1024, config)); + assert_eq!(768, max_input_length(1024, false)); } #[test] fn max_input_length_with_pad_decrements_one_triple() { - let config = config_with_pad(true); - assert_eq!(765, max_input_length(1024, config)); + assert_eq!(765, max_input_length(1024, true)); } #[test] fn max_input_length_with_pad_one_byte_short() { - let config = config_with_pad(true); - assert_eq!(765, max_input_length(1025, config)); + assert_eq!(765, max_input_length(1025, true)); } #[test] fn max_input_length_with_pad_fits_exactly() { - let config = config_with_pad(true); - assert_eq!(768, max_input_length(1026, config)); + assert_eq!(768, max_input_length(1026, true)); } #[test] fn max_input_length_cant_use_extra_single_encoded_byte() { - let config = Config::new(crate::CharacterSet::Standard, false); - assert_eq!(300, max_input_length(401, config)); + assert_eq!(300, max_input_length(401, false)); } pub fn chunked_encode_matches_normal_encode_random<S: SinkTestHelper>(sink_test_helper: &S) { @@ -197,49 +191,39 @@ pub mod tests { input_buf.push(rng.gen()); } - let config = random_config(&mut rng); + let engine = random_engine(&mut rng); - let chunk_encoded_string = sink_test_helper.encode_to_string(config, &input_buf); - encode_config_buf(&input_buf, config, &mut output_buf); + let chunk_encoded_string = sink_test_helper.encode_to_string(&engine, &input_buf); + engine.encode_string(&input_buf, &mut output_buf); - assert_eq!( - output_buf, chunk_encoded_string, - "input len={}, config: pad={}", - buf_len, config.pad - ); + assert_eq!(output_buf, chunk_encoded_string, "input len={}", buf_len); } } - fn chunked_encode_str(bytes: &[u8], config: Config) -> String { + fn chunked_encode_str(bytes: &[u8], config: GeneralPurposeConfig) -> String { let mut s = String::new(); - { - let mut sink = StringSink::new(&mut s); - let encoder = ChunkedEncoder::new(config); - encoder.encode(bytes, &mut sink).unwrap(); - } - return s; - } + let 
mut sink = StringSink::new(&mut s); + let engine = GeneralPurpose::new(&STANDARD, config); + let encoder = ChunkedEncoder::new(&engine); + encoder.encode(bytes, &mut sink).unwrap(); - fn config_with_pad(pad: bool) -> Config { - Config::new(CharacterSet::Standard, pad) + s } // An abstraction around sinks so that we can have tests that easily to any sink implementation pub trait SinkTestHelper { - fn encode_to_string(&self, config: Config, bytes: &[u8]) -> String; + fn encode_to_string<E: Engine>(&self, engine: &E, bytes: &[u8]) -> String; } struct StringSinkTestHelper; impl SinkTestHelper for StringSinkTestHelper { - fn encode_to_string(&self, config: Config, bytes: &[u8]) -> String { - let encoder = ChunkedEncoder::new(config); + fn encode_to_string<E: Engine>(&self, engine: &E, bytes: &[u8]) -> String { + let encoder = ChunkedEncoder::new(engine); let mut s = String::new(); - { - let mut sink = StringSink::new(&mut s); - encoder.encode(bytes, &mut sink).unwrap(); - } + let mut sink = StringSink::new(&mut s); + encoder.encode(bytes, &mut sink).unwrap(); s } diff --git a/src/decode.rs b/src/decode.rs index 4cc937d..0471518 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -1,32 +1,15 @@ -use crate::{tables, Config, PAD_BYTE}; - -#[cfg(any(feature = "alloc", feature = "std", test))] -use crate::STANDARD; +use crate::engine::{general_purpose::STANDARD, DecodeEstimate, Engine}; #[cfg(any(feature = "alloc", feature = "std", test))] use alloc::vec::Vec; use core::fmt; #[cfg(any(feature = "std", test))] use std::error; -// decode logic operates on chunks of 8 input bytes without padding -const INPUT_CHUNK_LEN: usize = 8; -const DECODED_CHUNK_LEN: usize = 6; -// we read a u64 and write a u64, but a u64 of input only yields 6 bytes of output, so the last -// 2 bytes of any output u64 should not be counted as written to (but must be available in a -// slice). 
-const DECODED_CHUNK_SUFFIX: usize = 2; - -// how many u64's of input to handle at a time -const CHUNKS_PER_FAST_LOOP_BLOCK: usize = 4; -const INPUT_BLOCK_LEN: usize = CHUNKS_PER_FAST_LOOP_BLOCK * INPUT_CHUNK_LEN; -// includes the trailing 2 bytes for the final u64 write -const DECODED_BLOCK_LEN: usize = - CHUNKS_PER_FAST_LOOP_BLOCK * DECODED_CHUNK_LEN + DECODED_CHUNK_SUFFIX; - /// Errors that can occur while decoding. #[derive(Clone, Debug, PartialEq, Eq)] pub enum DecodeError { /// An invalid byte was found in the input. The offset and offending byte are provided. + /// Padding characters (`=`) interspersed in the encoded form will be treated as invalid bytes. InvalidByte(usize, u8), /// The length of the input is invalid. /// A typical cause of this is stray trailing whitespace or other separator bytes. @@ -36,561 +19,160 @@ pub enum DecodeError { InvalidLength, /// The last non-padding input symbol's encoded 6 bits have nonzero bits that will be discarded. /// This is indicative of corrupted or truncated Base64. - /// Unlike InvalidByte, which reports symbols that aren't in the alphabet, this error is for + /// Unlike `InvalidByte`, which reports symbols that aren't in the alphabet, this error is for /// symbols that are in the alphabet but represent nonsensical encodings. InvalidLastSymbol(usize, u8), + /// The nature of the padding was not as configured: absent or incorrect when it must be + /// canonical, or present when it must be absent, etc. 
+ InvalidPadding, } impl fmt::Display for DecodeError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match *self { - DecodeError::InvalidByte(index, byte) => { - write!(f, "Invalid byte {}, offset {}.", byte, index) - } - DecodeError::InvalidLength => write!(f, "Encoded text cannot have a 6-bit remainder."), - DecodeError::InvalidLastSymbol(index, byte) => { + Self::InvalidByte(index, byte) => write!(f, "Invalid byte {}, offset {}.", byte, index), + Self::InvalidLength => write!(f, "Encoded text cannot have a 6-bit remainder."), + Self::InvalidLastSymbol(index, byte) => { write!(f, "Invalid last symbol {}, offset {}.", byte, index) } + Self::InvalidPadding => write!(f, "Invalid padding"), } } } #[cfg(any(feature = "std", test))] impl error::Error for DecodeError { - fn description(&self) -> &str { - match *self { - DecodeError::InvalidByte(_, _) => "invalid byte", - DecodeError::InvalidLength => "invalid length", - DecodeError::InvalidLastSymbol(_, _) => "invalid last symbol", + fn cause(&self) -> Option<&dyn error::Error> { + None + } +} + +/// Errors that can occur while decoding into a slice. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum DecodeSliceError { + /// A [DecodeError] occurred + DecodeError(DecodeError), + /// The provided slice _may_ be too small. + /// + /// The check is conservative (assumes the last triplet of output bytes will all be needed). + OutputSliceTooSmall, +} + +impl fmt::Display for DecodeSliceError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::DecodeError(e) => write!(f, "DecodeError: {}", e), + Self::OutputSliceTooSmall => write!(f, "Output slice too small"), } } +} +#[cfg(any(feature = "std", test))] +impl error::Error for DecodeSliceError { fn cause(&self) -> Option<&dyn error::Error> { - None + match self { + DecodeSliceError::DecodeError(e) => Some(e), + DecodeSliceError::OutputSliceTooSmall => None, + } } } -///Decode from string reference as octets. 
-///Returns a Result containing a Vec<u8>. -///Convenience `decode_config(input, base64::STANDARD);`. -/// -///# Example -/// -///```rust -///extern crate base64; +impl From<DecodeError> for DecodeSliceError { + fn from(e: DecodeError) -> Self { + DecodeSliceError::DecodeError(e) + } +} + +/// Decode base64 using the [`STANDARD` engine](STANDARD). /// -///fn main() { -/// let bytes = base64::decode("aGVsbG8gd29ybGQ=").unwrap(); -/// println!("{:?}", bytes); -///} -///``` +/// See [Engine::decode]. +#[deprecated(since = "0.21.0", note = "Use Engine::decode")] #[cfg(any(feature = "alloc", feature = "std", test))] pub fn decode<T: AsRef<[u8]>>(input: T) -> Result<Vec<u8>, DecodeError> { - decode_config(input, STANDARD) + STANDARD.decode(input) } -///Decode from string reference as octets. -///Returns a Result containing a Vec<u8>. -/// -///# Example -/// -///```rust -///extern crate base64; +/// Decode from string reference as octets using the specified [Engine]. /// -///fn main() { -/// let bytes = base64::decode_config("aGVsbG8gd29ybGR+Cg==", base64::STANDARD).unwrap(); -/// println!("{:?}", bytes); -/// -/// let bytes_url = base64::decode_config("aGVsbG8gaW50ZXJuZXR-Cg==", base64::URL_SAFE).unwrap(); -/// println!("{:?}", bytes_url); -///} -///``` +/// See [Engine::decode]. +///Returns a `Result` containing a `Vec<u8>`. +#[deprecated(since = "0.21.0", note = "Use Engine::decode")] #[cfg(any(feature = "alloc", feature = "std", test))] -pub fn decode_config<T: AsRef<[u8]>>(input: T, config: Config) -> Result<Vec<u8>, DecodeError> { - let mut buffer = Vec::<u8>::with_capacity(input.as_ref().len() * 4 / 3); - - decode_config_buf(input, config, &mut buffer).map(|_| buffer) +pub fn decode_engine<E: Engine, T: AsRef<[u8]>>( + input: T, + engine: &E, +) -> Result<Vec<u8>, DecodeError> { + engine.decode(input) } -///Decode from string reference as octets. -///Writes into the supplied buffer to avoid allocation. -///Returns a Result containing an empty tuple, aka (). 
-/// -///# Example -/// -///```rust -///extern crate base64; -/// -///fn main() { -/// let mut buffer = Vec::<u8>::new(); -/// base64::decode_config_buf("aGVsbG8gd29ybGR+Cg==", base64::STANDARD, &mut buffer).unwrap(); -/// println!("{:?}", buffer); +/// Decode from string reference as octets. /// -/// buffer.clear(); -/// -/// base64::decode_config_buf("aGVsbG8gaW50ZXJuZXR-Cg==", base64::URL_SAFE, &mut buffer) -/// .unwrap(); -/// println!("{:?}", buffer); -///} -///``` +/// See [Engine::decode_vec]. #[cfg(any(feature = "alloc", feature = "std", test))] -pub fn decode_config_buf<T: AsRef<[u8]>>( +#[deprecated(since = "0.21.0", note = "Use Engine::decode_vec")] +pub fn decode_engine_vec<E: Engine, T: AsRef<[u8]>>( input: T, - config: Config, buffer: &mut Vec<u8>, + engine: &E, ) -> Result<(), DecodeError> { - let input_bytes = input.as_ref(); - - let starting_output_len = buffer.len(); - - let num_chunks = num_chunks(input_bytes); - let decoded_len_estimate = num_chunks - .checked_mul(DECODED_CHUNK_LEN) - .and_then(|p| p.checked_add(starting_output_len)) - .expect("Overflow when calculating output buffer length"); - buffer.resize(decoded_len_estimate, 0); - - let bytes_written; - { - let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..]; - bytes_written = decode_helper(input_bytes, num_chunks, config, buffer_slice)?; - } - - buffer.truncate(starting_output_len + bytes_written); - - Ok(()) + engine.decode_vec(input, buffer) } /// Decode the input into the provided output slice. /// -/// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end). -/// -/// If you don't know ahead of time what the decoded length should be, size your buffer with a -/// conservative estimate for the decoded length of an input: 3 bytes of output for every 4 bytes of -/// input, rounded up, or in other words `(input_len + 3) / 4 * 3`. -/// -/// If the slice is not large enough, this will panic. 
-pub fn decode_config_slice<T: AsRef<[u8]>>( +/// See [Engine::decode_slice]. +#[deprecated(since = "0.21.0", note = "Use Engine::decode_slice")] +pub fn decode_engine_slice<E: Engine, T: AsRef<[u8]>>( input: T, - config: Config, output: &mut [u8], -) -> Result<usize, DecodeError> { - let input_bytes = input.as_ref(); - - decode_helper(input_bytes, num_chunks(input_bytes), config, output) -} - -/// Return the number of input chunks (including a possibly partial final chunk) in the input -fn num_chunks(input: &[u8]) -> usize { - input - .len() - .checked_add(INPUT_CHUNK_LEN - 1) - .expect("Overflow when calculating number of chunks in input") - / INPUT_CHUNK_LEN + engine: &E, +) -> Result<usize, DecodeSliceError> { + engine.decode_slice(input, output) } -/// Helper to avoid duplicating num_chunks calculation, which is costly on short inputs. -/// Returns the number of bytes written, or an error. -// We're on the fragile edge of compiler heuristics here. If this is not inlined, slow. If this is -// inlined(always), a different slow. plain ol' inline makes the benchmarks happiest at the moment, -// but this is fragile and the best setting changes with only minor code modifications. -#[inline] -fn decode_helper( - input: &[u8], - num_chunks: usize, - config: Config, - output: &mut [u8], -) -> Result<usize, DecodeError> { - let char_set = config.char_set; - let decode_table = char_set.decode_table(); - - let remainder_len = input.len() % INPUT_CHUNK_LEN; - - // Because the fast decode loop writes in groups of 8 bytes (unrolled to - // CHUNKS_PER_FAST_LOOP_BLOCK times 8 bytes, where possible) and outputs 8 bytes at a time (of - // which only 6 are valid data), we need to be sure that we stop using the fast decode loop - // soon enough that there will always be 2 more bytes of valid data written after that loop. 
- let trailing_bytes_to_skip = match remainder_len { - // if input is a multiple of the chunk size, ignore the last chunk as it may have padding, - // and the fast decode logic cannot handle padding - 0 => INPUT_CHUNK_LEN, - // 1 and 5 trailing bytes are illegal: can't decode 6 bits of input into a byte - 1 | 5 => { - // trailing whitespace is so common that it's worth it to check the last byte to - // possibly return a better error message - if let Some(b) = input.last() { - if *b != PAD_BYTE && decode_table[*b as usize] == tables::INVALID_VALUE { - return Err(DecodeError::InvalidByte(input.len() - 1, *b)); - } - } - - return Err(DecodeError::InvalidLength); - } - // This will decode to one output byte, which isn't enough to overwrite the 2 extra bytes - // written by the fast decode loop. So, we have to ignore both these 2 bytes and the - // previous chunk. - 2 => INPUT_CHUNK_LEN + 2, - // If this is 3 unpadded chars, then it would actually decode to 2 bytes. However, if this - // is an erroneous 2 chars + 1 pad char that would decode to 1 byte, then it should fail - // with an error, not panic from going past the bounds of the output slice, so we let it - // use stage 3 + 4. - 3 => INPUT_CHUNK_LEN + 3, - // This can also decode to one output byte because it may be 2 input chars + 2 padding - // chars, which would decode to 1 byte. - 4 => INPUT_CHUNK_LEN + 4, - // Everything else is a legal decode len (given that we don't require padding), and will - // decode to at least 2 bytes of output. 
- _ => remainder_len, - }; - - // rounded up to include partial chunks - let mut remaining_chunks = num_chunks; - - let mut input_index = 0; - let mut output_index = 0; - - { - let length_of_fast_decode_chunks = input.len().saturating_sub(trailing_bytes_to_skip); - - // Fast loop, stage 1 - // manual unroll to CHUNKS_PER_FAST_LOOP_BLOCK of u64s to amortize slice bounds checks - if let Some(max_start_index) = length_of_fast_decode_chunks.checked_sub(INPUT_BLOCK_LEN) { - while input_index <= max_start_index { - let input_slice = &input[input_index..(input_index + INPUT_BLOCK_LEN)]; - let output_slice = &mut output[output_index..(output_index + DECODED_BLOCK_LEN)]; - - decode_chunk( - &input_slice[0..], - input_index, - decode_table, - &mut output_slice[0..], - )?; - decode_chunk( - &input_slice[8..], - input_index + 8, - decode_table, - &mut output_slice[6..], - )?; - decode_chunk( - &input_slice[16..], - input_index + 16, - decode_table, - &mut output_slice[12..], - )?; - decode_chunk( - &input_slice[24..], - input_index + 24, - decode_table, - &mut output_slice[18..], - )?; - - input_index += INPUT_BLOCK_LEN; - output_index += DECODED_BLOCK_LEN - DECODED_CHUNK_SUFFIX; - remaining_chunks -= CHUNKS_PER_FAST_LOOP_BLOCK; - } - } - - // Fast loop, stage 2 (aka still pretty fast loop) - // 8 bytes at a time for whatever we didn't do in stage 1. 
- if let Some(max_start_index) = length_of_fast_decode_chunks.checked_sub(INPUT_CHUNK_LEN) { - while input_index < max_start_index { - decode_chunk( - &input[input_index..(input_index + INPUT_CHUNK_LEN)], - input_index, - decode_table, - &mut output - [output_index..(output_index + DECODED_CHUNK_LEN + DECODED_CHUNK_SUFFIX)], - )?; - - output_index += DECODED_CHUNK_LEN; - input_index += INPUT_CHUNK_LEN; - remaining_chunks -= 1; - } - } - } - - // Stage 3 - // If input length was such that a chunk had to be deferred until after the fast loop - // because decoding it would have produced 2 trailing bytes that wouldn't then be - // overwritten, we decode that chunk here. This way is slower but doesn't write the 2 - // trailing bytes. - // However, we still need to avoid the last chunk (partial or complete) because it could - // have padding, so we always do 1 fewer to avoid the last chunk. - for _ in 1..remaining_chunks { - decode_chunk_precise( - &input[input_index..], - input_index, - decode_table, - &mut output[output_index..(output_index + DECODED_CHUNK_LEN)], - )?; - - input_index += INPUT_CHUNK_LEN; - output_index += DECODED_CHUNK_LEN; - } - - // always have one more (possibly partial) block of 8 input - debug_assert!(input.len() - input_index > 1 || input.is_empty()); - debug_assert!(input.len() - input_index <= 8); - - // Stage 4 - // Finally, decode any leftovers that aren't a complete input block of 8 bytes. - // Use a u64 as a stack-resident 8 byte buffer. - let mut leftover_bits: u64 = 0; - let mut morsels_in_leftover = 0; - let mut padding_bytes = 0; - let mut first_padding_index: usize = 0; - let mut last_symbol = 0_u8; - let start_of_leftovers = input_index; - for (i, b) in input[start_of_leftovers..].iter().enumerate() { - // '=' padding - if *b == PAD_BYTE { - // There can be bad padding in a few ways: - // 1 - Padding with non-padding characters after it - // 2 - Padding after zero or one non-padding characters before it - // in the current quad. 
- // 3 - More than two characters of padding. If 3 or 4 padding chars - // are in the same quad, that implies it will be caught by #2. - // If it spreads from one quad to another, it will be caught by - // #2 in the second quad. - - if i % 4 < 2 { - // Check for case #2. - let bad_padding_index = start_of_leftovers - + if padding_bytes > 0 { - // If we've already seen padding, report the first padding index. - // This is to be consistent with the faster logic above: it will report an - // error on the first padding character (since it doesn't expect to see - // anything but actual encoded data). - first_padding_index - } else { - // haven't seen padding before, just use where we are now - i - }; - return Err(DecodeError::InvalidByte(bad_padding_index, *b)); - } - - if padding_bytes == 0 { - first_padding_index = i; - } - - padding_bytes += 1; - continue; - } - - // Check for case #1. - // To make '=' handling consistent with the main loop, don't allow - // non-suffix '=' in trailing chunk either. Report error as first - // erroneous padding. - if padding_bytes > 0 { - return Err(DecodeError::InvalidByte( - start_of_leftovers + first_padding_index, - PAD_BYTE, - )); - } - last_symbol = *b; - - // can use up to 8 * 6 = 48 bits of the u64, if last chunk has no padding. - // To minimize shifts, pack the leftovers from left to right. 
- let shift = 64 - (morsels_in_leftover + 1) * 6; - // tables are all 256 elements, lookup with a u8 index always succeeds - let morsel = decode_table[*b as usize]; - if morsel == tables::INVALID_VALUE { - return Err(DecodeError::InvalidByte(start_of_leftovers + i, *b)); - } - - leftover_bits |= (morsel as u64) << shift; - morsels_in_leftover += 1; - } - - let leftover_bits_ready_to_append = match morsels_in_leftover { - 0 => 0, - 2 => 8, - 3 => 16, - 4 => 24, - 6 => 32, - 7 => 40, - 8 => 48, - _ => unreachable!( - "Impossible: must only have 0 to 8 input bytes in last chunk, with no invalid lengths" - ), - }; - - // if there are bits set outside the bits we care about, last symbol encodes trailing bits that - // will not be included in the output - let mask = !0 >> leftover_bits_ready_to_append; - if !config.decode_allow_trailing_bits && (leftover_bits & mask) != 0 { - // last morsel is at `morsels_in_leftover` - 1 - return Err(DecodeError::InvalidLastSymbol( - start_of_leftovers + morsels_in_leftover - 1, - last_symbol, - )); - } - - let mut leftover_bits_appended_to_buf = 0; - while leftover_bits_appended_to_buf < leftover_bits_ready_to_append { - // `as` simply truncates the higher bits, which is what we want here - let selected_bits = (leftover_bits >> (56 - leftover_bits_appended_to_buf)) as u8; - output[output_index] = selected_bits; - output_index += 1; - - leftover_bits_appended_to_buf += 8; - } - - Ok(output_index) -} - -#[inline] -fn write_u64(output: &mut [u8], value: u64) { - output[..8].copy_from_slice(&value.to_be_bytes()); -} - -/// Decode 8 bytes of input into 6 bytes of output. 8 bytes of output will be written, but only the -/// first 6 of those contain meaningful data. +/// Returns a conservative estimate of the decoded size of `encoded_len` base64 symbols (rounded up +/// to the next group of 3 decoded bytes). /// -/// `input` is the bytes to decode, of which the first 8 bytes will be processed. 
-/// `index_at_start_of_input` is the offset in the overall input (used for reporting errors -/// accurately) -/// `decode_table` is the lookup table for the particular base64 alphabet. -/// `output` will have its first 8 bytes overwritten, of which only the first 6 are valid decoded -/// data. -// yes, really inline (worth 30-50% speedup) -#[inline(always)] -fn decode_chunk( - input: &[u8], - index_at_start_of_input: usize, - decode_table: &[u8; 256], - output: &mut [u8], -) -> Result<(), DecodeError> { - let mut accum: u64; - - let morsel = decode_table[input[0] as usize]; - if morsel == tables::INVALID_VALUE { - return Err(DecodeError::InvalidByte(index_at_start_of_input, input[0])); - } - accum = (morsel as u64) << 58; - - let morsel = decode_table[input[1] as usize]; - if morsel == tables::INVALID_VALUE { - return Err(DecodeError::InvalidByte( - index_at_start_of_input + 1, - input[1], - )); - } - accum |= (morsel as u64) << 52; - - let morsel = decode_table[input[2] as usize]; - if morsel == tables::INVALID_VALUE { - return Err(DecodeError::InvalidByte( - index_at_start_of_input + 2, - input[2], - )); - } - accum |= (morsel as u64) << 46; - - let morsel = decode_table[input[3] as usize]; - if morsel == tables::INVALID_VALUE { - return Err(DecodeError::InvalidByte( - index_at_start_of_input + 3, - input[3], - )); - } - accum |= (morsel as u64) << 40; - - let morsel = decode_table[input[4] as usize]; - if morsel == tables::INVALID_VALUE { - return Err(DecodeError::InvalidByte( - index_at_start_of_input + 4, - input[4], - )); - } - accum |= (morsel as u64) << 34; - - let morsel = decode_table[input[5] as usize]; - if morsel == tables::INVALID_VALUE { - return Err(DecodeError::InvalidByte( - index_at_start_of_input + 5, - input[5], - )); - } - accum |= (morsel as u64) << 28; - - let morsel = decode_table[input[6] as usize]; - if morsel == tables::INVALID_VALUE { - return Err(DecodeError::InvalidByte( - index_at_start_of_input + 6, - input[6], - )); - } - accum |= 
(morsel as u64) << 22; - - let morsel = decode_table[input[7] as usize]; - if morsel == tables::INVALID_VALUE { - return Err(DecodeError::InvalidByte( - index_at_start_of_input + 7, - input[7], - )); - } - accum |= (morsel as u64) << 16; - - write_u64(output, accum); - - Ok(()) -} - -/// Decode an 8-byte chunk, but only write the 6 bytes actually decoded instead of including 2 -/// trailing garbage bytes. -#[inline] -fn decode_chunk_precise( - input: &[u8], - index_at_start_of_input: usize, - decode_table: &[u8; 256], - output: &mut [u8], -) -> Result<(), DecodeError> { - let mut tmp_buf = [0_u8; 8]; - - decode_chunk( - input, - index_at_start_of_input, - decode_table, - &mut tmp_buf[..], - )?; - - output[0..6].copy_from_slice(&tmp_buf[0..6]); - - Ok(()) +/// The resulting length will be a safe choice for the size of a decode buffer, but may have up to +/// 2 trailing bytes that won't end up being needed. +/// +/// # Examples +/// +/// ``` +/// use base64::decoded_len_estimate; +/// +/// assert_eq!(3, decoded_len_estimate(1)); +/// assert_eq!(3, decoded_len_estimate(2)); +/// assert_eq!(3, decoded_len_estimate(3)); +/// assert_eq!(3, decoded_len_estimate(4)); +/// // start of the next quad of encoded symbols +/// assert_eq!(6, decoded_len_estimate(5)); +/// ``` +/// +/// # Panics +/// +/// Panics if decoded length estimation overflows. +/// This would happen for sizes within a few bytes of the maximum value of `usize`. 
+pub fn decoded_len_estimate(encoded_len: usize) -> usize { + STANDARD + .internal_decoded_len_estimate(encoded_len) + .decoded_len_estimate() } #[cfg(test)] mod tests { use super::*; use crate::{ - encode::encode_config_buf, - encode::encode_config_slice, - tests::{assert_encode_sanity, random_config}, + alphabet, + engine::{general_purpose, Config, GeneralPurpose}, + tests::{assert_encode_sanity, random_engine}, }; - use rand::{ distributions::{Distribution, Uniform}, - FromEntropy, Rng, + Rng, SeedableRng, }; #[test] - fn decode_chunk_precise_writes_only_6_bytes() { - let input = b"Zm9vYmFy"; // "foobar" - let mut output = [0_u8, 1, 2, 3, 4, 5, 6, 7]; - decode_chunk_precise(&input[..], 0, tables::STANDARD_DECODE, &mut output).unwrap(); - assert_eq!(&vec![b'f', b'o', b'o', b'b', b'a', b'r', 6, 7], &output); - } - - #[test] - fn decode_chunk_writes_8_bytes() { - let input = b"Zm9vYmFy"; // "foobar" - let mut output = [0_u8, 1, 2, 3, 4, 5, 6, 7]; - decode_chunk(&input[..], 0, tables::STANDARD_DECODE, &mut output).unwrap(); - assert_eq!(&vec![b'f', b'o', b'o', b'b', b'a', b'r', 0, 0], &output); - } - - #[test] fn decode_into_nonempty_vec_doesnt_clobber_existing_prefix() { let mut orig_data = Vec::new(); let mut encoded_data = String::new(); @@ -616,9 +198,9 @@ mod tests { orig_data.push(rng.gen()); } - let config = random_config(&mut rng); - encode_config_buf(&orig_data, config, &mut encoded_data); - assert_encode_sanity(&encoded_data, config, input_len); + let engine = random_engine(&mut rng); + engine.encode_string(&orig_data, &mut encoded_data); + assert_encode_sanity(&encoded_data, engine.config().encode_padding(), input_len); let prefix_len = prefix_len_range.sample(&mut rng); @@ -631,9 +213,13 @@ mod tests { decoded_with_prefix.copy_from_slice(&prefix); // decode into the non-empty buf - decode_config_buf(&encoded_data, config, &mut decoded_with_prefix).unwrap(); + engine + .decode_vec(&encoded_data, &mut decoded_with_prefix) + .unwrap(); // also decode into 
the empty buf - decode_config_buf(&encoded_data, config, &mut decoded_without_prefix).unwrap(); + engine + .decode_vec(&encoded_data, &mut decoded_without_prefix) + .unwrap(); assert_eq!( prefix_len + decoded_without_prefix.len(), @@ -649,7 +235,66 @@ mod tests { } #[test] - fn decode_into_slice_doesnt_clobber_existing_prefix_or_suffix() { + fn decode_slice_doesnt_clobber_existing_prefix_or_suffix() { + do_decode_slice_doesnt_clobber_existing_prefix_or_suffix(|e, input, output| { + e.decode_slice(input, output).unwrap() + }) + } + + #[test] + fn decode_slice_unchecked_doesnt_clobber_existing_prefix_or_suffix() { + do_decode_slice_doesnt_clobber_existing_prefix_or_suffix(|e, input, output| { + e.decode_slice_unchecked(input, output).unwrap() + }) + } + + #[test] + fn decode_engine_estimation_works_for_various_lengths() { + let engine = GeneralPurpose::new(&alphabet::STANDARD, general_purpose::NO_PAD); + for num_prefix_quads in 0..100 { + for suffix in &["AA", "AAA", "AAAA"] { + let mut prefix = "AAAA".repeat(num_prefix_quads); + prefix.push_str(suffix); + // make sure no overflow (and thus a panic) occurs + let res = engine.decode(prefix); + assert!(res.is_ok()); + } + } + } + + #[test] + fn decode_slice_output_length_errors() { + for num_quads in 1..100 { + let input = "AAAA".repeat(num_quads); + let mut vec = vec![0; (num_quads - 1) * 3]; + assert_eq!( + DecodeSliceError::OutputSliceTooSmall, + STANDARD.decode_slice(&input, &mut vec).unwrap_err() + ); + vec.push(0); + assert_eq!( + DecodeSliceError::OutputSliceTooSmall, + STANDARD.decode_slice(&input, &mut vec).unwrap_err() + ); + vec.push(0); + assert_eq!( + DecodeSliceError::OutputSliceTooSmall, + STANDARD.decode_slice(&input, &mut vec).unwrap_err() + ); + vec.push(0); + // now it works + assert_eq!( + num_quads * 3, + STANDARD.decode_slice(&input, &mut vec).unwrap() + ); + } + } + + fn do_decode_slice_doesnt_clobber_existing_prefix_or_suffix< + F: Fn(&GeneralPurpose, &[u8], &mut [u8]) -> usize, + >( + 
call_decode: F, + ) { let mut orig_data = Vec::new(); let mut encoded_data = String::new(); let mut decode_buf = Vec::new(); @@ -671,9 +316,9 @@ mod tests { orig_data.push(rng.gen()); } - let config = random_config(&mut rng); - encode_config_buf(&orig_data, config, &mut encoded_data); - assert_encode_sanity(&encoded_data, config, input_len); + let engine = random_engine(&mut rng); + engine.encode_string(&orig_data, &mut encoded_data); + assert_encode_sanity(&encoded_data, engine.config().encode_padding(), input_len); // fill the buffer with random garbage, long enough to have some room before and after for _ in 0..5000 { @@ -687,7 +332,7 @@ mod tests { // decode into the non-empty buf let decode_bytes_written = - decode_config_slice(&encoded_data, config, &mut decode_buf[offset..]).unwrap(); + call_decode(&engine, encoded_data.as_bytes(), &mut decode_buf[offset..]); assert_eq!(orig_data.len(), decode_bytes_written); assert_eq!( @@ -701,173 +346,4 @@ mod tests { ); } } - - #[test] - fn decode_into_slice_fits_in_precisely_sized_slice() { - let mut orig_data = Vec::new(); - let mut encoded_data = String::new(); - let mut decode_buf = Vec::new(); - - let input_len_range = Uniform::new(0, 1000); - - let mut rng = rand::rngs::SmallRng::from_entropy(); - - for _ in 0..10_000 { - orig_data.clear(); - encoded_data.clear(); - decode_buf.clear(); - - let input_len = input_len_range.sample(&mut rng); - - for _ in 0..input_len { - orig_data.push(rng.gen()); - } - - let config = random_config(&mut rng); - encode_config_buf(&orig_data, config, &mut encoded_data); - assert_encode_sanity(&encoded_data, config, input_len); - - decode_buf.resize(input_len, 0); - - // decode into the non-empty buf - let decode_bytes_written = - decode_config_slice(&encoded_data, config, &mut decode_buf[..]).unwrap(); - - assert_eq!(orig_data.len(), decode_bytes_written); - assert_eq!(orig_data, decode_buf); - } - } - - #[test] - fn detect_invalid_last_symbol_two_bytes() { - let decode = - |input, 
forgiving| decode_config(input, STANDARD.decode_allow_trailing_bits(forgiving)); - - // example from https://github.com/marshallpierce/rust-base64/issues/75 - assert!(decode("iYU=", false).is_ok()); - // trailing 01 - assert_eq!( - Err(DecodeError::InvalidLastSymbol(2, b'V')), - decode("iYV=", false) - ); - assert_eq!(Ok(vec![137, 133]), decode("iYV=", true)); - // trailing 10 - assert_eq!( - Err(DecodeError::InvalidLastSymbol(2, b'W')), - decode("iYW=", false) - ); - assert_eq!(Ok(vec![137, 133]), decode("iYV=", true)); - // trailing 11 - assert_eq!( - Err(DecodeError::InvalidLastSymbol(2, b'X')), - decode("iYX=", false) - ); - assert_eq!(Ok(vec![137, 133]), decode("iYV=", true)); - - // also works when there are 2 quads in the last block - assert_eq!( - Err(DecodeError::InvalidLastSymbol(6, b'X')), - decode("AAAAiYX=", false) - ); - assert_eq!(Ok(vec![0, 0, 0, 137, 133]), decode("AAAAiYX=", true)); - } - - #[test] - fn detect_invalid_last_symbol_one_byte() { - // 0xFF -> "/w==", so all letters > w, 0-9, and '+', '/' should get InvalidLastSymbol - - assert!(decode("/w==").is_ok()); - // trailing 01 - assert_eq!(Err(DecodeError::InvalidLastSymbol(1, b'x')), decode("/x==")); - assert_eq!(Err(DecodeError::InvalidLastSymbol(1, b'z')), decode("/z==")); - assert_eq!(Err(DecodeError::InvalidLastSymbol(1, b'0')), decode("/0==")); - assert_eq!(Err(DecodeError::InvalidLastSymbol(1, b'9')), decode("/9==")); - assert_eq!(Err(DecodeError::InvalidLastSymbol(1, b'+')), decode("/+==")); - assert_eq!(Err(DecodeError::InvalidLastSymbol(1, b'/')), decode("//==")); - - // also works when there are 2 quads in the last block - assert_eq!( - Err(DecodeError::InvalidLastSymbol(5, b'x')), - decode("AAAA/x==") - ); - } - - #[test] - fn detect_invalid_last_symbol_every_possible_three_symbols() { - let mut base64_to_bytes = ::std::collections::HashMap::new(); - - let mut bytes = [0_u8; 2]; - for b1 in 0_u16..256 { - bytes[0] = b1 as u8; - for b2 in 0_u16..256 { - bytes[1] = b2 as u8; - let 
mut b64 = vec![0_u8; 4]; - assert_eq!(4, encode_config_slice(&bytes, STANDARD, &mut b64[..])); - let mut v = ::std::vec::Vec::with_capacity(2); - v.extend_from_slice(&bytes[..]); - - assert!(base64_to_bytes.insert(b64, v).is_none()); - } - } - - // every possible combination of symbols must either decode to 2 bytes or get InvalidLastSymbol - - let mut symbols = [0_u8; 4]; - for &s1 in STANDARD.char_set.encode_table().iter() { - symbols[0] = s1; - for &s2 in STANDARD.char_set.encode_table().iter() { - symbols[1] = s2; - for &s3 in STANDARD.char_set.encode_table().iter() { - symbols[2] = s3; - symbols[3] = PAD_BYTE; - - match base64_to_bytes.get(&symbols[..]) { - Some(bytes) => { - assert_eq!(Ok(bytes.to_vec()), decode_config(&symbols, STANDARD)) - } - None => assert_eq!( - Err(DecodeError::InvalidLastSymbol(2, s3)), - decode_config(&symbols[..], STANDARD) - ), - } - } - } - } - } - - #[test] - fn detect_invalid_last_symbol_every_possible_two_symbols() { - let mut base64_to_bytes = ::std::collections::HashMap::new(); - - for b in 0_u16..256 { - let mut b64 = vec![0_u8; 4]; - assert_eq!(4, encode_config_slice(&[b as u8], STANDARD, &mut b64[..])); - let mut v = ::std::vec::Vec::with_capacity(1); - v.push(b as u8); - - assert!(base64_to_bytes.insert(b64, v).is_none()); - } - - // every possible combination of symbols must either decode to 1 byte or get InvalidLastSymbol - - let mut symbols = [0_u8; 4]; - for &s1 in STANDARD.char_set.encode_table().iter() { - symbols[0] = s1; - for &s2 in STANDARD.char_set.encode_table().iter() { - symbols[1] = s2; - symbols[2] = PAD_BYTE; - symbols[3] = PAD_BYTE; - - match base64_to_bytes.get(&symbols[..]) { - Some(bytes) => { - assert_eq!(Ok(bytes.to_vec()), decode_config(&symbols, STANDARD)) - } - None => assert_eq!( - Err(DecodeError::InvalidLastSymbol(1, s2)), - decode_config(&symbols[..], STANDARD) - ), - } - } - } - } } diff --git a/src/display.rs b/src/display.rs index cc70aac..fc292f1 100644 --- a/src/display.rs +++ 
b/src/display.rs @@ -1,36 +1,36 @@ //! Enables base64'd output anywhere you might use a `Display` implementation, like a format string. //! //! ``` -//! use base64::display::Base64Display; +//! use base64::{display::Base64Display, engine::general_purpose::STANDARD}; //! //! let data = vec![0x0, 0x1, 0x2, 0x3]; -//! let wrapper = Base64Display::with_config(&data, base64::STANDARD); +//! let wrapper = Base64Display::new(&data, &STANDARD); //! //! assert_eq!("base64: AAECAw==", format!("base64: {}", wrapper)); //! ``` use super::chunked_encoder::ChunkedEncoder; -use super::Config; +use crate::engine::Engine; use core::fmt::{Display, Formatter}; use core::{fmt, str}; /// A convenience wrapper for base64'ing bytes into a format string without heap allocation. -pub struct Base64Display<'a> { +pub struct Base64Display<'a, 'e, E: Engine> { bytes: &'a [u8], - chunked_encoder: ChunkedEncoder, + chunked_encoder: ChunkedEncoder<'e, E>, } -impl<'a> Base64Display<'a> { - /// Create a `Base64Display` with the provided config. - pub fn with_config(bytes: &[u8], config: Config) -> Base64Display { +impl<'a, 'e, E: Engine> Base64Display<'a, 'e, E> { + /// Create a `Base64Display` with the provided engine. 
+ pub fn new(bytes: &'a [u8], engine: &'e E) -> Base64Display<'a, 'e, E> { Base64Display { bytes, - chunked_encoder: ChunkedEncoder::new(config), + chunked_encoder: ChunkedEncoder::new(engine), } } } -impl<'a> Display for Base64Display<'a> { +impl<'a, 'e, E: Engine> Display for Base64Display<'a, 'e, E> { fn fmt(&self, formatter: &mut Formatter) -> Result<(), fmt::Error> { let mut sink = FormatterSink { f: formatter }; self.chunked_encoder.encode(self.bytes, &mut sink) @@ -57,18 +57,18 @@ mod tests { use super::super::chunked_encoder::tests::{ chunked_encode_matches_normal_encode_random, SinkTestHelper, }; - use super::super::*; use super::*; + use crate::engine::general_purpose::STANDARD; #[test] fn basic_display() { assert_eq!( "~$Zm9vYmFy#*", - format!("~${}#*", Base64Display::with_config(b"foobar", STANDARD)) + format!("~${}#*", Base64Display::new(b"foobar", &STANDARD)) ); assert_eq!( "~$Zm9vYmFyZg==#*", - format!("~${}#*", Base64Display::with_config(b"foobarf", STANDARD)) + format!("~${}#*", Base64Display::new(b"foobarf", &STANDARD)) ); } @@ -81,8 +81,8 @@ mod tests { struct DisplaySinkTestHelper; impl SinkTestHelper for DisplaySinkTestHelper { - fn encode_to_string(&self, config: Config, bytes: &[u8]) -> String { - format!("{}", Base64Display::with_config(bytes, config)) + fn encode_to_string<E: Engine>(&self, engine: &E, bytes: &[u8]) -> String { + format!("{}", Base64Display::new(bytes, engine)) } } } diff --git a/src/encode.rs b/src/encode.rs index b32bbff..cb17650 100644 --- a/src/encode.rs +++ b/src/encode.rs @@ -1,130 +1,59 @@ -use crate::{Config, PAD_BYTE}; #[cfg(any(feature = "alloc", feature = "std", test))] -use crate::{chunked_encoder, STANDARD}; +use alloc::string::String; +use core::fmt; +#[cfg(any(feature = "std", test))] +use std::error; + #[cfg(any(feature = "alloc", feature = "std", test))] -use alloc::{string::String, vec}; -use core::convert::TryInto; +use crate::engine::general_purpose::STANDARD; +use crate::engine::{Config, Engine}; +use 
crate::PAD_BYTE; -///Encode arbitrary octets as base64. -///Returns a String. -///Convenience for `encode_config(input, base64::STANDARD);`. -/// -///# Example +/// Encode arbitrary octets as base64 using the [`STANDARD` engine](STANDARD). /// -///```rust -///extern crate base64; -/// -///fn main() { -/// let b64 = base64::encode(b"hello world"); -/// println!("{}", b64); -///} -///``` +/// See [Engine::encode]. +#[allow(unused)] +#[deprecated(since = "0.21.0", note = "Use Engine::encode")] #[cfg(any(feature = "alloc", feature = "std", test))] pub fn encode<T: AsRef<[u8]>>(input: T) -> String { - encode_config(input, STANDARD) + STANDARD.encode(input) } -///Encode arbitrary octets as base64. -///Returns a String. -/// -///# Example +///Encode arbitrary octets as base64 using the provided `Engine` into a new `String`. /// -///```rust -///extern crate base64; -/// -///fn main() { -/// let b64 = base64::encode_config(b"hello world~", base64::STANDARD); -/// println!("{}", b64); -/// -/// let b64_url = base64::encode_config(b"hello internet~", base64::URL_SAFE); -/// println!("{}", b64_url); -///} -///``` +/// See [Engine::encode]. +#[allow(unused)] +#[deprecated(since = "0.21.0", note = "Use Engine::encode")] #[cfg(any(feature = "alloc", feature = "std", test))] -pub fn encode_config<T: AsRef<[u8]>>(input: T, config: Config) -> String { - let mut buf = match encoded_size(input.as_ref().len(), config) { - Some(n) => vec![0; n], - None => panic!("integer overflow when calculating buffer size"), - }; - - encode_with_padding(input.as_ref(), config, buf.len(), &mut buf[..]); - - String::from_utf8(buf).expect("Invalid UTF8") +pub fn encode_engine<E: Engine, T: AsRef<[u8]>>(input: T, engine: &E) -> String { + engine.encode(input) } -///Encode arbitrary octets as base64. -///Writes into the supplied output buffer, which will grow the buffer if needed. -/// -///# Example +///Encode arbitrary octets as base64 into a supplied `String`. 
/// -///```rust -///extern crate base64; -/// -///fn main() { -/// let mut buf = String::new(); -/// base64::encode_config_buf(b"hello world~", base64::STANDARD, &mut buf); -/// println!("{}", buf); -/// -/// buf.clear(); -/// base64::encode_config_buf(b"hello internet~", base64::URL_SAFE, &mut buf); -/// println!("{}", buf); -///} -///``` +/// See [Engine::encode_string]. +#[allow(unused)] +#[deprecated(since = "0.21.0", note = "Use Engine::encode_string")] #[cfg(any(feature = "alloc", feature = "std", test))] -pub fn encode_config_buf<T: AsRef<[u8]>>(input: T, config: Config, buf: &mut String) { - let input_bytes = input.as_ref(); - - { - let mut sink = chunked_encoder::StringSink::new(buf); - let encoder = chunked_encoder::ChunkedEncoder::new(config); - - encoder - .encode(input_bytes, &mut sink) - .expect("Writing to a String shouldn't fail") - } +pub fn encode_engine_string<E: Engine, T: AsRef<[u8]>>( + input: T, + output_buf: &mut String, + engine: &E, +) { + engine.encode_string(input, output_buf) } -/// Encode arbitrary octets as base64. -/// Writes into the supplied output buffer. -/// -/// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident -/// or statically-allocated buffer). -/// -/// # Panics -/// -/// If `output` is too small to hold the encoded version of `input`, a panic will result. -/// -/// # Example -/// -/// ```rust -/// extern crate base64; +/// Encode arbitrary octets as base64 into a supplied slice. 
/// -/// fn main() { -/// let s = b"hello internet!"; -/// let mut buf = Vec::new(); -/// // make sure we'll have a slice big enough for base64 + padding -/// buf.resize(s.len() * 4 / 3 + 4, 0); -/// -/// let bytes_written = base64::encode_config_slice(s, -/// base64::STANDARD, &mut buf); -/// -/// // shorten our vec down to just what was written -/// buf.resize(bytes_written, 0); -/// -/// assert_eq!(s, base64::decode(&buf).unwrap().as_slice()); -/// } -/// ``` -pub fn encode_config_slice<T: AsRef<[u8]>>(input: T, config: Config, output: &mut [u8]) -> usize { - let input_bytes = input.as_ref(); - - let encoded_size = encoded_size(input_bytes.len(), config) - .expect("usize overflow when calculating buffer size"); - - let mut b64_output = &mut output[0..encoded_size]; - - encode_with_padding(&input_bytes, config, encoded_size, &mut b64_output); - - encoded_size +/// See [Engine::encode_slice]. +#[allow(unused)] +#[deprecated(since = "0.21.0", note = "Use Engine::encode_slice")] +pub fn encode_engine_slice<E: Engine, T: AsRef<[u8]>>( + input: T, + output_buf: &mut [u8], + engine: &E, +) -> Result<usize, EncodeSliceError> { + engine.encode_slice(input, output_buf) } /// B64-encode and pad (if configured). @@ -137,12 +66,17 @@ pub fn encode_config_slice<T: AsRef<[u8]>>(input: T, config: Config, output: &mu /// `output` must be of size `encoded_size`. /// /// All bytes in `output` will be written to since it is exactly the size of the output. 
-fn encode_with_padding(input: &[u8], config: Config, encoded_size: usize, output: &mut [u8]) { - debug_assert_eq!(encoded_size, output.len()); +pub(crate) fn encode_with_padding<E: Engine + ?Sized>( + input: &[u8], + output: &mut [u8], + engine: &E, + expected_encoded_size: usize, +) { + debug_assert_eq!(expected_encoded_size, output.len()); - let b64_bytes_written = encode_to_slice(input, output, config.char_set.encode_table()); + let b64_bytes_written = engine.internal_encode(input, output); - let padding_bytes = if config.pad { + let padding_bytes = if engine.config().encode_padding() { add_padding(input.len(), &mut output[b64_bytes_written..]) } else { 0 @@ -152,144 +86,22 @@ fn encode_with_padding(input: &[u8], config: Config, encoded_size: usize, output .checked_add(padding_bytes) .expect("usize overflow when calculating b64 length"); - debug_assert_eq!(encoded_size, encoded_bytes); -} - -#[inline] -fn read_u64(s: &[u8]) -> u64 { - u64::from_be_bytes(s[..8].try_into().unwrap()) + debug_assert_eq!(expected_encoded_size, encoded_bytes); } -/// Encode input bytes to utf8 base64 bytes. Does not pad. -/// `output` must be long enough to hold the encoded `input` without padding. -/// Returns the number of bytes written. -#[inline] -pub fn encode_to_slice(input: &[u8], output: &mut [u8], encode_table: &[u8; 64]) -> usize { - let mut input_index: usize = 0; - - const BLOCKS_PER_FAST_LOOP: usize = 4; - const LOW_SIX_BITS: u64 = 0x3F; - - // we read 8 bytes at a time (u64) but only actually consume 6 of those bytes. Thus, we need - // 2 trailing bytes to be available to read.. 
- let last_fast_index = input.len().saturating_sub(BLOCKS_PER_FAST_LOOP * 6 + 2); - let mut output_index = 0; - - if last_fast_index > 0 { - while input_index <= last_fast_index { - // Major performance wins from letting the optimizer do the bounds check once, mostly - // on the output side - let input_chunk = &input[input_index..(input_index + (BLOCKS_PER_FAST_LOOP * 6 + 2))]; - let output_chunk = &mut output[output_index..(output_index + BLOCKS_PER_FAST_LOOP * 8)]; - - // Hand-unrolling for 32 vs 16 or 8 bytes produces yields performance about equivalent - // to unsafe pointer code on a Xeon E5-1650v3. 64 byte unrolling was slightly better for - // large inputs but significantly worse for 50-byte input, unsurprisingly. I suspect - // that it's a not uncommon use case to encode smallish chunks of data (e.g. a 64-byte - // SHA-512 digest), so it would be nice if that fit in the unrolled loop at least once. - // Plus, single-digit percentage performance differences might well be quite different - // on different hardware. 
- - let input_u64 = read_u64(&input_chunk[0..]); - - output_chunk[0] = encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize]; - output_chunk[1] = encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize]; - output_chunk[2] = encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize]; - output_chunk[3] = encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize]; - output_chunk[4] = encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize]; - output_chunk[5] = encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize]; - output_chunk[6] = encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize]; - output_chunk[7] = encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize]; - - let input_u64 = read_u64(&input_chunk[6..]); - - output_chunk[8] = encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize]; - output_chunk[9] = encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize]; - output_chunk[10] = encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize]; - output_chunk[11] = encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize]; - output_chunk[12] = encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize]; - output_chunk[13] = encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize]; - output_chunk[14] = encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize]; - output_chunk[15] = encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize]; - - let input_u64 = read_u64(&input_chunk[12..]); - - output_chunk[16] = encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize]; - output_chunk[17] = encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize]; - output_chunk[18] = encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize]; - output_chunk[19] = encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize]; - output_chunk[20] = encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize]; - output_chunk[21] = encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize]; - output_chunk[22] = encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize]; - 
output_chunk[23] = encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize]; - - let input_u64 = read_u64(&input_chunk[18..]); - - output_chunk[24] = encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize]; - output_chunk[25] = encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize]; - output_chunk[26] = encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize]; - output_chunk[27] = encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize]; - output_chunk[28] = encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize]; - output_chunk[29] = encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize]; - output_chunk[30] = encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize]; - output_chunk[31] = encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize]; - - output_index += BLOCKS_PER_FAST_LOOP * 8; - input_index += BLOCKS_PER_FAST_LOOP * 6; - } - } - - // Encode what's left after the fast loop. - - const LOW_SIX_BITS_U8: u8 = 0x3F; - - let rem = input.len() % 3; - let start_of_rem = input.len() - rem; - - // start at the first index not handled by fast loop, which may be 0. 
- - while input_index < start_of_rem { - let input_chunk = &input[input_index..(input_index + 3)]; - let output_chunk = &mut output[output_index..(output_index + 4)]; - - output_chunk[0] = encode_table[(input_chunk[0] >> 2) as usize]; - output_chunk[1] = - encode_table[((input_chunk[0] << 4 | input_chunk[1] >> 4) & LOW_SIX_BITS_U8) as usize]; - output_chunk[2] = - encode_table[((input_chunk[1] << 2 | input_chunk[2] >> 6) & LOW_SIX_BITS_U8) as usize]; - output_chunk[3] = encode_table[(input_chunk[2] & LOW_SIX_BITS_U8) as usize]; - - input_index += 3; - output_index += 4; - } - - if rem == 2 { - output[output_index] = encode_table[(input[start_of_rem] >> 2) as usize]; - output[output_index + 1] = encode_table[((input[start_of_rem] << 4 - | input[start_of_rem + 1] >> 4) - & LOW_SIX_BITS_U8) as usize]; - output[output_index + 2] = - encode_table[((input[start_of_rem + 1] << 2) & LOW_SIX_BITS_U8) as usize]; - output_index += 3; - } else if rem == 1 { - output[output_index] = encode_table[(input[start_of_rem] >> 2) as usize]; - output[output_index + 1] = - encode_table[((input[start_of_rem] << 4) & LOW_SIX_BITS_U8) as usize]; - output_index += 2; - } - - output_index -} - -/// calculate the base64 encoded string size, including padding if appropriate -pub fn encoded_size(bytes_len: usize, config: Config) -> Option<usize> { +/// Calculate the base64 encoded length for a given input length, optionally including any +/// appropriate padding bytes. +/// +/// Returns `None` if the encoded length can't be represented in `usize`. This will happen for +/// input lengths in approximately the top quarter of the range of `usize`. 
+pub fn encoded_len(bytes_len: usize, padding: bool) -> Option<usize> { let rem = bytes_len % 3; let complete_input_chunks = bytes_len / 3; let complete_chunk_output = complete_input_chunks.checked_mul(4); if rem > 0 { - if config.pad { + if padding { complete_chunk_output.and_then(|c| c.checked_add(4)) } else { let encoded_rem = match rem { @@ -305,10 +117,12 @@ pub fn encoded_size(bytes_len: usize, config: Config) -> Option<usize> { } /// Write padding characters. +/// `input_len` is the size of the original, not encoded, input. /// `output` is the slice where padding should be written, of length at least 2. /// /// Returns the number of padding bytes written. -pub fn add_padding(input_len: usize, output: &mut [u8]) -> usize { +pub(crate) fn add_padding(input_len: usize, output: &mut [u8]) -> usize { + // TODO base on encoded len to use cheaper mod by 4 (aka & 7) let rem = input_len % 3; let mut bytes_written = 0; for _ in 0..((3 - rem) % 3) { @@ -319,79 +133,102 @@ pub fn add_padding(input_len: usize, output: &mut [u8]) -> usize { bytes_written } +/// Errors that can occur while encoding into a slice. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum EncodeSliceError { + /// The provided slice is too small. 
+ OutputSliceTooSmall, +} + +impl fmt::Display for EncodeSliceError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::OutputSliceTooSmall => write!(f, "Output slice too small"), + } + } +} + +#[cfg(any(feature = "std", test))] +impl error::Error for EncodeSliceError { + fn cause(&self) -> Option<&dyn error::Error> { + None + } +} + #[cfg(test)] mod tests { use super::*; + use crate::{ - decode::decode_config_buf, - tests::{assert_encode_sanity, random_config}, - Config, STANDARD, URL_SAFE_NO_PAD, + alphabet, + engine::general_purpose::{GeneralPurpose, NO_PAD, STANDARD}, + tests::{assert_encode_sanity, random_config, random_engine}, }; - use rand::{ distributions::{Distribution, Uniform}, - FromEntropy, Rng, + Rng, SeedableRng, }; - use std; use std::str; + const URL_SAFE_NO_PAD_ENGINE: GeneralPurpose = GeneralPurpose::new(&alphabet::URL_SAFE, NO_PAD); + #[test] fn encoded_size_correct_standard() { - assert_encoded_length(0, 0, STANDARD); + assert_encoded_length(0, 0, &STANDARD, true); - assert_encoded_length(1, 4, STANDARD); - assert_encoded_length(2, 4, STANDARD); - assert_encoded_length(3, 4, STANDARD); + assert_encoded_length(1, 4, &STANDARD, true); + assert_encoded_length(2, 4, &STANDARD, true); + assert_encoded_length(3, 4, &STANDARD, true); - assert_encoded_length(4, 8, STANDARD); - assert_encoded_length(5, 8, STANDARD); - assert_encoded_length(6, 8, STANDARD); + assert_encoded_length(4, 8, &STANDARD, true); + assert_encoded_length(5, 8, &STANDARD, true); + assert_encoded_length(6, 8, &STANDARD, true); - assert_encoded_length(7, 12, STANDARD); - assert_encoded_length(8, 12, STANDARD); - assert_encoded_length(9, 12, STANDARD); + assert_encoded_length(7, 12, &STANDARD, true); + assert_encoded_length(8, 12, &STANDARD, true); + assert_encoded_length(9, 12, &STANDARD, true); - assert_encoded_length(54, 72, STANDARD); + assert_encoded_length(54, 72, &STANDARD, true); - assert_encoded_length(55, 76, STANDARD); - 
assert_encoded_length(56, 76, STANDARD); - assert_encoded_length(57, 76, STANDARD); + assert_encoded_length(55, 76, &STANDARD, true); + assert_encoded_length(56, 76, &STANDARD, true); + assert_encoded_length(57, 76, &STANDARD, true); - assert_encoded_length(58, 80, STANDARD); + assert_encoded_length(58, 80, &STANDARD, true); } #[test] fn encoded_size_correct_no_pad() { - assert_encoded_length(0, 0, URL_SAFE_NO_PAD); + assert_encoded_length(0, 0, &URL_SAFE_NO_PAD_ENGINE, false); - assert_encoded_length(1, 2, URL_SAFE_NO_PAD); - assert_encoded_length(2, 3, URL_SAFE_NO_PAD); - assert_encoded_length(3, 4, URL_SAFE_NO_PAD); + assert_encoded_length(1, 2, &URL_SAFE_NO_PAD_ENGINE, false); + assert_encoded_length(2, 3, &URL_SAFE_NO_PAD_ENGINE, false); + assert_encoded_length(3, 4, &URL_SAFE_NO_PAD_ENGINE, false); - assert_encoded_length(4, 6, URL_SAFE_NO_PAD); - assert_encoded_length(5, 7, URL_SAFE_NO_PAD); - assert_encoded_length(6, 8, URL_SAFE_NO_PAD); + assert_encoded_length(4, 6, &URL_SAFE_NO_PAD_ENGINE, false); + assert_encoded_length(5, 7, &URL_SAFE_NO_PAD_ENGINE, false); + assert_encoded_length(6, 8, &URL_SAFE_NO_PAD_ENGINE, false); - assert_encoded_length(7, 10, URL_SAFE_NO_PAD); - assert_encoded_length(8, 11, URL_SAFE_NO_PAD); - assert_encoded_length(9, 12, URL_SAFE_NO_PAD); + assert_encoded_length(7, 10, &URL_SAFE_NO_PAD_ENGINE, false); + assert_encoded_length(8, 11, &URL_SAFE_NO_PAD_ENGINE, false); + assert_encoded_length(9, 12, &URL_SAFE_NO_PAD_ENGINE, false); - assert_encoded_length(54, 72, URL_SAFE_NO_PAD); + assert_encoded_length(54, 72, &URL_SAFE_NO_PAD_ENGINE, false); - assert_encoded_length(55, 74, URL_SAFE_NO_PAD); - assert_encoded_length(56, 75, URL_SAFE_NO_PAD); - assert_encoded_length(57, 76, URL_SAFE_NO_PAD); + assert_encoded_length(55, 74, &URL_SAFE_NO_PAD_ENGINE, false); + assert_encoded_length(56, 75, &URL_SAFE_NO_PAD_ENGINE, false); + assert_encoded_length(57, 76, &URL_SAFE_NO_PAD_ENGINE, false); - assert_encoded_length(58, 78, URL_SAFE_NO_PAD); + 
assert_encoded_length(58, 78, &URL_SAFE_NO_PAD_ENGINE, false); } #[test] fn encoded_size_overflow() { - assert_eq!(None, encoded_size(std::usize::MAX, STANDARD)); + assert_eq!(None, encoded_len(usize::MAX, true)); } #[test] - fn encode_config_buf_into_nonempty_buffer_doesnt_clobber_prefix() { + fn encode_engine_string_into_nonempty_buffer_doesnt_clobber_prefix() { let mut orig_data = Vec::new(); let mut prefix = String::new(); let mut encoded_data_no_prefix = String::new(); @@ -424,29 +261,39 @@ mod tests { } encoded_data_with_prefix.push_str(&prefix); - let config = random_config(&mut rng); - encode_config_buf(&orig_data, config, &mut encoded_data_no_prefix); - encode_config_buf(&orig_data, config, &mut encoded_data_with_prefix); + let engine = random_engine(&mut rng); + engine.encode_string(&orig_data, &mut encoded_data_no_prefix); + engine.encode_string(&orig_data, &mut encoded_data_with_prefix); assert_eq!( encoded_data_no_prefix.len() + prefix_len, encoded_data_with_prefix.len() ); - assert_encode_sanity(&encoded_data_no_prefix, config, input_len); - assert_encode_sanity(&encoded_data_with_prefix[prefix_len..], config, input_len); + assert_encode_sanity( + &encoded_data_no_prefix, + engine.config().encode_padding(), + input_len, + ); + assert_encode_sanity( + &encoded_data_with_prefix[prefix_len..], + engine.config().encode_padding(), + input_len, + ); // append plain encode onto prefix - prefix.push_str(&mut encoded_data_no_prefix); + prefix.push_str(&encoded_data_no_prefix); assert_eq!(prefix, encoded_data_with_prefix); - decode_config_buf(&encoded_data_no_prefix, config, &mut decoded).unwrap(); + engine + .decode_vec(&encoded_data_no_prefix, &mut decoded) + .unwrap(); assert_eq!(orig_data, decoded); } } #[test] - fn encode_config_slice_into_nonempty_buffer_doesnt_clobber_suffix() { + fn encode_engine_slice_into_nonempty_buffer_doesnt_clobber_suffix() { let mut orig_data = Vec::new(); let mut encoded_data = Vec::new(); let mut encoded_data_original_state = 
Vec::new(); @@ -475,18 +322,18 @@ mod tests { encoded_data_original_state.extend_from_slice(&encoded_data); - let config = random_config(&mut rng); + let engine = random_engine(&mut rng); - let encoded_size = encoded_size(input_len, config).unwrap(); + let encoded_size = encoded_len(input_len, engine.config().encode_padding()).unwrap(); assert_eq!( encoded_size, - encode_config_slice(&orig_data, config, &mut encoded_data) + engine.encode_slice(&orig_data, &mut encoded_data).unwrap() ); assert_encode_sanity( - std::str::from_utf8(&encoded_data[0..encoded_size]).unwrap(), - config, + str::from_utf8(&encoded_data[0..encoded_size]).unwrap(), + engine.config().encode_padding(), input_len, ); @@ -495,50 +342,9 @@ mod tests { &encoded_data_original_state[encoded_size..] ); - decode_config_buf(&encoded_data[0..encoded_size], config, &mut decoded).unwrap(); - assert_eq!(orig_data, decoded); - } - } - - #[test] - fn encode_config_slice_fits_into_precisely_sized_slice() { - let mut orig_data = Vec::new(); - let mut encoded_data = Vec::new(); - let mut decoded = Vec::new(); - - let input_len_range = Uniform::new(0, 1000); - - let mut rng = rand::rngs::SmallRng::from_entropy(); - - for _ in 0..10_000 { - orig_data.clear(); - encoded_data.clear(); - decoded.clear(); - - let input_len = input_len_range.sample(&mut rng); - - for _ in 0..input_len { - orig_data.push(rng.gen()); - } - - let config = random_config(&mut rng); - - let encoded_size = encoded_size(input_len, config).unwrap(); - - encoded_data.resize(encoded_size, 0); - - assert_eq!( - encoded_size, - encode_config_slice(&orig_data, config, &mut encoded_data) - ); - - assert_encode_sanity( - std::str::from_utf8(&encoded_data[0..encoded_size]).unwrap(), - config, - input_len, - ); - - decode_config_buf(&encoded_data[0..encoded_size], config, &mut decoded).unwrap(); + engine + .decode_vec(&encoded_data[0..encoded_size], &mut decoded) + .unwrap(); assert_eq!(orig_data, decoded); } } @@ -563,17 +369,17 @@ mod tests { } let 
config = random_config(&mut rng); + let engine = random_engine(&mut rng); // fill up the output buffer with garbage - let encoded_size = encoded_size(input_len, config).unwrap(); + let encoded_size = encoded_len(input_len, config.encode_padding()).unwrap(); for _ in 0..encoded_size { output.push(rng.gen()); } - let orig_output_buf = output.to_vec(); + let orig_output_buf = output.clone(); - let bytes_written = - encode_to_slice(&input, &mut output, config.char_set.encode_table()); + let bytes_written = engine.internal_encode(&input, &mut output); // make sure the part beyond bytes_written is the same garbage it was before assert_eq!(orig_output_buf[bytes_written..], output[bytes_written..]); @@ -602,17 +408,17 @@ mod tests { input.push(rng.gen()); } - let config = random_config(&mut rng); + let engine = random_engine(&mut rng); // fill up the output buffer with garbage - let encoded_size = encoded_size(input_len, config).unwrap(); + let encoded_size = encoded_len(input_len, engine.config().encode_padding()).unwrap(); for _ in 0..encoded_size + 1000 { output.push(rng.gen()); } - let orig_output_buf = output.to_vec(); + let orig_output_buf = output.clone(); - encode_with_padding(&input, config, encoded_size, &mut output[0..encoded_size]); + encode_with_padding(&input, &mut output[0..encoded_size], &engine, encoded_size); // make sure the part beyond b64 is the same garbage it was before assert_eq!(orig_output_buf[encoded_size..], output[encoded_size..]); @@ -637,7 +443,7 @@ mod tests { output.push(rng.gen()); } - let orig_output_buf = output.to_vec(); + let orig_output_buf = output.clone(); let bytes_written = add_padding(input_len, &mut output); @@ -649,8 +455,13 @@ mod tests { } } - fn assert_encoded_length(input_len: usize, encoded_len: usize, config: Config) { - assert_eq!(encoded_len, encoded_size(input_len, config).unwrap()); + fn assert_encoded_length<E: Engine>( + input_len: usize, + enc_len: usize, + engine: &E, + padded: bool, + ) { + assert_eq!(enc_len, 
encoded_len(input_len, padded).unwrap()); let mut bytes: Vec<u8> = Vec::new(); let mut rng = rand::rngs::SmallRng::from_entropy(); @@ -659,17 +470,19 @@ mod tests { bytes.push(rng.gen()); } - let encoded = encode_config(&bytes, config); - assert_encode_sanity(&encoded, config, input_len); + let encoded = engine.encode(&bytes); + assert_encode_sanity(&encoded, padded, input_len); - assert_eq!(encoded_len, encoded.len()); + assert_eq!(enc_len, encoded.len()); } #[test] fn encode_imap() { assert_eq!( - encode_config(b"\xFB\xFF", crate::IMAP_MUTF7), - encode_config(b"\xFB\xFF", crate::STANDARD_NO_PAD).replace("/", ",") + &GeneralPurpose::new(&alphabet::IMAP_MUTF7, NO_PAD).encode(b"\xFB\xFF"), + &GeneralPurpose::new(&alphabet::STANDARD, NO_PAD) + .encode(b"\xFB\xFF") + .replace('/', ",") ); } } diff --git a/src/engine/general_purpose/decode.rs b/src/engine/general_purpose/decode.rs new file mode 100644 index 0000000..e9fd788 --- /dev/null +++ b/src/engine/general_purpose/decode.rs @@ -0,0 +1,348 @@ +use crate::{ + engine::{general_purpose::INVALID_VALUE, DecodeEstimate, DecodePaddingMode}, + DecodeError, PAD_BYTE, +}; + +// decode logic operates on chunks of 8 input bytes without padding +const INPUT_CHUNK_LEN: usize = 8; +const DECODED_CHUNK_LEN: usize = 6; + +// we read a u64 and write a u64, but a u64 of input only yields 6 bytes of output, so the last +// 2 bytes of any output u64 should not be counted as written to (but must be available in a +// slice). 
+const DECODED_CHUNK_SUFFIX: usize = 2; + +// how many u64's of input to handle at a time +const CHUNKS_PER_FAST_LOOP_BLOCK: usize = 4; + +const INPUT_BLOCK_LEN: usize = CHUNKS_PER_FAST_LOOP_BLOCK * INPUT_CHUNK_LEN; + +// includes the trailing 2 bytes for the final u64 write +const DECODED_BLOCK_LEN: usize = + CHUNKS_PER_FAST_LOOP_BLOCK * DECODED_CHUNK_LEN + DECODED_CHUNK_SUFFIX; + +#[doc(hidden)] +pub struct GeneralPurposeEstimate { + /// Total number of decode chunks, including a possibly partial last chunk + num_chunks: usize, + decoded_len_estimate: usize, +} + +impl GeneralPurposeEstimate { + pub(crate) fn new(encoded_len: usize) -> Self { + Self { + num_chunks: encoded_len + .checked_add(INPUT_CHUNK_LEN - 1) + .expect("Overflow when calculating number of chunks in input") + / INPUT_CHUNK_LEN, + decoded_len_estimate: encoded_len + .checked_add(3) + .expect("Overflow when calculating decoded len estimate") + / 4 + * 3, + } + } +} + +impl DecodeEstimate for GeneralPurposeEstimate { + fn decoded_len_estimate(&self) -> usize { + self.decoded_len_estimate + } +} + +/// Helper to avoid duplicating num_chunks calculation, which is costly on short inputs. +/// Returns the number of bytes written, or an error. +// We're on the fragile edge of compiler heuristics here. If this is not inlined, slow. If this is +// inlined(always), a different slow. plain ol' inline makes the benchmarks happiest at the moment, +// but this is fragile and the best setting changes with only minor code modifications. 
+#[inline] +pub(crate) fn decode_helper( + input: &[u8], + estimate: GeneralPurposeEstimate, + output: &mut [u8], + decode_table: &[u8; 256], + decode_allow_trailing_bits: bool, + padding_mode: DecodePaddingMode, +) -> Result<usize, DecodeError> { + let remainder_len = input.len() % INPUT_CHUNK_LEN; + + // Because the fast decode loop writes in groups of 8 bytes (unrolled to + // CHUNKS_PER_FAST_LOOP_BLOCK times 8 bytes, where possible) and outputs 8 bytes at a time (of + // which only 6 are valid data), we need to be sure that we stop using the fast decode loop + // soon enough that there will always be 2 more bytes of valid data written after that loop. + let trailing_bytes_to_skip = match remainder_len { + // if input is a multiple of the chunk size, ignore the last chunk as it may have padding, + // and the fast decode logic cannot handle padding + 0 => INPUT_CHUNK_LEN, + // 1 and 5 trailing bytes are illegal: can't decode 6 bits of input into a byte + 1 | 5 => { + // trailing whitespace is so common that it's worth it to check the last byte to + // possibly return a better error message + if let Some(b) = input.last() { + if *b != PAD_BYTE && decode_table[*b as usize] == INVALID_VALUE { + return Err(DecodeError::InvalidByte(input.len() - 1, *b)); + } + } + + return Err(DecodeError::InvalidLength); + } + // This will decode to one output byte, which isn't enough to overwrite the 2 extra bytes + // written by the fast decode loop. So, we have to ignore both these 2 bytes and the + // previous chunk. + 2 => INPUT_CHUNK_LEN + 2, + // If this is 3 un-padded chars, then it would actually decode to 2 bytes. However, if this + // is an erroneous 2 chars + 1 pad char that would decode to 1 byte, then it should fail + // with an error, not panic from going past the bounds of the output slice, so we let it + // use stage 3 + 4. 
+ 3 => INPUT_CHUNK_LEN + 3, + // This can also decode to one output byte because it may be 2 input chars + 2 padding + // chars, which would decode to 1 byte. + 4 => INPUT_CHUNK_LEN + 4, + // Everything else is a legal decode len (given that we don't require padding), and will + // decode to at least 2 bytes of output. + _ => remainder_len, + }; + + // rounded up to include partial chunks + let mut remaining_chunks = estimate.num_chunks; + + let mut input_index = 0; + let mut output_index = 0; + + { + let length_of_fast_decode_chunks = input.len().saturating_sub(trailing_bytes_to_skip); + + // Fast loop, stage 1 + // manual unroll to CHUNKS_PER_FAST_LOOP_BLOCK of u64s to amortize slice bounds checks + if let Some(max_start_index) = length_of_fast_decode_chunks.checked_sub(INPUT_BLOCK_LEN) { + while input_index <= max_start_index { + let input_slice = &input[input_index..(input_index + INPUT_BLOCK_LEN)]; + let output_slice = &mut output[output_index..(output_index + DECODED_BLOCK_LEN)]; + + decode_chunk( + &input_slice[0..], + input_index, + decode_table, + &mut output_slice[0..], + )?; + decode_chunk( + &input_slice[8..], + input_index + 8, + decode_table, + &mut output_slice[6..], + )?; + decode_chunk( + &input_slice[16..], + input_index + 16, + decode_table, + &mut output_slice[12..], + )?; + decode_chunk( + &input_slice[24..], + input_index + 24, + decode_table, + &mut output_slice[18..], + )?; + + input_index += INPUT_BLOCK_LEN; + output_index += DECODED_BLOCK_LEN - DECODED_CHUNK_SUFFIX; + remaining_chunks -= CHUNKS_PER_FAST_LOOP_BLOCK; + } + } + + // Fast loop, stage 2 (aka still pretty fast loop) + // 8 bytes at a time for whatever we didn't do in stage 1. 
+ if let Some(max_start_index) = length_of_fast_decode_chunks.checked_sub(INPUT_CHUNK_LEN) { + while input_index < max_start_index { + decode_chunk( + &input[input_index..(input_index + INPUT_CHUNK_LEN)], + input_index, + decode_table, + &mut output + [output_index..(output_index + DECODED_CHUNK_LEN + DECODED_CHUNK_SUFFIX)], + )?; + + output_index += DECODED_CHUNK_LEN; + input_index += INPUT_CHUNK_LEN; + remaining_chunks -= 1; + } + } + } + + // Stage 3 + // If input length was such that a chunk had to be deferred until after the fast loop + // because decoding it would have produced 2 trailing bytes that wouldn't then be + // overwritten, we decode that chunk here. This way is slower but doesn't write the 2 + // trailing bytes. + // However, we still need to avoid the last chunk (partial or complete) because it could + // have padding, so we always do 1 fewer to avoid the last chunk. + for _ in 1..remaining_chunks { + decode_chunk_precise( + &input[input_index..], + input_index, + decode_table, + &mut output[output_index..(output_index + DECODED_CHUNK_LEN)], + )?; + + input_index += INPUT_CHUNK_LEN; + output_index += DECODED_CHUNK_LEN; + } + + // always have one more (possibly partial) block of 8 input + debug_assert!(input.len() - input_index > 1 || input.is_empty()); + debug_assert!(input.len() - input_index <= 8); + + super::decode_suffix::decode_suffix( + input, + input_index, + output, + output_index, + decode_table, + decode_allow_trailing_bits, + padding_mode, + ) +} + +/// Decode 8 bytes of input into 6 bytes of output. 8 bytes of output will be written, but only the +/// first 6 of those contain meaningful data. +/// +/// `input` is the bytes to decode, of which the first 8 bytes will be processed. +/// `index_at_start_of_input` is the offset in the overall input (used for reporting errors +/// accurately) +/// `decode_table` is the lookup table for the particular base64 alphabet. 
+/// `output` will have its first 8 bytes overwritten, of which only the first 6 are valid decoded +/// data. +// yes, really inline (worth 30-50% speedup) +#[inline(always)] +fn decode_chunk( + input: &[u8], + index_at_start_of_input: usize, + decode_table: &[u8; 256], + output: &mut [u8], +) -> Result<(), DecodeError> { + let morsel = decode_table[input[0] as usize]; + if morsel == INVALID_VALUE { + return Err(DecodeError::InvalidByte(index_at_start_of_input, input[0])); + } + let mut accum = (morsel as u64) << 58; + + let morsel = decode_table[input[1] as usize]; + if morsel == INVALID_VALUE { + return Err(DecodeError::InvalidByte( + index_at_start_of_input + 1, + input[1], + )); + } + accum |= (morsel as u64) << 52; + + let morsel = decode_table[input[2] as usize]; + if morsel == INVALID_VALUE { + return Err(DecodeError::InvalidByte( + index_at_start_of_input + 2, + input[2], + )); + } + accum |= (morsel as u64) << 46; + + let morsel = decode_table[input[3] as usize]; + if morsel == INVALID_VALUE { + return Err(DecodeError::InvalidByte( + index_at_start_of_input + 3, + input[3], + )); + } + accum |= (morsel as u64) << 40; + + let morsel = decode_table[input[4] as usize]; + if morsel == INVALID_VALUE { + return Err(DecodeError::InvalidByte( + index_at_start_of_input + 4, + input[4], + )); + } + accum |= (morsel as u64) << 34; + + let morsel = decode_table[input[5] as usize]; + if morsel == INVALID_VALUE { + return Err(DecodeError::InvalidByte( + index_at_start_of_input + 5, + input[5], + )); + } + accum |= (morsel as u64) << 28; + + let morsel = decode_table[input[6] as usize]; + if morsel == INVALID_VALUE { + return Err(DecodeError::InvalidByte( + index_at_start_of_input + 6, + input[6], + )); + } + accum |= (morsel as u64) << 22; + + let morsel = decode_table[input[7] as usize]; + if morsel == INVALID_VALUE { + return Err(DecodeError::InvalidByte( + index_at_start_of_input + 7, + input[7], + )); + } + accum |= (morsel as u64) << 16; + + write_u64(output, 
accum); + + Ok(()) +} + +/// Decode an 8-byte chunk, but only write the 6 bytes actually decoded instead of including 2 +/// trailing garbage bytes. +#[inline] +fn decode_chunk_precise( + input: &[u8], + index_at_start_of_input: usize, + decode_table: &[u8; 256], + output: &mut [u8], +) -> Result<(), DecodeError> { + let mut tmp_buf = [0_u8; 8]; + + decode_chunk( + input, + index_at_start_of_input, + decode_table, + &mut tmp_buf[..], + )?; + + output[0..6].copy_from_slice(&tmp_buf[0..6]); + + Ok(()) +} + +#[inline] +fn write_u64(output: &mut [u8], value: u64) { + output[..8].copy_from_slice(&value.to_be_bytes()); +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::engine::general_purpose::STANDARD; + + #[test] + fn decode_chunk_precise_writes_only_6_bytes() { + let input = b"Zm9vYmFy"; // "foobar" + let mut output = [0_u8, 1, 2, 3, 4, 5, 6, 7]; + + decode_chunk_precise(&input[..], 0, &STANDARD.decode_table, &mut output).unwrap(); + assert_eq!(&vec![b'f', b'o', b'o', b'b', b'a', b'r', 6, 7], &output); + } + + #[test] + fn decode_chunk_writes_8_bytes() { + let input = b"Zm9vYmFy"; // "foobar" + let mut output = [0_u8, 1, 2, 3, 4, 5, 6, 7]; + + decode_chunk(&input[..], 0, &STANDARD.decode_table, &mut output).unwrap(); + assert_eq!(&vec![b'f', b'o', b'o', b'b', b'a', b'r', 0, 0], &output); + } +} diff --git a/src/engine/general_purpose/decode_suffix.rs b/src/engine/general_purpose/decode_suffix.rs new file mode 100644 index 0000000..5652035 --- /dev/null +++ b/src/engine/general_purpose/decode_suffix.rs @@ -0,0 +1,161 @@ +use crate::{ + engine::{general_purpose::INVALID_VALUE, DecodePaddingMode}, + DecodeError, PAD_BYTE, +}; + +/// Decode the last 1-8 bytes, checking for trailing set bits and padding per the provided +/// parameters. +/// +/// Returns the total number of bytes decoded, including the ones indicated as already written by +/// `output_index`. 
+pub(crate) fn decode_suffix( + input: &[u8], + input_index: usize, + output: &mut [u8], + mut output_index: usize, + decode_table: &[u8; 256], + decode_allow_trailing_bits: bool, + padding_mode: DecodePaddingMode, +) -> Result<usize, DecodeError> { + // Decode any leftovers that aren't a complete input block of 8 bytes. + // Use a u64 as a stack-resident 8 byte buffer. + let mut leftover_bits: u64 = 0; + let mut morsels_in_leftover = 0; + let mut padding_bytes = 0; + let mut first_padding_index: usize = 0; + let mut last_symbol = 0_u8; + let start_of_leftovers = input_index; + + for (i, &b) in input[start_of_leftovers..].iter().enumerate() { + // '=' padding + if b == PAD_BYTE { + // There can be bad padding bytes in a few ways: + // 1 - Padding with non-padding characters after it + // 2 - Padding after zero or one characters in the current quad (should only + // be after 2 or 3 chars) + // 3 - More than two characters of padding. If 3 or 4 padding chars + // are in the same quad, that implies it will be caught by #2. + // If it spreads from one quad to another, it will be an invalid byte + // in the first quad. + // 4 - Non-canonical padding -- 1 byte when it should be 2, etc. + // Per config, non-canonical but still functional non- or partially-padded base64 + // may be treated as an error condition. + + if i % 4 < 2 { + // Check for case #2. + let bad_padding_index = start_of_leftovers + + if padding_bytes > 0 { + // If we've already seen padding, report the first padding index. + // This is to be consistent with the normal decode logic: it will report an + // error on the first padding character (since it doesn't expect to see + // anything but actual encoded data). + // This could only happen if the padding started in the previous quad since + // otherwise this case would have been hit at i % 4 == 0 if it was the same + // quad. 
+ first_padding_index + } else { + // haven't seen padding before, just use where we are now + i + }; + return Err(DecodeError::InvalidByte(bad_padding_index, b)); + } + + if padding_bytes == 0 { + first_padding_index = i; + } + + padding_bytes += 1; + continue; + } + + // Check for case #1. + // To make '=' handling consistent with the main loop, don't allow + // non-suffix '=' in trailing chunk either. Report error as first + // erroneous padding. + if padding_bytes > 0 { + return Err(DecodeError::InvalidByte( + start_of_leftovers + first_padding_index, + PAD_BYTE, + )); + } + + last_symbol = b; + + // can use up to 8 * 6 = 48 bits of the u64, if last chunk has no padding. + // Pack the leftovers from left to right. + let shift = 64 - (morsels_in_leftover + 1) * 6; + let morsel = decode_table[b as usize]; + if morsel == INVALID_VALUE { + return Err(DecodeError::InvalidByte(start_of_leftovers + i, b)); + } + + leftover_bits |= (morsel as u64) << shift; + morsels_in_leftover += 1; + } + + match padding_mode { + DecodePaddingMode::Indifferent => { /* everything we care about was already checked */ } + DecodePaddingMode::RequireCanonical => { + if (padding_bytes + morsels_in_leftover) % 4 != 0 { + return Err(DecodeError::InvalidPadding); + } + } + DecodePaddingMode::RequireNone => { + if padding_bytes > 0 { + // check at the end to make sure we let the cases of padding that should be InvalidByte + // get hit + return Err(DecodeError::InvalidPadding); + } + } + } + + // When encoding 1 trailing byte (e.g. 0xFF), 2 base64 bytes ("/w") are needed. + // / is the symbol for 63 (0x3F, bottom 6 bits all set) and w is 48 (0x30, top 2 bits + // of bottom 6 bits set). + // When decoding two symbols back to one trailing byte, any final symbol higher than + // w would still decode to the original byte because we only care about the top two + // bits in the bottom 6, but would be a non-canonical encoding. 
So, we calculate a + // mask based on how many bits are used for just the canonical encoding, and optionally + // error if any other bits are set. In the example of one encoded byte -> 2 symbols, + // 2 symbols can technically encode 12 bits, but the last 4 are non canonical, and + // useless since there are no more symbols to provide the necessary 4 additional bits + // to finish the second original byte. + + let leftover_bits_ready_to_append = match morsels_in_leftover { + 0 => 0, + 2 => 8, + 3 => 16, + 4 => 24, + 6 => 32, + 7 => 40, + 8 => 48, + // can also be detected as case #2 bad padding above + _ => unreachable!( + "Impossible: must only have 0 to 8 input bytes in last chunk, with no invalid lengths" + ), + }; + + // if there are bits set outside the bits we care about, last symbol encodes trailing bits that + // will not be included in the output + let mask = !0 >> leftover_bits_ready_to_append; + if !decode_allow_trailing_bits && (leftover_bits & mask) != 0 { + // last morsel is at `morsels_in_leftover` - 1 + return Err(DecodeError::InvalidLastSymbol( + start_of_leftovers + morsels_in_leftover - 1, + last_symbol, + )); + } + + // TODO benchmark simply converting to big endian bytes + let mut leftover_bits_appended_to_buf = 0; + while leftover_bits_appended_to_buf < leftover_bits_ready_to_append { + // `as` simply truncates the higher bits, which is what we want here + let selected_bits = (leftover_bits >> (56 - leftover_bits_appended_to_buf)) as u8; + output[output_index] = selected_bits; + output_index += 1; + + leftover_bits_appended_to_buf += 8; + } + + Ok(output_index) +} diff --git a/src/engine/general_purpose/mod.rs b/src/engine/general_purpose/mod.rs new file mode 100644 index 0000000..af8897b --- /dev/null +++ b/src/engine/general_purpose/mod.rs @@ -0,0 +1,349 @@ +//! Provides the [GeneralPurpose] engine and associated config types. 
+use crate::{ + alphabet, + alphabet::Alphabet, + engine::{Config, DecodePaddingMode}, + DecodeError, +}; +use core::convert::TryInto; + +mod decode; +pub(crate) mod decode_suffix; +pub use decode::GeneralPurposeEstimate; + +pub(crate) const INVALID_VALUE: u8 = 255; + +/// A general-purpose base64 engine. +/// +/// - It uses no vector CPU instructions, so it will work on any system. +/// - It is reasonably fast (~2-3GiB/s). +/// - It is not constant-time, though, so it is vulnerable to timing side-channel attacks. For loading cryptographic keys, etc, it is suggested to use the forthcoming constant-time implementation. +pub struct GeneralPurpose { + encode_table: [u8; 64], + decode_table: [u8; 256], + config: GeneralPurposeConfig, +} + +impl GeneralPurpose { + /// Create a `GeneralPurpose` engine from an [Alphabet]. + /// + /// While not very expensive to initialize, ideally these should be cached + /// if the engine will be used repeatedly. + pub const fn new(alphabet: &Alphabet, config: GeneralPurposeConfig) -> Self { + Self { + encode_table: encode_table(alphabet), + decode_table: decode_table(alphabet), + config, + } + } +} + +impl super::Engine for GeneralPurpose { + type Config = GeneralPurposeConfig; + type DecodeEstimate = GeneralPurposeEstimate; + + fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize { + let mut input_index: usize = 0; + + const BLOCKS_PER_FAST_LOOP: usize = 4; + const LOW_SIX_BITS: u64 = 0x3F; + + // we read 8 bytes at a time (u64) but only actually consume 6 of those bytes. Thus, we need + // 2 trailing bytes to be available to read.. 
+ let last_fast_index = input.len().saturating_sub(BLOCKS_PER_FAST_LOOP * 6 + 2); + let mut output_index = 0; + + if last_fast_index > 0 { + while input_index <= last_fast_index { + // Major performance wins from letting the optimizer do the bounds check once, mostly + // on the output side + let input_chunk = + &input[input_index..(input_index + (BLOCKS_PER_FAST_LOOP * 6 + 2))]; + let output_chunk = + &mut output[output_index..(output_index + BLOCKS_PER_FAST_LOOP * 8)]; + + // Hand-unrolling for 32 vs 16 or 8 bytes produces yields performance about equivalent + // to unsafe pointer code on a Xeon E5-1650v3. 64 byte unrolling was slightly better for + // large inputs but significantly worse for 50-byte input, unsurprisingly. I suspect + // that it's a not uncommon use case to encode smallish chunks of data (e.g. a 64-byte + // SHA-512 digest), so it would be nice if that fit in the unrolled loop at least once. + // Plus, single-digit percentage performance differences might well be quite different + // on different hardware. 
+ + let input_u64 = read_u64(&input_chunk[0..]); + + output_chunk[0] = self.encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize]; + output_chunk[1] = self.encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize]; + output_chunk[2] = self.encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize]; + output_chunk[3] = self.encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize]; + output_chunk[4] = self.encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize]; + output_chunk[5] = self.encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize]; + output_chunk[6] = self.encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize]; + output_chunk[7] = self.encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize]; + + let input_u64 = read_u64(&input_chunk[6..]); + + output_chunk[8] = self.encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize]; + output_chunk[9] = self.encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize]; + output_chunk[10] = self.encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize]; + output_chunk[11] = self.encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize]; + output_chunk[12] = self.encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize]; + output_chunk[13] = self.encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize]; + output_chunk[14] = self.encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize]; + output_chunk[15] = self.encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize]; + + let input_u64 = read_u64(&input_chunk[12..]); + + output_chunk[16] = self.encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize]; + output_chunk[17] = self.encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize]; + output_chunk[18] = self.encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize]; + output_chunk[19] = self.encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize]; + output_chunk[20] = self.encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize]; + output_chunk[21] = self.encode_table[((input_u64 >> 28) & 
LOW_SIX_BITS) as usize]; + output_chunk[22] = self.encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize]; + output_chunk[23] = self.encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize]; + + let input_u64 = read_u64(&input_chunk[18..]); + + output_chunk[24] = self.encode_table[((input_u64 >> 58) & LOW_SIX_BITS) as usize]; + output_chunk[25] = self.encode_table[((input_u64 >> 52) & LOW_SIX_BITS) as usize]; + output_chunk[26] = self.encode_table[((input_u64 >> 46) & LOW_SIX_BITS) as usize]; + output_chunk[27] = self.encode_table[((input_u64 >> 40) & LOW_SIX_BITS) as usize]; + output_chunk[28] = self.encode_table[((input_u64 >> 34) & LOW_SIX_BITS) as usize]; + output_chunk[29] = self.encode_table[((input_u64 >> 28) & LOW_SIX_BITS) as usize]; + output_chunk[30] = self.encode_table[((input_u64 >> 22) & LOW_SIX_BITS) as usize]; + output_chunk[31] = self.encode_table[((input_u64 >> 16) & LOW_SIX_BITS) as usize]; + + output_index += BLOCKS_PER_FAST_LOOP * 8; + input_index += BLOCKS_PER_FAST_LOOP * 6; + } + } + + // Encode what's left after the fast loop. + + const LOW_SIX_BITS_U8: u8 = 0x3F; + + let rem = input.len() % 3; + let start_of_rem = input.len() - rem; + + // start at the first index not handled by fast loop, which may be 0. 
+ + while input_index < start_of_rem { + let input_chunk = &input[input_index..(input_index + 3)]; + let output_chunk = &mut output[output_index..(output_index + 4)]; + + output_chunk[0] = self.encode_table[(input_chunk[0] >> 2) as usize]; + output_chunk[1] = self.encode_table + [((input_chunk[0] << 4 | input_chunk[1] >> 4) & LOW_SIX_BITS_U8) as usize]; + output_chunk[2] = self.encode_table + [((input_chunk[1] << 2 | input_chunk[2] >> 6) & LOW_SIX_BITS_U8) as usize]; + output_chunk[3] = self.encode_table[(input_chunk[2] & LOW_SIX_BITS_U8) as usize]; + + input_index += 3; + output_index += 4; + } + + if rem == 2 { + output[output_index] = self.encode_table[(input[start_of_rem] >> 2) as usize]; + output[output_index + 1] = + self.encode_table[((input[start_of_rem] << 4 | input[start_of_rem + 1] >> 4) + & LOW_SIX_BITS_U8) as usize]; + output[output_index + 2] = + self.encode_table[((input[start_of_rem + 1] << 2) & LOW_SIX_BITS_U8) as usize]; + output_index += 3; + } else if rem == 1 { + output[output_index] = self.encode_table[(input[start_of_rem] >> 2) as usize]; + output[output_index + 1] = + self.encode_table[((input[start_of_rem] << 4) & LOW_SIX_BITS_U8) as usize]; + output_index += 2; + } + + output_index + } + + fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate { + GeneralPurposeEstimate::new(input_len) + } + + fn internal_decode( + &self, + input: &[u8], + output: &mut [u8], + estimate: Self::DecodeEstimate, + ) -> Result<usize, DecodeError> { + decode::decode_helper( + input, + estimate, + output, + &self.decode_table, + self.config.decode_allow_trailing_bits, + self.config.decode_padding_mode, + ) + } + + fn config(&self) -> &Self::Config { + &self.config + } +} + +/// Returns a table mapping a 6-bit index to the ASCII byte encoding of the index +pub(crate) const fn encode_table(alphabet: &Alphabet) -> [u8; 64] { + // the encode table is just the alphabet: + // 6-bit index lookup -> printable byte + let mut encode_table = 
[0_u8; 64]; + { + let mut index = 0; + while index < 64 { + encode_table[index] = alphabet.symbols[index]; + index += 1; + } + } + + encode_table +} + +/// Returns a table mapping base64 bytes as the lookup index to either: +/// - [INVALID_VALUE] for bytes that aren't members of the alphabet +/// - a byte whose lower 6 bits are the value that was encoded into the index byte +pub(crate) const fn decode_table(alphabet: &Alphabet) -> [u8; 256] { + let mut decode_table = [INVALID_VALUE; 256]; + + // Since the table is full of `INVALID_VALUE` already, we only need to overwrite + // the parts that are valid. + let mut index = 0; + while index < 64 { + // The index in the alphabet is the 6-bit value we care about. + // Since the index is in 0-63, it is safe to cast to u8. + decode_table[alphabet.symbols[index] as usize] = index as u8; + index += 1; + } + + decode_table +} + +#[inline] +fn read_u64(s: &[u8]) -> u64 { + u64::from_be_bytes(s[..8].try_into().unwrap()) +} + +/// Contains configuration parameters for base64 encoding and decoding. +/// +/// ``` +/// # use base64::engine::GeneralPurposeConfig; +/// let config = GeneralPurposeConfig::new() +/// .with_encode_padding(false); +/// // further customize using `.with_*` methods as needed +/// ``` +/// +/// The constants [PAD] and [NO_PAD] cover most use cases. +/// +/// To specify the characters used, see [Alphabet]. +#[derive(Clone, Copy, Debug)] +pub struct GeneralPurposeConfig { + encode_padding: bool, + decode_allow_trailing_bits: bool, + decode_padding_mode: DecodePaddingMode, +} + +impl GeneralPurposeConfig { + /// Create a new config with `padding` = `true`, `decode_allow_trailing_bits` = `false`, and + /// `decode_padding_mode = DecodePaddingMode::RequireCanonicalPadding`. + /// + /// This probably matches most people's expectations, but consider disabling padding to save + /// a few bytes unless you specifically need it for compatibility with some legacy system. 
+ pub const fn new() -> Self { + Self { + // RFC states that padding must be applied by default + encode_padding: true, + decode_allow_trailing_bits: false, + decode_padding_mode: DecodePaddingMode::RequireCanonical, + } + } + + /// Create a new config based on `self` with an updated `padding` setting. + /// + /// If `padding` is `true`, encoding will append either 1 or 2 `=` padding characters as needed + /// to produce an output whose length is a multiple of 4. + /// + /// Padding is not needed for correct decoding and only serves to waste bytes, but it's in the + /// [spec](https://datatracker.ietf.org/doc/html/rfc4648#section-3.2). + /// + /// For new applications, consider not using padding if the decoders you're using don't require + /// padding to be present. + pub const fn with_encode_padding(self, padding: bool) -> Self { + Self { + encode_padding: padding, + ..self + } + } + + /// Create a new config based on `self` with an updated `decode_allow_trailing_bits` setting. + /// + /// Most users will not need to configure this. It's useful if you need to decode base64 + /// produced by a buggy encoder that has bits set in the unused space on the last base64 + /// character as per [forgiving-base64 decode](https://infra.spec.whatwg.org/#forgiving-base64-decode). + /// If invalid trailing bits are present and this is `true`, those bits will + /// be silently ignored, else `DecodeError::InvalidLastSymbol` will be emitted. + pub const fn with_decode_allow_trailing_bits(self, allow: bool) -> Self { + Self { + decode_allow_trailing_bits: allow, + ..self + } + } + + /// Create a new config based on `self` with an updated `decode_padding_mode` setting. + /// + /// Padding is not useful in terms of representing encoded data -- it makes no difference to + /// the decoder if padding is present or not, so if you have some un-padded input to decode, it + /// is perfectly fine to use `DecodePaddingMode::Indifferent` to prevent errors from being + /// emitted. 
+ /// + /// However, since in practice + /// [people who learned nothing from BER vs DER seem to expect base64 to have one canonical encoding](https://eprint.iacr.org/2022/361), + /// the default setting is the stricter `DecodePaddingMode::RequireCanonicalPadding`. + /// + /// Or, if "canonical" in your circumstance means _no_ padding rather than padding to the + /// next multiple of four, there's `DecodePaddingMode::RequireNoPadding`. + pub const fn with_decode_padding_mode(self, mode: DecodePaddingMode) -> Self { + Self { + decode_padding_mode: mode, + ..self + } + } +} + +impl Default for GeneralPurposeConfig { + /// Delegates to [GeneralPurposeConfig::new]. + fn default() -> Self { + Self::new() + } +} + +impl Config for GeneralPurposeConfig { + fn encode_padding(&self) -> bool { + self.encode_padding + } +} + +/// A [GeneralPurpose] engine using the [alphabet::STANDARD] base64 alphabet and [PAD] config. +pub const STANDARD: GeneralPurpose = GeneralPurpose::new(&alphabet::STANDARD, PAD); + +/// A [GeneralPurpose] engine using the [alphabet::STANDARD] base64 alphabet and [NO_PAD] config. +pub const STANDARD_NO_PAD: GeneralPurpose = GeneralPurpose::new(&alphabet::STANDARD, NO_PAD); + +/// A [GeneralPurpose] engine using the [alphabet::URL_SAFE] base64 alphabet and [PAD] config. +pub const URL_SAFE: GeneralPurpose = GeneralPurpose::new(&alphabet::URL_SAFE, PAD); + +/// A [GeneralPurpose] engine using the [alphabet::URL_SAFE] base64 alphabet and [NO_PAD] config. +pub const URL_SAFE_NO_PAD: GeneralPurpose = GeneralPurpose::new(&alphabet::URL_SAFE, NO_PAD); + +/// Include padding bytes when encoding, and require that they be present when decoding. +/// +/// This is the standard per the base64 RFC, but consider using [NO_PAD] instead as padding serves +/// little purpose in practice. +pub const PAD: GeneralPurposeConfig = GeneralPurposeConfig::new(); + +/// Don't add padding when encoding, and require no padding when decoding. 
+pub const NO_PAD: GeneralPurposeConfig = GeneralPurposeConfig::new() + .with_encode_padding(false) + .with_decode_padding_mode(DecodePaddingMode::RequireNone); diff --git a/src/engine/mod.rs b/src/engine/mod.rs new file mode 100644 index 0000000..12dfaa8 --- /dev/null +++ b/src/engine/mod.rs @@ -0,0 +1,410 @@ +//! Provides the [Engine] abstraction and out of the box implementations. +#[cfg(any(feature = "alloc", feature = "std", test))] +use crate::chunked_encoder; +use crate::{ + encode::{encode_with_padding, EncodeSliceError}, + encoded_len, DecodeError, DecodeSliceError, +}; +#[cfg(any(feature = "alloc", feature = "std", test))] +use alloc::vec::Vec; + +#[cfg(any(feature = "alloc", feature = "std", test))] +use alloc::{string::String, vec}; + +pub mod general_purpose; + +#[cfg(test)] +mod naive; + +#[cfg(test)] +mod tests; + +pub use general_purpose::{GeneralPurpose, GeneralPurposeConfig}; + +/// An `Engine` provides low-level encoding and decoding operations that all other higher-level parts of the API use. Users of the library will generally not need to implement this. +/// +/// Different implementations offer different characteristics. The library currently ships with +/// [GeneralPurpose] that offers good speed and works on any CPU, with more choices +/// coming later, like a constant-time one when side channel resistance is called for, and vendor-specific vectorized ones for more speed. +/// +/// See [general_purpose::STANDARD_NO_PAD] if you just want standard base64. Otherwise, when possible, it's +/// recommended to store the engine in a `const` so that references to it won't pose any lifetime +/// issues, and to avoid repeating the cost of engine setup. +/// +/// Since almost nobody will need to implement `Engine`, docs for internal methods are hidden. 
+// When adding an implementation of Engine, include them in the engine test suite: +// - add an implementation of [engine::tests::EngineWrapper] +// - add the implementation to the `all_engines` macro +// All tests run on all engines listed in the macro. +pub trait Engine: Send + Sync { + /// The config type used by this engine + type Config: Config; + /// The decode estimate used by this engine + type DecodeEstimate: DecodeEstimate; + + /// This is not meant to be called directly; it is only for `Engine` implementors. + /// See the other `encode*` functions on this trait. + /// + /// Encode the `input` bytes into the `output` buffer based on the mapping in `encode_table`. + /// + /// `output` will be long enough to hold the encoded data. + /// + /// Returns the number of bytes written. + /// + /// No padding should be written; that is handled separately. + /// + /// Must not write any bytes into the output slice other than the encoded data. + #[doc(hidden)] + fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize; + + /// This is not meant to be called directly; it is only for `Engine` implementors. + /// + /// As an optimization to prevent the decoded length from being calculated twice, it is + /// sometimes helpful to have a conservative estimate of the decoded size before doing the + /// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed. + /// + /// # Panics + /// + /// Panics if decoded length estimation overflows. + #[doc(hidden)] + fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate; + + /// This is not meant to be called directly; it is only for `Engine` implementors. + /// See the other `decode*` functions on this trait. + /// + /// Decode `input` base64 bytes into the `output` buffer. 
+ /// + /// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid + /// calculating it again (expensive on short inputs).` + /// + /// Returns the number of bytes written to `output`. + /// + /// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this + /// function must also handle the final possibly partial chunk. + /// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4, + /// the trailing 2 or 3 bytes must decode to 1 or 2 bytes, respectively, as per the + /// [RFC](https://tools.ietf.org/html/rfc4648#section-3.5). + /// + /// Decoding must not write any bytes into the output slice other than the decoded data. + /// + /// Non-canonical trailing bits in the final tokens or non-canonical padding must be reported as + /// errors unless the engine is configured otherwise. + /// + /// # Panics + /// + /// Panics if `output` is too small. + #[doc(hidden)] + fn internal_decode( + &self, + input: &[u8], + output: &mut [u8], + decode_estimate: Self::DecodeEstimate, + ) -> Result<usize, DecodeError>; + + /// Returns the config for this engine. + fn config(&self) -> &Self::Config; + + /// Encode arbitrary octets as base64 using the provided `Engine`. + /// Returns a `String`. 
+ /// + /// # Example + /// + /// ```rust + /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet}; + /// + /// let b64 = general_purpose::STANDARD.encode(b"hello world~"); + /// println!("{}", b64); + /// + /// const CUSTOM_ENGINE: engine::GeneralPurpose = + /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD); + /// + /// let b64_url = CUSTOM_ENGINE.encode(b"hello internet~"); + #[cfg(any(feature = "alloc", feature = "std", test))] + fn encode<T: AsRef<[u8]>>(&self, input: T) -> String { + let encoded_size = encoded_len(input.as_ref().len(), self.config().encode_padding()) + .expect("integer overflow when calculating buffer size"); + let mut buf = vec![0; encoded_size]; + + encode_with_padding(input.as_ref(), &mut buf[..], self, encoded_size); + + String::from_utf8(buf).expect("Invalid UTF8") + } + + /// Encode arbitrary octets as base64 into a supplied `String`. + /// Writes into the supplied `String`, which may allocate if its internal buffer isn't big enough. 
+ /// + /// # Example + /// + /// ```rust + /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet}; + /// const CUSTOM_ENGINE: engine::GeneralPurpose = + /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD); + /// + /// fn main() { + /// let mut buf = String::new(); + /// general_purpose::STANDARD.encode_string(b"hello world~", &mut buf); + /// println!("{}", buf); + /// + /// buf.clear(); + /// CUSTOM_ENGINE.encode_string(b"hello internet~", &mut buf); + /// println!("{}", buf); + /// } + /// ``` + #[cfg(any(feature = "alloc", feature = "std", test))] + fn encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String) { + let input_bytes = input.as_ref(); + + { + let mut sink = chunked_encoder::StringSink::new(output_buf); + + chunked_encoder::ChunkedEncoder::new(self) + .encode(input_bytes, &mut sink) + .expect("Writing to a String shouldn't fail"); + } + } + + /// Encode arbitrary octets as base64 into a supplied slice. + /// Writes into the supplied output buffer. + /// + /// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident + /// or statically-allocated buffer). 
+ /// + /// # Example + /// + /// ```rust + /// use base64::{Engine as _, engine::general_purpose}; + /// let s = b"hello internet!"; + /// let mut buf = Vec::new(); + /// // make sure we'll have a slice big enough for base64 + padding + /// buf.resize(s.len() * 4 / 3 + 4, 0); + /// + /// let bytes_written = general_purpose::STANDARD.encode_slice(s, &mut buf).unwrap(); + /// + /// // shorten our vec down to just what was written + /// buf.truncate(bytes_written); + /// + /// assert_eq!(s, general_purpose::STANDARD.decode(&buf).unwrap().as_slice()); + /// ``` + fn encode_slice<T: AsRef<[u8]>>( + &self, + input: T, + output_buf: &mut [u8], + ) -> Result<usize, EncodeSliceError> { + let input_bytes = input.as_ref(); + + let encoded_size = encoded_len(input_bytes.len(), self.config().encode_padding()) + .expect("usize overflow when calculating buffer size"); + + if output_buf.len() < encoded_size { + return Err(EncodeSliceError::OutputSliceTooSmall); + } + + let b64_output = &mut output_buf[0..encoded_size]; + + encode_with_padding(input_bytes, b64_output, self, encoded_size); + + Ok(encoded_size) + } + + /// Decode from string reference as octets using the specified [Engine]. + /// Returns a `Result` containing a `Vec<u8>`. + /// + /// # Example + /// + /// ```rust + /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}}; + /// + /// let bytes = general_purpose::STANDARD + /// .decode("aGVsbG8gd29ybGR+Cg==").unwrap(); + /// println!("{:?}", bytes); + /// + /// // custom engine setup + /// let bytes_url = engine::GeneralPurpose::new( + /// &alphabet::URL_SAFE, + /// general_purpose::NO_PAD) + /// .decode("aGVsbG8gaW50ZXJuZXR-Cg").unwrap(); + /// println!("{:?}", bytes_url); + /// ``` + /// + /// # Panics + /// + /// Panics if decoded length estimation overflows. + /// This would happen for sizes within a few bytes of the maximum value of `usize`. 
+ #[cfg(any(feature = "alloc", feature = "std", test))] + fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> { + let input_bytes = input.as_ref(); + + let estimate = self.internal_decoded_len_estimate(input_bytes.len()); + let mut buffer = vec![0; estimate.decoded_len_estimate()]; + + let bytes_written = self.internal_decode(input_bytes, &mut buffer, estimate)?; + buffer.truncate(bytes_written); + + Ok(buffer) + } + + /// Decode from string reference as octets. + /// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough. + /// Returns a `Result` containing an empty tuple, aka `()`. + /// + /// # Example + /// + /// ```rust + /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}}; + /// const CUSTOM_ENGINE: engine::GeneralPurpose = + /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::PAD); + /// + /// fn main() { + /// use base64::Engine; + /// let mut buffer = Vec::<u8>::new(); + /// // with the default engine + /// general_purpose::STANDARD + /// .decode_vec("aGVsbG8gd29ybGR+Cg==", &mut buffer,).unwrap(); + /// println!("{:?}", buffer); + /// + /// buffer.clear(); + /// + /// // with a custom engine + /// CUSTOM_ENGINE.decode_vec( + /// "aGVsbG8gaW50ZXJuZXR-Cg==", + /// &mut buffer, + /// ).unwrap(); + /// println!("{:?}", buffer); + /// } + /// ``` + /// + /// # Panics + /// + /// Panics if decoded length estimation overflows. + /// This would happen for sizes within a few bytes of the maximum value of `usize`. 
+ #[cfg(any(feature = "alloc", feature = "std", test))] + fn decode_vec<T: AsRef<[u8]>>( + &self, + input: T, + buffer: &mut Vec<u8>, + ) -> Result<(), DecodeError> { + let input_bytes = input.as_ref(); + + let starting_output_len = buffer.len(); + + let estimate = self.internal_decoded_len_estimate(input_bytes.len()); + let total_len_estimate = estimate + .decoded_len_estimate() + .checked_add(starting_output_len) + .expect("Overflow when calculating output buffer length"); + buffer.resize(total_len_estimate, 0); + + let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..]; + let bytes_written = self.internal_decode(input_bytes, buffer_slice, estimate)?; + + buffer.truncate(starting_output_len + bytes_written); + + Ok(()) + } + + /// Decode the input into the provided output slice. + /// + /// Returns an error if `output` is smaller than the estimated decoded length. + /// + /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end). + /// + /// See [crate::decoded_len_estimate] for calculating buffer sizes. + /// + /// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error + /// if the output buffer is too small. + /// + /// # Panics + /// + /// Panics if decoded length estimation overflows. + /// This would happen for sizes within a few bytes of the maximum value of `usize`. + fn decode_slice<T: AsRef<[u8]>>( + &self, + input: T, + output: &mut [u8], + ) -> Result<usize, DecodeSliceError> { + let input_bytes = input.as_ref(); + + let estimate = self.internal_decoded_len_estimate(input_bytes.len()); + if output.len() < estimate.decoded_len_estimate() { + return Err(DecodeSliceError::OutputSliceTooSmall); + } + + self.internal_decode(input_bytes, output, estimate) + .map_err(|e| e.into()) + } + + /// Decode the input into the provided output slice. + /// + /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end). 
+ /// + /// See [crate::decoded_len_estimate] for calculating buffer sizes. + /// + /// See [Engine::decode_slice] for a version that returns an error instead of panicking if the output + /// buffer is too small. + /// + /// # Panics + /// + /// Panics if decoded length estimation overflows. + /// This would happen for sizes within a few bytes of the maximum value of `usize`. + /// + /// Panics if the provided output buffer is too small for the decoded data. + fn decode_slice_unchecked<T: AsRef<[u8]>>( + &self, + input: T, + output: &mut [u8], + ) -> Result<usize, DecodeError> { + let input_bytes = input.as_ref(); + + self.internal_decode( + input_bytes, + output, + self.internal_decoded_len_estimate(input_bytes.len()), + ) + } +} + +/// The minimal level of configuration that engines must support. +pub trait Config { + /// Returns `true` if padding should be added after the encoded output. + /// + /// Padding is added outside the engine's encode() since the engine may be used + /// to encode only a chunk of the overall output, so it can't always know when + /// the output is "done" and would therefore need padding (if configured). + // It could be provided as a separate parameter when encoding, but that feels like + // leaking an implementation detail to the user, and it's hopefully more convenient + // to have to only pass one thing (the engine) to any part of the API. + fn encode_padding(&self) -> bool; +} + +/// The decode estimate used by an engine implementation. Users do not need to interact with this; +/// it is only for engine implementors. +/// +/// Implementors may store relevant data here when constructing this to avoid having to calculate +/// them again during actual decoding. +pub trait DecodeEstimate { + /// Returns a conservative (err on the side of too big) estimate of the decoded length to use + /// for pre-allocating buffers, etc. + /// + /// The estimate must be no larger than the next largest complete triple of decoded bytes. 
+ /// That is, the final quad of tokens to decode may be assumed to be complete with no padding. + /// + /// # Panics + /// + /// Panics if decoded length estimation overflows. + /// This would happen for sizes within a few bytes of the maximum value of `usize`. + fn decoded_len_estimate(&self) -> usize; +} + +/// Controls how pad bytes are handled when decoding. +/// +/// Each [Engine] must support at least the behavior indicated by +/// [DecodePaddingMode::RequireCanonical], and may support other modes. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum DecodePaddingMode { + /// Canonical padding is allowed, but any fewer padding bytes than that is also allowed. + Indifferent, + /// Padding must be canonical (0, 1, or 2 `=` as needed to produce a 4 byte suffix). + RequireCanonical, + /// Padding must be absent -- for when you want predictable padding, without any wasted bytes. + RequireNone, +} diff --git a/src/engine/naive.rs b/src/engine/naive.rs new file mode 100644 index 0000000..6665c5e --- /dev/null +++ b/src/engine/naive.rs @@ -0,0 +1,219 @@ +use crate::{ + alphabet::Alphabet, + engine::{ + general_purpose::{self, decode_table, encode_table}, + Config, DecodeEstimate, DecodePaddingMode, Engine, + }, + DecodeError, PAD_BYTE, +}; +use alloc::ops::BitOr; +use std::ops::{BitAnd, Shl, Shr}; + +/// Comparatively simple implementation that can be used as something to compare against in tests +pub struct Naive { + encode_table: [u8; 64], + decode_table: [u8; 256], + config: NaiveConfig, +} + +impl Naive { + const ENCODE_INPUT_CHUNK_SIZE: usize = 3; + const DECODE_INPUT_CHUNK_SIZE: usize = 4; + + pub const fn new(alphabet: &Alphabet, config: NaiveConfig) -> Self { + Self { + encode_table: encode_table(alphabet), + decode_table: decode_table(alphabet), + config, + } + } + + fn decode_byte_into_u32(&self, offset: usize, byte: u8) -> Result<u32, DecodeError> { + let decoded = self.decode_table[byte as usize]; + + if decoded == general_purpose::INVALID_VALUE { + 
return Err(DecodeError::InvalidByte(offset, byte)); + } + + Ok(decoded as u32) + } +} + +impl Engine for Naive { + type Config = NaiveConfig; + type DecodeEstimate = NaiveEstimate; + + fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize { + // complete chunks first + + const LOW_SIX_BITS: u32 = 0x3F; + + let rem = input.len() % Self::ENCODE_INPUT_CHUNK_SIZE; + // will never underflow + let complete_chunk_len = input.len() - rem; + + let mut input_index = 0_usize; + let mut output_index = 0_usize; + if let Some(last_complete_chunk_index) = + complete_chunk_len.checked_sub(Self::ENCODE_INPUT_CHUNK_SIZE) + { + while input_index <= last_complete_chunk_index { + let chunk = &input[input_index..input_index + Self::ENCODE_INPUT_CHUNK_SIZE]; + + // populate low 24 bits from 3 bytes + let chunk_int: u32 = + (chunk[0] as u32).shl(16) | (chunk[1] as u32).shl(8) | (chunk[2] as u32); + // encode 4x 6-bit output bytes + output[output_index] = self.encode_table[chunk_int.shr(18) as usize]; + output[output_index + 1] = + self.encode_table[chunk_int.shr(12_u8).bitand(LOW_SIX_BITS) as usize]; + output[output_index + 2] = + self.encode_table[chunk_int.shr(6_u8).bitand(LOW_SIX_BITS) as usize]; + output[output_index + 3] = + self.encode_table[chunk_int.bitand(LOW_SIX_BITS) as usize]; + + input_index += Self::ENCODE_INPUT_CHUNK_SIZE; + output_index += 4; + } + } + + // then leftovers + if rem == 2 { + let chunk = &input[input_index..input_index + 2]; + + // high six bits of chunk[0] + output[output_index] = self.encode_table[chunk[0].shr(2) as usize]; + // bottom 2 bits of [0], high 4 bits of [1] + output[output_index + 1] = + self.encode_table[(chunk[0].shl(4_u8).bitor(chunk[1].shr(4_u8)) as u32) + .bitand(LOW_SIX_BITS) as usize]; + // bottom 4 bits of [1], with the 2 bottom bits as zero + output[output_index + 2] = + self.encode_table[(chunk[1].shl(2_u8) as u32).bitand(LOW_SIX_BITS) as usize]; + + output_index += 3; + } else if rem == 1 { + let byte = 
input[input_index]; + output[output_index] = self.encode_table[byte.shr(2) as usize]; + output[output_index + 1] = + self.encode_table[(byte.shl(4_u8) as u32).bitand(LOW_SIX_BITS) as usize]; + output_index += 2; + } + + output_index + } + + fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate { + NaiveEstimate::new(input_len) + } + + fn internal_decode( + &self, + input: &[u8], + output: &mut [u8], + estimate: Self::DecodeEstimate, + ) -> Result<usize, DecodeError> { + if estimate.rem == 1 { + // trailing whitespace is so common that it's worth it to check the last byte to + // possibly return a better error message + if let Some(b) = input.last() { + if *b != PAD_BYTE + && self.decode_table[*b as usize] == general_purpose::INVALID_VALUE + { + return Err(DecodeError::InvalidByte(input.len() - 1, *b)); + } + } + + return Err(DecodeError::InvalidLength); + } + + let mut input_index = 0_usize; + let mut output_index = 0_usize; + const BOTTOM_BYTE: u32 = 0xFF; + + // can only use the main loop on non-trailing chunks + if input.len() > Self::DECODE_INPUT_CHUNK_SIZE { + // skip the last chunk, whether it's partial or full, since it might + // have padding, and start at the beginning of the chunk before that + let last_complete_chunk_start_index = estimate.complete_chunk_len + - if estimate.rem == 0 { + // Trailing chunk is also full chunk, so there must be at least 2 chunks, and + // this won't underflow + Self::DECODE_INPUT_CHUNK_SIZE * 2 + } else { + // Trailing chunk is partial, so it's already excluded in + // complete_chunk_len + Self::DECODE_INPUT_CHUNK_SIZE + }; + + while input_index <= last_complete_chunk_start_index { + let chunk = &input[input_index..input_index + Self::DECODE_INPUT_CHUNK_SIZE]; + let decoded_int: u32 = self.decode_byte_into_u32(input_index, chunk[0])?.shl(18) + | self + .decode_byte_into_u32(input_index + 1, chunk[1])? 
+ .shl(12) + | self.decode_byte_into_u32(input_index + 2, chunk[2])?.shl(6) + | self.decode_byte_into_u32(input_index + 3, chunk[3])?; + + output[output_index] = decoded_int.shr(16_u8).bitand(BOTTOM_BYTE) as u8; + output[output_index + 1] = decoded_int.shr(8_u8).bitand(BOTTOM_BYTE) as u8; + output[output_index + 2] = decoded_int.bitand(BOTTOM_BYTE) as u8; + + input_index += Self::DECODE_INPUT_CHUNK_SIZE; + output_index += 3; + } + } + + general_purpose::decode_suffix::decode_suffix( + input, + input_index, + output, + output_index, + &self.decode_table, + self.config.decode_allow_trailing_bits, + self.config.decode_padding_mode, + ) + } + + fn config(&self) -> &Self::Config { + &self.config + } +} + +pub struct NaiveEstimate { + /// remainder from dividing the input length by `Naive::DECODE_INPUT_CHUNK_SIZE` + rem: usize, + /// Length of input that is in complete `Naive::DECODE_INPUT_CHUNK_SIZE`-length chunks + complete_chunk_len: usize, +} + +impl NaiveEstimate { + fn new(input_len: usize) -> Self { + let rem = input_len % Naive::DECODE_INPUT_CHUNK_SIZE; + let complete_chunk_len = input_len - rem; + + Self { + rem, + complete_chunk_len, + } + } +} + +impl DecodeEstimate for NaiveEstimate { + fn decoded_len_estimate(&self) -> usize { + ((self.complete_chunk_len / 4) + ((self.rem > 0) as usize)) * 3 + } +} + +#[derive(Clone, Copy, Debug)] +pub struct NaiveConfig { + pub encode_padding: bool, + pub decode_allow_trailing_bits: bool, + pub decode_padding_mode: DecodePaddingMode, +} + +impl Config for NaiveConfig { + fn encode_padding(&self) -> bool { + self.encode_padding + } +} diff --git a/src/engine/tests.rs b/src/engine/tests.rs new file mode 100644 index 0000000..906bba0 --- /dev/null +++ b/src/engine/tests.rs @@ -0,0 +1,1430 @@ +// rstest_reuse template functions have unused variables +#![allow(unused_variables)] + +use rand::{ + self, + distributions::{self, Distribution as _}, + rngs, Rng as _, SeedableRng as _, +}; +use rstest::rstest; +use rstest_reuse::{apply, template}; +use
std::{collections, fmt}; + +use crate::{ + alphabet::{Alphabet, STANDARD}, + encode::add_padding, + encoded_len, + engine::{general_purpose, naive, Config, DecodeEstimate, DecodePaddingMode, Engine}, + tests::{assert_encode_sanity, random_alphabet, random_config}, + DecodeError, PAD_BYTE, +}; + +// the case::foo syntax includes the "foo" in the generated test method names +#[template] +#[rstest(engine_wrapper, +case::general_purpose(GeneralPurposeWrapper {}), +case::naive(NaiveWrapper {}), +)] +fn all_engines<E: EngineWrapper>(engine_wrapper: E) {} + +#[apply(all_engines)] +fn rfc_test_vectors_std_alphabet<E: EngineWrapper>(engine_wrapper: E) { + let data = vec![ + ("", ""), + ("f", "Zg=="), + ("fo", "Zm8="), + ("foo", "Zm9v"), + ("foob", "Zm9vYg=="), + ("fooba", "Zm9vYmE="), + ("foobar", "Zm9vYmFy"), + ]; + + let engine = E::standard(); + let engine_no_padding = E::standard_unpadded(); + + for (orig, encoded) in &data { + let encoded_without_padding = encoded.trim_end_matches('='); + + // unpadded + { + let mut encode_buf = [0_u8; 8]; + let mut decode_buf = [0_u8; 6]; + + let encode_len = + engine_no_padding.internal_encode(orig.as_bytes(), &mut encode_buf[..]); + assert_eq!( + &encoded_without_padding, + &std::str::from_utf8(&encode_buf[0..encode_len]).unwrap() + ); + let decode_len = engine_no_padding + .decode_slice_unchecked(encoded_without_padding.as_bytes(), &mut decode_buf[..]) + .unwrap(); + assert_eq!(orig.len(), decode_len); + + assert_eq!( + orig, + &std::str::from_utf8(&decode_buf[0..decode_len]).unwrap() + ); + + // if there was any padding originally, the no padding engine won't decode it + if encoded.as_bytes().contains(&PAD_BYTE) { + assert_eq!( + Err(DecodeError::InvalidPadding), + engine_no_padding.decode(encoded) + ) + } + } + + // padded + { + let mut encode_buf = [0_u8; 8]; + let mut decode_buf = [0_u8; 6]; + + let encode_len = engine.internal_encode(orig.as_bytes(), &mut encode_buf[..]); + assert_eq!( + // doesn't have padding added yet + 
&encoded_without_padding, + &std::str::from_utf8(&encode_buf[0..encode_len]).unwrap() + ); + let pad_len = add_padding(orig.len(), &mut encode_buf[encode_len..]); + assert_eq!(encoded.as_bytes(), &encode_buf[..encode_len + pad_len]); + + let decode_len = engine + .decode_slice_unchecked(encoded.as_bytes(), &mut decode_buf[..]) + .unwrap(); + assert_eq!(orig.len(), decode_len); + + assert_eq!( + orig, + &std::str::from_utf8(&decode_buf[0..decode_len]).unwrap() + ); + + // if there was (canonical) padding, and we remove it, the standard engine won't decode + if encoded.as_bytes().contains(&PAD_BYTE) { + assert_eq!( + Err(DecodeError::InvalidPadding), + engine.decode(encoded_without_padding) + ) + } + } + } +} + +#[apply(all_engines)] +fn roundtrip_random<E: EngineWrapper>(engine_wrapper: E) { + let mut rng = seeded_rng(); + + let mut orig_data = Vec::<u8>::new(); + let mut encode_buf = Vec::<u8>::new(); + let mut decode_buf = Vec::<u8>::new(); + + let len_range = distributions::Uniform::new(1, 1_000); + + for _ in 0..10_000 { + let engine = E::random(&mut rng); + + orig_data.clear(); + encode_buf.clear(); + decode_buf.clear(); + + let (orig_len, _, encoded_len) = generate_random_encoded_data( + &engine, + &mut orig_data, + &mut encode_buf, + &mut rng, + &len_range, + ); + + // exactly the right size + decode_buf.resize(orig_len, 0); + + let dec_len = engine + .decode_slice_unchecked(&encode_buf[0..encoded_len], &mut decode_buf[..]) + .unwrap(); + + assert_eq!(orig_len, dec_len); + assert_eq!(&orig_data[..], &decode_buf[..dec_len]); + } +} + +#[apply(all_engines)] +fn encode_doesnt_write_extra_bytes<E: EngineWrapper>(engine_wrapper: E) { + let mut rng = seeded_rng(); + + let mut orig_data = Vec::<u8>::new(); + let mut encode_buf = Vec::<u8>::new(); + let mut encode_buf_backup = Vec::<u8>::new(); + + let input_len_range = distributions::Uniform::new(0, 1000); + + for _ in 0..10_000 { + let engine = E::random(&mut rng); + let padded = engine.config().encode_padding(); + 
+ orig_data.clear(); + encode_buf.clear(); + encode_buf_backup.clear(); + + let orig_len = fill_rand(&mut orig_data, &mut rng, &input_len_range); + + let prefix_len = 1024; + // plenty of prefix and suffix + fill_rand_len(&mut encode_buf, &mut rng, prefix_len * 2 + orig_len * 2); + encode_buf_backup.extend_from_slice(&encode_buf[..]); + + let expected_encode_len_no_pad = encoded_len(orig_len, false).unwrap(); + + let encoded_len_no_pad = + engine.internal_encode(&orig_data[..], &mut encode_buf[prefix_len..]); + assert_eq!(expected_encode_len_no_pad, encoded_len_no_pad); + + // no writes past what it claimed to write + assert_eq!(&encode_buf_backup[..prefix_len], &encode_buf[..prefix_len]); + assert_eq!( + &encode_buf_backup[(prefix_len + encoded_len_no_pad)..], + &encode_buf[(prefix_len + encoded_len_no_pad)..] + ); + + let encoded_data = &encode_buf[prefix_len..(prefix_len + encoded_len_no_pad)]; + assert_encode_sanity( + std::str::from_utf8(encoded_data).unwrap(), + // engines don't pad + false, + orig_len, + ); + + // pad so we can decode it in case our random engine requires padding + let pad_len = if padded { + add_padding(orig_len, &mut encode_buf[prefix_len + encoded_len_no_pad..]) + } else { + 0 + }; + + assert_eq!( + orig_data, + engine + .decode(&encode_buf[prefix_len..(prefix_len + encoded_len_no_pad + pad_len)],) + .unwrap() + ); + } +} + +#[apply(all_engines)] +fn encode_engine_slice_fits_into_precisely_sized_slice<E: EngineWrapper>(engine_wrapper: E) { + let mut orig_data = Vec::new(); + let mut encoded_data = Vec::new(); + let mut decoded = Vec::new(); + + let input_len_range = distributions::Uniform::new(0, 1000); + + let mut rng = rngs::SmallRng::from_entropy(); + + for _ in 0..10_000 { + orig_data.clear(); + encoded_data.clear(); + decoded.clear(); + + let input_len = input_len_range.sample(&mut rng); + + for _ in 0..input_len { + orig_data.push(rng.gen()); + } + + let engine = E::random(&mut rng); + + let encoded_size = encoded_len(input_len, 
engine.config().encode_padding()).unwrap(); + + encoded_data.resize(encoded_size, 0); + + assert_eq!( + encoded_size, + engine.encode_slice(&orig_data, &mut encoded_data).unwrap() + ); + + assert_encode_sanity( + std::str::from_utf8(&encoded_data[0..encoded_size]).unwrap(), + engine.config().encode_padding(), + input_len, + ); + + engine + .decode_vec(&encoded_data[0..encoded_size], &mut decoded) + .unwrap(); + assert_eq!(orig_data, decoded); + } +} + +#[apply(all_engines)] +fn decode_doesnt_write_extra_bytes<E>(engine_wrapper: E) +where + E: EngineWrapper, + <<E as EngineWrapper>::Engine as Engine>::Config: fmt::Debug, +{ + let mut rng = seeded_rng(); + + let mut orig_data = Vec::<u8>::new(); + let mut encode_buf = Vec::<u8>::new(); + let mut decode_buf = Vec::<u8>::new(); + let mut decode_buf_backup = Vec::<u8>::new(); + + let len_range = distributions::Uniform::new(1, 1_000); + + for _ in 0..10_000 { + let engine = E::random(&mut rng); + + orig_data.clear(); + encode_buf.clear(); + decode_buf.clear(); + decode_buf_backup.clear(); + + let orig_len = fill_rand(&mut orig_data, &mut rng, &len_range); + encode_buf.resize(orig_len * 2 + 100, 0); + + let encoded_len = engine + .encode_slice(&orig_data[..], &mut encode_buf[..]) + .unwrap(); + encode_buf.truncate(encoded_len); + + // oversize decode buffer so we can easily tell if it writes anything more than + // just the decoded data + let prefix_len = 1024; + // plenty of prefix and suffix + fill_rand_len(&mut decode_buf, &mut rng, prefix_len * 2 + orig_len * 2); + decode_buf_backup.extend_from_slice(&decode_buf[..]); + + let dec_len = engine + .decode_slice_unchecked(&encode_buf, &mut decode_buf[prefix_len..]) + .unwrap(); + + assert_eq!(orig_len, dec_len); + assert_eq!( + &orig_data[..], + &decode_buf[prefix_len..prefix_len + dec_len] + ); + assert_eq!(&decode_buf_backup[..prefix_len], &decode_buf[..prefix_len]); + assert_eq!( + &decode_buf_backup[prefix_len + dec_len..], + &decode_buf[prefix_len + dec_len..] 
+ ); + } +} + +#[apply(all_engines)] +fn decode_detect_invalid_last_symbol<E: EngineWrapper>(engine_wrapper: E) { + // 0xFF -> "/w==", so all letters > w, 0-9, and '+', '/' should get InvalidLastSymbol + let engine = E::standard(); + + assert_eq!(Ok(vec![0x89, 0x85]), engine.decode("iYU=")); + assert_eq!(Ok(vec![0xFF]), engine.decode("/w==")); + + for (suffix, offset) in vec![ + // suffix, offset of bad byte from start of suffix + ("/x==", 1_usize), + ("/z==", 1_usize), + ("/0==", 1_usize), + ("/9==", 1_usize), + ("/+==", 1_usize), + ("//==", 1_usize), + // trailing 01 + ("iYV=", 2_usize), + // trailing 10 + ("iYW=", 2_usize), + // trailing 11 + ("iYX=", 2_usize), + ] { + for prefix_quads in 0..256 { + let mut encoded = "AAAA".repeat(prefix_quads); + encoded.push_str(suffix); + + assert_eq!( + Err(DecodeError::InvalidLastSymbol( + encoded.len() - 4 + offset, + suffix.as_bytes()[offset], + )), + engine.decode(encoded.as_str()) + ); + } + } +} + +#[apply(all_engines)] +fn decode_detect_invalid_last_symbol_when_length_is_also_invalid<E: EngineWrapper>( + engine_wrapper: E, +) { + let mut rng = seeded_rng(); + + // check across enough lengths that it would likely cover any implementation's various internal + // small/large input division + for len in (0_usize..256).map(|len| len * 4 + 1) { + let engine = E::random_alphabet(&mut rng, &STANDARD); + + let mut input = vec![b'A'; len]; + + // with a valid last char, it's InvalidLength + assert_eq!(Err(DecodeError::InvalidLength), engine.decode(&input)); + // after mangling the last char, it's InvalidByte + input[len - 1] = b'"'; + assert_eq!( + Err(DecodeError::InvalidByte(len - 1, b'"')), + engine.decode(&input) + ); + } +} + +#[apply(all_engines)] +fn decode_detect_invalid_last_symbol_every_possible_two_symbols<E: EngineWrapper>( + engine_wrapper: E, +) { + let engine = E::standard(); + + let mut base64_to_bytes = collections::HashMap::new(); + + for b in 0_u8..=255 { + let mut b64 = vec![0_u8; 4]; + assert_eq!(2, 
engine.internal_encode(&[b], &mut b64[..])); + let _ = add_padding(1, &mut b64[2..]); + + assert!(base64_to_bytes.insert(b64, vec![b]).is_none()); + } + + // every possible combination of trailing symbols must either decode to 1 byte or get InvalidLastSymbol, with or without any leading chunks + + let mut prefix = Vec::new(); + for _ in 0..256 { + let mut clone = prefix.clone(); + + let mut symbols = [0_u8; 4]; + for &s1 in STANDARD.symbols.iter() { + symbols[0] = s1; + for &s2 in STANDARD.symbols.iter() { + symbols[1] = s2; + symbols[2] = PAD_BYTE; + symbols[3] = PAD_BYTE; + + // chop off previous symbols + clone.truncate(prefix.len()); + clone.extend_from_slice(&symbols[..]); + let decoded_prefix_len = prefix.len() / 4 * 3; + + match base64_to_bytes.get(&symbols[..]) { + Some(bytes) => { + let res = engine + .decode(&clone) + // remove prefix + .map(|decoded| decoded[decoded_prefix_len..].to_vec()); + + assert_eq!(Ok(bytes.clone()), res); + } + None => assert_eq!( + Err(DecodeError::InvalidLastSymbol(1, s2)), + engine.decode(&symbols[..]) + ), + } + } + } + + prefix.extend_from_slice(b"AAAA"); + } +} + +#[apply(all_engines)] +fn decode_detect_invalid_last_symbol_every_possible_three_symbols<E: EngineWrapper>( + engine_wrapper: E, +) { + let engine = E::standard(); + + let mut base64_to_bytes = collections::HashMap::new(); + + let mut bytes = [0_u8; 2]; + for b1 in 0_u8..=255 { + bytes[0] = b1; + for b2 in 0_u8..=255 { + bytes[1] = b2; + let mut b64 = vec![0_u8; 4]; + assert_eq!(3, engine.internal_encode(&bytes, &mut b64[..])); + let _ = add_padding(2, &mut b64[3..]); + + let mut v = Vec::with_capacity(2); + v.extend_from_slice(&bytes[..]); + + assert!(base64_to_bytes.insert(b64, v).is_none()); + } + } + + // every possible combination of symbols must either decode to 2 bytes or get InvalidLastSymbol, with or without any leading chunks + + let mut prefix = Vec::new(); + for _ in 0..256 { + let mut input = prefix.clone(); + + let mut symbols = [0_u8; 4]; + for &s1 
in STANDARD.symbols.iter() { + symbols[0] = s1; + for &s2 in STANDARD.symbols.iter() { + symbols[1] = s2; + for &s3 in STANDARD.symbols.iter() { + symbols[2] = s3; + symbols[3] = PAD_BYTE; + + // chop off previous symbols + input.truncate(prefix.len()); + input.extend_from_slice(&symbols[..]); + let decoded_prefix_len = prefix.len() / 4 * 3; + + match base64_to_bytes.get(&symbols[..]) { + Some(bytes) => { + let res = engine + .decode(&input) + // remove prefix + .map(|decoded| decoded[decoded_prefix_len..].to_vec()); + + assert_eq!(Ok(bytes.clone()), res); + } + None => assert_eq!( + Err(DecodeError::InvalidLastSymbol(2, s3)), + engine.decode(&symbols[..]) + ), + } + } + } + } + prefix.extend_from_slice(b"AAAA"); + } +} + +#[apply(all_engines)] +fn decode_invalid_trailing_bits_ignored_when_configured<E: EngineWrapper>(engine_wrapper: E) { + let strict = E::standard(); + let forgiving = E::standard_allow_trailing_bits(); + + fn assert_tolerant_decode<E: Engine>( + engine: &E, + input: &mut String, + b64_prefix_len: usize, + expected_decode_bytes: Vec<u8>, + data: &str, + ) { + let prefixed = prefixed_data(input, b64_prefix_len, data); + let decoded = engine.decode(prefixed); + // prefix is always complete chunks + let decoded_prefix_len = b64_prefix_len / 4 * 3; + assert_eq!( + Ok(expected_decode_bytes), + decoded.map(|v| v[decoded_prefix_len..].to_vec()) + ); + } + + let mut prefix = String::new(); + for _ in 0..256 { + let mut input = prefix.clone(); + + // example from https://github.com/marshallpierce/rust-base64/issues/75 + assert!(strict + .decode(prefixed_data(&mut input, prefix.len(), "/w==")) + .is_ok()); + assert!(strict + .decode(prefixed_data(&mut input, prefix.len(), "iYU=")) + .is_ok()); + // trailing 01 + assert_tolerant_decode(&forgiving, &mut input, prefix.len(), vec![255], "/x=="); + assert_tolerant_decode(&forgiving, &mut input, prefix.len(), vec![137, 133], "iYV="); + // trailing 10 + assert_tolerant_decode(&forgiving, &mut input, prefix.len(), 
vec![255], "/y=="); + assert_tolerant_decode(&forgiving, &mut input, prefix.len(), vec![137, 133], "iYW="); + // trailing 11 + assert_tolerant_decode(&forgiving, &mut input, prefix.len(), vec![255], "/z=="); + assert_tolerant_decode(&forgiving, &mut input, prefix.len(), vec![137, 133], "iYX="); + + prefix.push_str("AAAA"); + } +} + +#[apply(all_engines)] +fn decode_invalid_byte_error<E: EngineWrapper>(engine_wrapper: E) { + let mut rng = seeded_rng(); + + let mut orig_data = Vec::<u8>::new(); + let mut encode_buf = Vec::<u8>::new(); + let mut decode_buf = Vec::<u8>::new(); + + let len_range = distributions::Uniform::new(1, 1_000); + + for _ in 0..10_000 { + let alphabet = random_alphabet(&mut rng); + let engine = E::random_alphabet(&mut rng, alphabet); + + orig_data.clear(); + encode_buf.clear(); + decode_buf.clear(); + + let (orig_len, encoded_len_just_data, encoded_len_with_padding) = + generate_random_encoded_data( + &engine, + &mut orig_data, + &mut encode_buf, + &mut rng, + &len_range, + ); + + // exactly the right size + decode_buf.resize(orig_len, 0); + + // replace one encoded byte with an invalid byte + let invalid_byte: u8 = loop { + let byte: u8 = rng.gen(); + + if alphabet.symbols.contains(&byte) { + continue; + } else { + break byte; + } + }; + + let invalid_range = distributions::Uniform::new(0, orig_len); + let invalid_index = invalid_range.sample(&mut rng); + encode_buf[invalid_index] = invalid_byte; + + assert_eq!( + Err(DecodeError::InvalidByte(invalid_index, invalid_byte)), + engine.decode_slice_unchecked( + &encode_buf[0..encoded_len_with_padding], + &mut decode_buf[..], + ) + ); + } +} + +/// Any amount of padding anywhere before the final non padding character = invalid byte at first +/// pad byte. +/// From this, we know padding must extend to the end of the input. 
+#[apply(all_engines)] +fn decode_padding_before_final_non_padding_char_error_invalid_byte<E: EngineWrapper>( + engine_wrapper: E, +) { + let mut rng = seeded_rng(); + + // the different amounts of proper padding, w/ offset from end for the last non-padding char + let suffixes = vec![("/w==", 2), ("iYu=", 1), ("zzzz", 0)]; + + let prefix_quads_range = distributions::Uniform::from(0..=256); + + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); + + for _ in 0..100_000 { + for (suffix, offset) in suffixes.iter() { + let mut s = "ABCD".repeat(prefix_quads_range.sample(&mut rng)); + s.push_str(suffix); + let mut encoded = s.into_bytes(); + + // calculate a range to write padding into that leaves at least one non padding char + let last_non_padding_offset = encoded.len() - 1 - offset; + + // don't include last non padding char as it must stay not padding + let padding_end = rng.gen_range(0..last_non_padding_offset); + + // don't use more than 100 bytes of padding, but also use shorter lengths when + // padding_end is near the start of the encoded data to avoid biasing to padding + // the entire prefix on short lengths + let padding_len = rng.gen_range(1..=usize::min(100, padding_end + 1)); + let padding_start = padding_end.saturating_sub(padding_len); + + encoded[padding_start..=padding_end].fill(PAD_BYTE); + + assert_eq!( + Err(DecodeError::InvalidByte(padding_start, PAD_BYTE)), + engine.decode(&encoded), + ); + } + } + } +} + +/// Any amount of padding before final chunk that crosses over into final chunk with 1-4 bytes = +/// invalid byte at first pad byte (except for 1 byte suffix = invalid length). +/// From this we know the padding must start in the final chunk. 
+#[apply(all_engines)] +fn decode_padding_starts_before_final_chunk_error_invalid_byte<E: EngineWrapper>( + engine_wrapper: E, +) { + let mut rng = seeded_rng(); + + // must have at least one prefix quad + let prefix_quads_range = distributions::Uniform::from(1..256); + // including 1 just to make sure that it really does produce invalid length + let suffix_pad_len_range = distributions::Uniform::from(1..=4); + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); + for _ in 0..100_000 { + let suffix_len = suffix_pad_len_range.sample(&mut rng); + let mut encoded = "ABCD" + .repeat(prefix_quads_range.sample(&mut rng)) + .into_bytes(); + encoded.resize(encoded.len() + suffix_len, PAD_BYTE); + + // amount of padding must be long enough to extend back from suffix into previous + // quads + let padding_len = rng.gen_range(suffix_len + 1..encoded.len()); + // no non-padding after padding in this test, so padding goes to the end + let padding_start = encoded.len() - padding_len; + encoded[padding_start..].fill(PAD_BYTE); + + if suffix_len == 1 { + assert_eq!(Err(DecodeError::InvalidLength), engine.decode(&encoded),); + } else { + assert_eq!( + Err(DecodeError::InvalidByte(padding_start, PAD_BYTE)), + engine.decode(&encoded), + ); + } + } + } +} + +/// 0-1 bytes of data before any amount of padding in final chunk = invalid byte, since padding +/// is not valid data (consistent with error for pad bytes in earlier chunks). 
+/// From this we know there must be 2-3 bytes of data before padding +#[apply(all_engines)] +fn decode_too_little_data_before_padding_error_invalid_byte<E: EngineWrapper>(engine_wrapper: E) { + let mut rng = seeded_rng(); + + // want to test no prefix quad case, so start at 0 + let prefix_quads_range = distributions::Uniform::from(0_usize..256); + let suffix_data_len_range = distributions::Uniform::from(0_usize..=1); + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); + for _ in 0..100_000 { + let suffix_data_len = suffix_data_len_range.sample(&mut rng); + let prefix_quad_len = prefix_quads_range.sample(&mut rng); + + // ensure there is a suffix quad + let min_padding = usize::from(suffix_data_len == 0); + + // for all possible padding lengths + for padding_len in min_padding..=(4 - suffix_data_len) { + let mut encoded = "ABCD".repeat(prefix_quad_len).into_bytes(); + encoded.resize(encoded.len() + suffix_data_len, b'A'); + encoded.resize(encoded.len() + padding_len, PAD_BYTE); + + if suffix_data_len + padding_len == 1 { + assert_eq!(Err(DecodeError::InvalidLength), engine.decode(&encoded),); + } else { + assert_eq!( + Err(DecodeError::InvalidByte( + prefix_quad_len * 4 + suffix_data_len, + PAD_BYTE, + )), + engine.decode(&encoded), + "suffix data len {} pad len {}", + suffix_data_len, + padding_len + ); + } + } + } + } +} + +// https://eprint.iacr.org/2022/361.pdf table 2, test 1 +#[apply(all_engines)] +fn decode_malleability_test_case_3_byte_suffix_valid<E: EngineWrapper>(engine_wrapper: E) { + assert_eq!( + b"Hello".as_slice(), + &E::standard().decode("SGVsbG8=").unwrap() + ); +} + +// https://eprint.iacr.org/2022/361.pdf table 2, test 2 +#[apply(all_engines)] +fn decode_malleability_test_case_3_byte_suffix_invalid_trailing_symbol<E: EngineWrapper>( + engine_wrapper: E, +) { + assert_eq!( + DecodeError::InvalidLastSymbol(6, 0x39), + 
E::standard().decode("SGVsbG9=").unwrap_err() + ); +} + +// https://eprint.iacr.org/2022/361.pdf table 2, test 3 +#[apply(all_engines)] +fn decode_malleability_test_case_3_byte_suffix_no_padding<E: EngineWrapper>(engine_wrapper: E) { + assert_eq!( + DecodeError::InvalidPadding, + E::standard().decode("SGVsbG9").unwrap_err() + ); +} + +// https://eprint.iacr.org/2022/361.pdf table 2, test 4 +#[apply(all_engines)] +fn decode_malleability_test_case_2_byte_suffix_valid_two_padding_symbols<E: EngineWrapper>( + engine_wrapper: E, +) { + assert_eq!( + b"Hell".as_slice(), + &E::standard().decode("SGVsbA==").unwrap() + ); +} + +// https://eprint.iacr.org/2022/361.pdf table 2, test 5 +#[apply(all_engines)] +fn decode_malleability_test_case_2_byte_suffix_short_padding<E: EngineWrapper>(engine_wrapper: E) { + assert_eq!( + DecodeError::InvalidPadding, + E::standard().decode("SGVsbA=").unwrap_err() + ); +} + +// https://eprint.iacr.org/2022/361.pdf table 2, test 6 +#[apply(all_engines)] +fn decode_malleability_test_case_2_byte_suffix_no_padding<E: EngineWrapper>(engine_wrapper: E) { + assert_eq!( + DecodeError::InvalidPadding, + E::standard().decode("SGVsbA").unwrap_err() + ); +} + +// https://eprint.iacr.org/2022/361.pdf table 2, test 7 +#[apply(all_engines)] +fn decode_malleability_test_case_2_byte_suffix_too_much_padding<E: EngineWrapper>( + engine_wrapper: E, +) { + assert_eq!( + DecodeError::InvalidByte(6, PAD_BYTE), + E::standard().decode("SGVsbA====").unwrap_err() + ); +} + +/// Requires canonical padding -> accepts 2 + 2, 3 + 1, 4 + 0 final quad configurations +#[apply(all_engines)] +fn decode_pad_mode_requires_canonical_accepts_canonical<E: EngineWrapper>(engine_wrapper: E) { + assert_all_suffixes_ok( + E::standard_with_pad_mode(true, DecodePaddingMode::RequireCanonical), + vec!["/w==", "iYU=", "AAAA"], + ); +} + +/// Requires canonical padding -> rejects 2 + 0-1, 3 + 0 final chunk configurations +#[apply(all_engines)] +fn 
decode_pad_mode_requires_canonical_rejects_non_canonical<E: EngineWrapper>(engine_wrapper: E) { + let engine = E::standard_with_pad_mode(true, DecodePaddingMode::RequireCanonical); + + let suffixes = vec!["/w", "/w=", "iYU"]; + for num_prefix_quads in 0..256 { + for &suffix in suffixes.iter() { + let mut encoded = "AAAA".repeat(num_prefix_quads); + encoded.push_str(suffix); + + let res = engine.decode(&encoded); + + assert_eq!(Err(DecodeError::InvalidPadding), res); + } + } +} + +/// Requires no padding -> accepts 2 + 0, 3 + 0, 4 + 0 final chunk configuration +#[apply(all_engines)] +fn decode_pad_mode_requires_no_padding_accepts_no_padding<E: EngineWrapper>(engine_wrapper: E) { + assert_all_suffixes_ok( + E::standard_with_pad_mode(true, DecodePaddingMode::RequireNone), + vec!["/w", "iYU", "AAAA"], + ); +} + +/// Requires no padding -> rejects 2 + 1-2, 3 + 1 final chunk configuration +#[apply(all_engines)] +fn decode_pad_mode_requires_no_padding_rejects_any_padding<E: EngineWrapper>(engine_wrapper: E) { + let engine = E::standard_with_pad_mode(true, DecodePaddingMode::RequireNone); + + let suffixes = vec!["/w=", "/w==", "iYU="]; + for num_prefix_quads in 0..256 { + for &suffix in suffixes.iter() { + let mut encoded = "AAAA".repeat(num_prefix_quads); + encoded.push_str(suffix); + + let res = engine.decode(&encoded); + + assert_eq!(Err(DecodeError::InvalidPadding), res); + } + } +} + +/// Indifferent padding accepts 2 + 0-2, 3 + 0-1, 4 + 0 final chunk configuration +#[apply(all_engines)] +fn decode_pad_mode_indifferent_padding_accepts_anything<E: EngineWrapper>(engine_wrapper: E) { + assert_all_suffixes_ok( + E::standard_with_pad_mode(true, DecodePaddingMode::Indifferent), + vec!["/w", "/w=", "/w==", "iYU", "iYU=", "AAAA"], + ); +} + +//this is a MAY in the rfc: https://tools.ietf.org/html/rfc4648#section-3.3 +#[apply(all_engines)] +fn decode_pad_byte_in_penultimate_quad_error<E: EngineWrapper>(engine_wrapper: E) { + for mode in all_pad_modes() { + // we don't encode 
so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); + + for num_prefix_quads in 0..256 { + // leave room for at least one pad byte in penultimate quad + for num_valid_bytes_penultimate_quad in 0..4 { + // can't have 1 or it would be invalid length + for num_pad_bytes_in_final_quad in 2..=4 { + let mut s: String = "ABCD".repeat(num_prefix_quads); + + // varying amounts of padding in the penultimate quad + for _ in 0..num_valid_bytes_penultimate_quad { + s.push('A'); + } + // finish penultimate quad with padding + for _ in num_valid_bytes_penultimate_quad..4 { + s.push('='); + } + // and more padding in the final quad + for _ in 0..num_pad_bytes_in_final_quad { + s.push('='); + } + + // padding should be an invalid byte before the final quad. + // Could argue that the *next* padding byte (in the next quad) is technically the first + // erroneous one, but reporting that accurately is more complex and probably nobody cares + assert_eq!( + DecodeError::InvalidByte( + num_prefix_quads * 4 + num_valid_bytes_penultimate_quad, + b'=', + ), + engine.decode(&s).unwrap_err() + ); + } + } + } + } +} + +#[apply(all_engines)] +fn decode_bytes_after_padding_in_final_quad_error<E: EngineWrapper>(engine_wrapper: E) { + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); + + for num_prefix_quads in 0..256 { + // leave at least one byte in the quad for padding + for bytes_after_padding in 1..4 { + let mut s: String = "ABCD".repeat(num_prefix_quads); + + // every invalid padding position with a 3-byte final quad: 1 to 3 bytes after padding + for _ in 0..(3 - bytes_after_padding) { + s.push('A'); + } + s.push('='); + for _ in 0..bytes_after_padding { + s.push('A'); + } + + // First (and only) padding byte is invalid. 
+ assert_eq!( + DecodeError::InvalidByte( + num_prefix_quads * 4 + (3 - bytes_after_padding), + b'=' + ), + engine.decode(&s).unwrap_err() + ); + } + } + } +} + +#[apply(all_engines)] +fn decode_absurd_pad_error<E: EngineWrapper>(engine_wrapper: E) { + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); + + for num_prefix_quads in 0..256 { + let mut s: String = "ABCD".repeat(num_prefix_quads); + s.push_str("==Y=Wx===pY=2U====="); + + // first padding byte + assert_eq!( + DecodeError::InvalidByte(num_prefix_quads * 4, b'='), + engine.decode(&s).unwrap_err() + ); + } + } +} + +#[apply(all_engines)] +fn decode_too_much_padding_returns_error<E: EngineWrapper>(engine_wrapper: E) { + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); + + for num_prefix_quads in 0..256 { + // add enough padding to ensure that we'll hit all decode stages at the different lengths + for pad_bytes in 1..=64 { + let mut s: String = "ABCD".repeat(num_prefix_quads); + let padding: String = "=".repeat(pad_bytes); + s.push_str(&padding); + + if pad_bytes % 4 == 1 { + assert_eq!(DecodeError::InvalidLength, engine.decode(&s).unwrap_err()); + } else { + assert_eq!( + DecodeError::InvalidByte(num_prefix_quads * 4, b'='), + engine.decode(&s).unwrap_err() + ); + } + } + } + } +} + +#[apply(all_engines)] +fn decode_padding_followed_by_non_padding_returns_error<E: EngineWrapper>(engine_wrapper: E) { + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); + + for num_prefix_quads in 0..256 { + for pad_bytes in 0..=32 { + let mut s: String = "ABCD".repeat(num_prefix_quads); + let padding: String = "=".repeat(pad_bytes); + s.push_str(&padding); + s.push('E'); + + if pad_bytes % 4 == 0 { + assert_eq!(DecodeError::InvalidLength, 
engine.decode(&s).unwrap_err()); + } else { + assert_eq!( + DecodeError::InvalidByte(num_prefix_quads * 4, b'='), + engine.decode(&s).unwrap_err() + ); + } + } + } + } +} + +#[apply(all_engines)] +fn decode_one_char_in_final_quad_with_padding_error<E: EngineWrapper>(engine_wrapper: E) { + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); + + for num_prefix_quads in 0..256 { + let mut s: String = "ABCD".repeat(num_prefix_quads); + s.push_str("E="); + + assert_eq!( + DecodeError::InvalidByte(num_prefix_quads * 4 + 1, b'='), + engine.decode(&s).unwrap_err() + ); + + // more padding doesn't change the error + s.push('='); + assert_eq!( + DecodeError::InvalidByte(num_prefix_quads * 4 + 1, b'='), + engine.decode(&s).unwrap_err() + ); + + s.push('='); + assert_eq!( + DecodeError::InvalidByte(num_prefix_quads * 4 + 1, b'='), + engine.decode(&s).unwrap_err() + ); + } + } +} + +#[apply(all_engines)] +fn decode_too_few_symbols_in_final_quad_error<E: EngineWrapper>(engine_wrapper: E) { + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); + + for num_prefix_quads in 0..256 { + // <2 is invalid + for final_quad_symbols in 0..2 { + for padding_symbols in 0..=(4 - final_quad_symbols) { + let mut s: String = "ABCD".repeat(num_prefix_quads); + + for _ in 0..final_quad_symbols { + s.push('A'); + } + for _ in 0..padding_symbols { + s.push('='); + } + + match final_quad_symbols + padding_symbols { + 0 => continue, + 1 => { + assert_eq!(DecodeError::InvalidLength, engine.decode(&s).unwrap_err()); + } + _ => { + // error reported at first padding byte + assert_eq!( + DecodeError::InvalidByte( + num_prefix_quads * 4 + final_quad_symbols, + b'=', + ), + engine.decode(&s).unwrap_err() + ); + } + } + } + } + } + } +} + +#[apply(all_engines)] +fn decode_invalid_trailing_bytes<E: EngineWrapper>(engine_wrapper: 
E) { + for mode in all_pad_modes() { + // we don't encode so we don't care about encode padding + let engine = E::standard_with_pad_mode(true, mode); + + for num_prefix_quads in 0..256 { + let mut s: String = "ABCD".repeat(num_prefix_quads); + s.push_str("Cg==\n"); + + // The case of trailing newlines is common enough to warrant a test for a good error + // message. + assert_eq!( + Err(DecodeError::InvalidByte(num_prefix_quads * 4 + 4, b'\n')), + engine.decode(&s) + ); + + // extra padding, however, is still InvalidLength + let s = s.replace('\n', "="); + assert_eq!(Err(DecodeError::InvalidLength), engine.decode(s)); + } + } +} + +#[apply(all_engines)] +fn decode_wrong_length_error<E: EngineWrapper>(engine_wrapper: E) { + let engine = E::standard_with_pad_mode(true, DecodePaddingMode::Indifferent); + + for num_prefix_quads in 0..256 { + // at least one token, otherwise it wouldn't be a final quad + for num_tokens_final_quad in 1..=4 { + for num_padding in 0..=(4 - num_tokens_final_quad) { + let mut s: String = "IIII".repeat(num_prefix_quads); + for _ in 0..num_tokens_final_quad { + s.push('g'); + } + for _ in 0..num_padding { + s.push('='); + } + + let res = engine.decode(&s); + if num_tokens_final_quad >= 2 { + assert!(res.is_ok()); + } else if num_tokens_final_quad == 1 && num_padding > 0 { + // = is invalid if it's too early + assert_eq!( + Err(DecodeError::InvalidByte( + num_prefix_quads * 4 + num_tokens_final_quad, + 61 + )), + res + ); + } else if num_padding > 2 { + assert_eq!(Err(DecodeError::InvalidPadding), res); + } else { + assert_eq!(Err(DecodeError::InvalidLength), res); + } + } + } + } +} + +#[apply(all_engines)] +fn decode_into_slice_fits_in_precisely_sized_slice<E: EngineWrapper>(engine_wrapper: E) { + let mut orig_data = Vec::new(); + let mut encoded_data = String::new(); + let mut decode_buf = Vec::new(); + + let input_len_range = distributions::Uniform::new(0, 1000); + let mut rng = rngs::SmallRng::from_entropy(); + + for _ in 0..10_000 { + 
orig_data.clear(); + encoded_data.clear(); + decode_buf.clear(); + + let input_len = input_len_range.sample(&mut rng); + + for _ in 0..input_len { + orig_data.push(rng.gen()); + } + + let engine = E::random(&mut rng); + engine.encode_string(&orig_data, &mut encoded_data); + assert_encode_sanity(&encoded_data, engine.config().encode_padding(), input_len); + + decode_buf.resize(input_len, 0); + + // decode into the non-empty buf + let decode_bytes_written = engine + .decode_slice_unchecked(encoded_data.as_bytes(), &mut decode_buf[..]) + .unwrap(); + + assert_eq!(orig_data.len(), decode_bytes_written); + assert_eq!(orig_data, decode_buf); + } +} + +#[apply(all_engines)] +fn decode_length_estimate_delta<E: EngineWrapper>(engine_wrapper: E) { + for engine in [E::standard(), E::standard_unpadded()] { + for &padding in &[true, false] { + for orig_len in 0..1000 { + let encoded_len = encoded_len(orig_len, padding).unwrap(); + + let decoded_estimate = engine + .internal_decoded_len_estimate(encoded_len) + .decoded_len_estimate(); + assert!(decoded_estimate >= orig_len); + assert!( + decoded_estimate - orig_len < 3, + "estimate: {}, encoded: {}, orig: {}", + decoded_estimate, + encoded_len, + orig_len + ); + } + } + } +} + +/// Returns a tuple of the original data length, the encoded data length (just data), and the length including padding. +/// +/// Vecs provided should be empty. 
+fn generate_random_encoded_data<E: Engine, R: rand::Rng, D: distributions::Distribution<usize>>( + engine: &E, + orig_data: &mut Vec<u8>, + encode_buf: &mut Vec<u8>, + rng: &mut R, + length_distribution: &D, +) -> (usize, usize, usize) { + let padding: bool = engine.config().encode_padding(); + + let orig_len = fill_rand(orig_data, rng, length_distribution); + let expected_encoded_len = encoded_len(orig_len, padding).unwrap(); + encode_buf.resize(expected_encoded_len, 0); + + let base_encoded_len = engine.internal_encode(&orig_data[..], &mut encode_buf[..]); + + let enc_len_with_padding = if padding { + base_encoded_len + add_padding(orig_len, &mut encode_buf[base_encoded_len..]) + } else { + base_encoded_len + }; + + assert_eq!(expected_encoded_len, enc_len_with_padding); + + (orig_len, base_encoded_len, enc_len_with_padding) +} + +// fill to a random length +fn fill_rand<R: rand::Rng, D: distributions::Distribution<usize>>( + vec: &mut Vec<u8>, + rng: &mut R, + length_distribution: &D, +) -> usize { + let len = length_distribution.sample(rng); + for _ in 0..len { + vec.push(rng.gen()); + } + + len +} + +fn fill_rand_len<R: rand::Rng>(vec: &mut Vec<u8>, rng: &mut R, len: usize) { + for _ in 0..len { + vec.push(rng.gen()); + } +} + +fn prefixed_data<'i, 'd>( + input_with_prefix: &'i mut String, + prefix_len: usize, + data: &'d str, +) -> &'i str { + input_with_prefix.truncate(prefix_len); + input_with_prefix.push_str(data); + input_with_prefix.as_str() +} + +/// A wrapper to make using engines in rstest fixtures easier. +/// The functions don't need to be instance methods, but rstest does seem +/// to want an instance, so instances are passed to test functions and then ignored. +trait EngineWrapper { + type Engine: Engine; + + /// Return an engine configured for RFC standard base64 + fn standard() -> Self::Engine; + + /// Return an engine configured for RFC standard base64, except with no padding appended on + /// encode, and required no padding on decode. 
+ fn standard_unpadded() -> Self::Engine; + + /// Return an engine configured for RFC standard alphabet with the provided encode and decode + /// pad settings + fn standard_with_pad_mode(encode_pad: bool, decode_pad_mode: DecodePaddingMode) + -> Self::Engine; + + /// Return an engine configured for RFC standard base64 that allows invalid trailing bits + fn standard_allow_trailing_bits() -> Self::Engine; + + /// Return an engine configured with a randomized alphabet and config + fn random<R: rand::Rng>(rng: &mut R) -> Self::Engine; + + /// Return an engine configured with the specified alphabet and randomized config + fn random_alphabet<R: rand::Rng>(rng: &mut R, alphabet: &Alphabet) -> Self::Engine; +} + +struct GeneralPurposeWrapper {} + +impl EngineWrapper for GeneralPurposeWrapper { + type Engine = general_purpose::GeneralPurpose; + + fn standard() -> Self::Engine { + general_purpose::GeneralPurpose::new(&STANDARD, general_purpose::PAD) + } + + fn standard_unpadded() -> Self::Engine { + general_purpose::GeneralPurpose::new(&STANDARD, general_purpose::NO_PAD) + } + + fn standard_with_pad_mode( + encode_pad: bool, + decode_pad_mode: DecodePaddingMode, + ) -> Self::Engine { + general_purpose::GeneralPurpose::new( + &STANDARD, + general_purpose::GeneralPurposeConfig::new() + .with_encode_padding(encode_pad) + .with_decode_padding_mode(decode_pad_mode), + ) + } + + fn standard_allow_trailing_bits() -> Self::Engine { + general_purpose::GeneralPurpose::new( + &STANDARD, + general_purpose::GeneralPurposeConfig::new().with_decode_allow_trailing_bits(true), + ) + } + + fn random<R: rand::Rng>(rng: &mut R) -> Self::Engine { + let alphabet = random_alphabet(rng); + + Self::random_alphabet(rng, alphabet) + } + + fn random_alphabet<R: rand::Rng>(rng: &mut R, alphabet: &Alphabet) -> Self::Engine { + general_purpose::GeneralPurpose::new(alphabet, random_config(rng)) + } +} + +struct NaiveWrapper {} + +impl EngineWrapper for NaiveWrapper { + type Engine = naive::Naive; + + fn 
standard() -> Self::Engine { + naive::Naive::new( + &STANDARD, + naive::NaiveConfig { + encode_padding: true, + decode_allow_trailing_bits: false, + decode_padding_mode: DecodePaddingMode::RequireCanonical, + }, + ) + } + + fn standard_unpadded() -> Self::Engine { + naive::Naive::new( + &STANDARD, + naive::NaiveConfig { + encode_padding: false, + decode_allow_trailing_bits: false, + decode_padding_mode: DecodePaddingMode::RequireNone, + }, + ) + } + + fn standard_with_pad_mode( + encode_pad: bool, + decode_pad_mode: DecodePaddingMode, + ) -> Self::Engine { + naive::Naive::new( + &STANDARD, + naive::NaiveConfig { + encode_padding: false, + decode_allow_trailing_bits: false, + decode_padding_mode: decode_pad_mode, + }, + ) + } + + fn standard_allow_trailing_bits() -> Self::Engine { + naive::Naive::new( + &STANDARD, + naive::NaiveConfig { + encode_padding: true, + decode_allow_trailing_bits: true, + decode_padding_mode: DecodePaddingMode::RequireCanonical, + }, + ) + } + + fn random<R: rand::Rng>(rng: &mut R) -> Self::Engine { + let alphabet = random_alphabet(rng); + + Self::random_alphabet(rng, alphabet) + } + + fn random_alphabet<R: rand::Rng>(rng: &mut R, alphabet: &Alphabet) -> Self::Engine { + let mode = rng.gen(); + + let config = naive::NaiveConfig { + encode_padding: match mode { + DecodePaddingMode::Indifferent => rng.gen(), + DecodePaddingMode::RequireCanonical => true, + DecodePaddingMode::RequireNone => false, + }, + decode_allow_trailing_bits: rng.gen(), + decode_padding_mode: mode, + }; + + naive::Naive::new(alphabet, config) + } +} + +fn seeded_rng() -> impl rand::Rng { + rngs::SmallRng::from_entropy() +} + +fn all_pad_modes() -> Vec<DecodePaddingMode> { + vec![ + DecodePaddingMode::Indifferent, + DecodePaddingMode::RequireCanonical, + DecodePaddingMode::RequireNone, + ] +} + +fn assert_all_suffixes_ok<E: Engine>(engine: E, suffixes: Vec<&str>) { + for num_prefix_quads in 0..256 { + for &suffix in suffixes.iter() { + let mut encoded = 
"AAAA".repeat(num_prefix_quads); + encoded.push_str(suffix); + + let res = &engine.decode(&encoded); + assert!(res.is_ok()); + } + } +} @@ -1,61 +1,123 @@ -//! # Configs +//! # Getting started //! -//! There isn't just one type of Base64; that would be too simple. You need to choose a character -//! set (standard, URL-safe, etc) and padding suffix (yes/no). -//! The `Config` struct encapsulates this info. There are some common configs included: `STANDARD`, -//! `URL_SAFE`, etc. You can also make your own `Config` if needed. +//! 1. Perhaps one of the preconfigured engines in [engine::general_purpose] will suit, e.g. +//! [engine::general_purpose::STANDARD_NO_PAD]. +//! - These are re-exported in [prelude] with a `BASE64_` prefix for those who prefer to +//! `use base64::prelude::*` or equivalent, e.g. [prelude::BASE64_STANDARD_NO_PAD] +//! 1. If not, choose which alphabet you want. Most usage will want [alphabet::STANDARD] or [alphabet::URL_SAFE]. +//! 1. Choose which [Engine] implementation you want. For the moment there is only one: [engine::GeneralPurpose]. +//! 1. Configure the engine appropriately using the engine's `Config` type. +//! - This is where you'll select whether to add padding (when encoding) or expect it (when +//! decoding). If given the choice, prefer no padding. +//! 1. Build the engine using the selected alphabet and config. //! -//! The functions that don't have `config` in the name (e.g. `encode()` and `decode()`) use the -//! `STANDARD` config . +//! For more detail, see below. //! -//! The functions that write to a slice (the ones that end in `_slice`) are generally the fastest -//! because they don't need to resize anything. If it fits in your workflow and you care about -//! performance, keep using the same buffer (growing as need be) and use the `_slice` methods for -//! the best performance. +//! ## Alphabets +//! +//! An [alphabet::Alphabet] defines what ASCII symbols are used to encode to or decode from. +//! +//! 
Constants in [alphabet] like [alphabet::STANDARD] or [alphabet::URL_SAFE] provide commonly used +//! alphabets, but you can also build your own custom [alphabet::Alphabet] if needed. +//! +//! ## Engines +//! +//! Once you have an `Alphabet`, you can pick which `Engine` you want. A few parts of the public +//! API provide a default, but otherwise the user must provide an `Engine` to use. +//! +//! See [Engine] for more. +//! +//! ## Config +//! +//! In addition to an `Alphabet`, constructing an `Engine` also requires an [engine::Config]. Each +//! `Engine` has a corresponding `Config` implementation since different `Engine`s may offer different +//! levels of configurability. //! //! # Encoding //! -//! Several different encoding functions are available to you depending on your desire for +//! Several different encoding methods on [Engine] are available to you depending on your desire for //! convenience vs performance. //! -//! | Function | Output | Allocates | -//! | ----------------------- | ---------------------------- | ------------------------------ | -//! | `encode` | Returns a new `String` | Always | -//! | `encode_config` | Returns a new `String` | Always | -//! | `encode_config_buf` | Appends to provided `String` | Only if `String` needs to grow | -//! | `encode_config_slice` | Writes to provided `&[u8]` | Never | +//! | Method | Output | Allocates | +//! | ------------------------ | ---------------------------- | ------------------------------ | +//! | [Engine::encode] | Returns a new `String` | Always | +//! | [Engine::encode_string] | Appends to provided `String` | Only if `String` needs to grow | +//! | [Engine::encode_slice] | Writes to provided `&[u8]` | Never - fastest | //! -//! All of the encoding functions that take a `Config` will pad as per the config. +//! All of the encoding methods will pad as per the engine's config. //! //! # Decoding //! -//! Just as for encoding, there are different decoding functions available. +//! 
Just as for encoding, there are different decoding methods available. //! -//! | Function | Output | Allocates | -//! | ----------------------- | ----------------------------- | ------------------------------ | -//! | `decode` | Returns a new `Vec<u8>` | Always | -//! | `decode_config` | Returns a new `Vec<u8>` | Always | -//! | `decode_config_buf` | Appends to provided `Vec<u8>` | Only if `Vec` needs to grow | -//! | `decode_config_slice` | Writes to provided `&[u8]` | Never | +//! | Method | Output | Allocates | +//! | ------------------------ | ----------------------------- | ------------------------------ | +//! | [Engine::decode] | Returns a new `Vec<u8>` | Always | +//! | [Engine::decode_vec] | Appends to provided `Vec<u8>` | Only if `Vec` needs to grow | +//! | [Engine::decode_slice] | Writes to provided `&[u8]` | Never - fastest | //! -//! Unlike encoding, where all possible input is valid, decoding can fail (see `DecodeError`). +//! Unlike encoding, where all possible input is valid, decoding can fail (see [DecodeError]). //! -//! Input can be invalid because it has invalid characters or invalid padding. (No padding at all is -//! valid, but excess padding is not.) Whitespace in the input is invalid. +//! Input can be invalid because it has invalid characters or invalid padding. The nature of how +//! padding is checked depends on the engine's config. +//! Whitespace in the input is invalid, just like any other non-base64 byte. //! //! # `Read` and `Write` //! -//! To map a `Read` of b64 bytes to the decoded bytes, wrap a reader (file, network socket, etc) -//! with `base64::read::DecoderReader`. To write raw bytes and have them b64 encoded on the fly, -//! wrap a writer with `base64::write::EncoderWriter`. There is some performance overhead (15% or -//! so) because of the necessary buffer shuffling -- still fast enough that almost nobody cares. -//! Also, these implementations do not heap allocate. +//! 
To decode a [std::io::Read] of b64 bytes, wrap a reader (file, network socket, etc) with +//! [read::DecoderReader]. +//! +//! To write raw bytes and have them b64 encoded on the fly, wrap a [std::io::Write] with +//! [write::EncoderWriter]. +//! +//! There is some performance overhead (15% or so) because of the necessary buffer shuffling -- +//! still fast enough that almost nobody cares. Also, these implementations do not heap allocate. +//! +//! # `Display` +//! +//! See [display] for how to transparently base64 data via a `Display` implementation. +//! +//! # Examples +//! +//! ## Using predefined engines +//! +//! ``` +//! use base64::{Engine as _, engine::general_purpose}; +//! +//! let orig = b"data"; +//! let encoded: String = general_purpose::STANDARD_NO_PAD.encode(orig); +//! assert_eq!("ZGF0YQ", encoded); +//! assert_eq!(orig.as_slice(), &general_purpose::STANDARD_NO_PAD.decode(encoded).unwrap()); +//! +//! // or, URL-safe +//! let encoded_url = general_purpose::URL_SAFE_NO_PAD.encode(orig); +//! ``` +//! +//! ## Custom alphabet, config, and engine +//! +//! ``` +//! use base64::{engine, alphabet, Engine as _}; +//! +//! // bizarro-world base64: +/ as the first symbols instead of the last +//! let alphabet = +//! alphabet::Alphabet::new("+/ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") +//! .unwrap(); +//! +//! // a very weird config that encodes with padding but requires no padding when decoding...? +//! let crazy_config = engine::GeneralPurposeConfig::new() +//! .with_decode_allow_trailing_bits(true) +//! .with_encode_padding(true) +//! .with_decode_padding_mode(engine::DecodePaddingMode::RequireNone); +//! +//! let crazy_engine = engine::GeneralPurpose::new(&alphabet, crazy_config); +//! +//! let encoded = crazy_engine.encode(b"abc 123"); +//! +//! ``` //! //! # Panics //! //! If length calculations result in overflowing `usize`, a panic will result. -//! -//! 
The `_slice` flavors of encode or decode will panic if the provided output slice is too small, #![cfg_attr(feature = "cargo-clippy", allow(clippy::cast_lossless))] #![deny( @@ -69,6 +131,9 @@ warnings )] #![forbid(unsafe_code)] +// Allow globally until https://github.com/rust-lang/rust-clippy/issues/8768 is resolved. +// The desired state is to allow it only for the rstest_reuse import. +#![allow(clippy::single_component_path_imports)] #![cfg_attr(not(any(feature = "std", test)), no_std)] #[cfg(all(feature = "alloc", not(any(feature = "std", test))))] @@ -76,170 +141,39 @@ extern crate alloc; #[cfg(any(feature = "std", test))] extern crate std as alloc; +// has to be included at top level because of the way rstest_reuse defines its macros +#[cfg(test)] +use rstest_reuse; + mod chunked_encoder; pub mod display; #[cfg(any(feature = "std", test))] pub mod read; -mod tables; #[cfg(any(feature = "std", test))] pub mod write; +pub mod engine; +pub use engine::Engine; + +pub mod alphabet; + mod encode; -pub use crate::encode::encode_config_slice; +#[allow(deprecated)] #[cfg(any(feature = "alloc", feature = "std", test))] -pub use crate::encode::{encode, encode_config, encode_config_buf}; +pub use crate::encode::{encode, encode_engine, encode_engine_string}; +#[allow(deprecated)] +pub use crate::encode::{encode_engine_slice, encoded_len, EncodeSliceError}; mod decode; +#[allow(deprecated)] #[cfg(any(feature = "alloc", feature = "std", test))] -pub use crate::decode::{decode, decode_config, decode_config_buf}; -pub use crate::decode::{decode_config_slice, DecodeError}; +pub use crate::decode::{decode, decode_engine, decode_engine_vec}; +#[allow(deprecated)] +pub use crate::decode::{decode_engine_slice, decoded_len_estimate, DecodeError, DecodeSliceError}; + +pub mod prelude; #[cfg(test)] mod tests; -/// Available encoding character sets -#[derive(Clone, Copy, Debug)] -pub enum CharacterSet { - /// The standard character set (uses `+` and `/`). 
- /// - /// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-3). - Standard, - /// The URL safe character set (uses `-` and `_`). - /// - /// See [RFC 3548](https://tools.ietf.org/html/rfc3548#section-4). - UrlSafe, - /// The `crypt(3)` character set (uses `./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz`). - /// - /// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses. - Crypt, - /// The bcrypt character set (uses `./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`). - Bcrypt, - /// The character set used in IMAP-modified UTF-7 (uses `+` and `,`). - /// - /// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3) - ImapMutf7, - /// The character set used in BinHex 4.0 files. - /// - /// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt) - BinHex, -} - -impl CharacterSet { - fn encode_table(self) -> &'static [u8; 64] { - match self { - CharacterSet::Standard => tables::STANDARD_ENCODE, - CharacterSet::UrlSafe => tables::URL_SAFE_ENCODE, - CharacterSet::Crypt => tables::CRYPT_ENCODE, - CharacterSet::Bcrypt => tables::BCRYPT_ENCODE, - CharacterSet::ImapMutf7 => tables::IMAP_MUTF7_ENCODE, - CharacterSet::BinHex => tables::BINHEX_ENCODE, - } - } - - fn decode_table(self) -> &'static [u8; 256] { - match self { - CharacterSet::Standard => tables::STANDARD_DECODE, - CharacterSet::UrlSafe => tables::URL_SAFE_DECODE, - CharacterSet::Crypt => tables::CRYPT_DECODE, - CharacterSet::Bcrypt => tables::BCRYPT_DECODE, - CharacterSet::ImapMutf7 => tables::IMAP_MUTF7_DECODE, - CharacterSet::BinHex => tables::BINHEX_DECODE, - } - } -} - -/// Contains configuration parameters for base64 encoding -#[derive(Clone, Copy, Debug)] -pub struct Config { - /// Character set to use - char_set: CharacterSet, - /// True to pad output with `=` characters - pad: bool, - /// True to ignore excess nonzero bits in the last few symbols, otherwise an error is returned. 
- decode_allow_trailing_bits: bool, -} - -impl Config { - /// Create a new `Config`. - pub const fn new(char_set: CharacterSet, pad: bool) -> Config { - Config { - char_set, - pad, - decode_allow_trailing_bits: false, - } - } - - /// Sets whether to pad output with `=` characters. - pub const fn pad(self, pad: bool) -> Config { - Config { pad, ..self } - } - - /// Sets whether to emit errors for nonzero trailing bits. - /// - /// This is useful when implementing - /// [forgiving-base64 decode](https://infra.spec.whatwg.org/#forgiving-base64-decode). - pub const fn decode_allow_trailing_bits(self, allow: bool) -> Config { - Config { - decode_allow_trailing_bits: allow, - ..self - } - } -} - -/// Standard character set with padding. -pub const STANDARD: Config = Config { - char_set: CharacterSet::Standard, - pad: true, - decode_allow_trailing_bits: false, -}; - -/// Standard character set without padding. -pub const STANDARD_NO_PAD: Config = Config { - char_set: CharacterSet::Standard, - pad: false, - decode_allow_trailing_bits: false, -}; - -/// URL-safe character set with padding -pub const URL_SAFE: Config = Config { - char_set: CharacterSet::UrlSafe, - pad: true, - decode_allow_trailing_bits: false, -}; - -/// URL-safe character set without padding -pub const URL_SAFE_NO_PAD: Config = Config { - char_set: CharacterSet::UrlSafe, - pad: false, - decode_allow_trailing_bits: false, -}; - -/// As per `crypt(3)` requirements -pub const CRYPT: Config = Config { - char_set: CharacterSet::Crypt, - pad: false, - decode_allow_trailing_bits: false, -}; - -/// Bcrypt character set -pub const BCRYPT: Config = Config { - char_set: CharacterSet::Bcrypt, - pad: false, - decode_allow_trailing_bits: false, -}; - -/// IMAP modified UTF-7 requirements -pub const IMAP_MUTF7: Config = Config { - char_set: CharacterSet::ImapMutf7, - pad: false, - decode_allow_trailing_bits: false, -}; - -/// BinHex character set -pub const BINHEX: Config = Config { - char_set: CharacterSet::BinHex, - 
pad: false, - decode_allow_trailing_bits: false, -}; - const PAD_BYTE: u8 = b'='; diff --git a/src/prelude.rs b/src/prelude.rs new file mode 100644 index 0000000..fbeb5ba --- /dev/null +++ b/src/prelude.rs @@ -0,0 +1,19 @@ +//! Preconfigured engines for common use cases. +//! +//! These are re-exports of `const` engines in [crate::engine::general_purpose], renamed with a `BASE64_` +//! prefix for those who prefer to `use` the entire path to a name. +//! +//! # Examples +//! +//! ``` +//! use base64::prelude::{Engine as _, BASE64_STANDARD_NO_PAD}; +//! +//! assert_eq!("c29tZSBieXRlcw", &BASE64_STANDARD_NO_PAD.encode(b"some bytes")); +//! ``` + +pub use crate::engine::Engine; + +pub use crate::engine::general_purpose::STANDARD as BASE64_STANDARD; +pub use crate::engine::general_purpose::STANDARD_NO_PAD as BASE64_STANDARD_NO_PAD; +pub use crate::engine::general_purpose::URL_SAFE as BASE64_URL_SAFE; +pub use crate::engine::general_purpose::URL_SAFE_NO_PAD as BASE64_URL_SAFE_NO_PAD; diff --git a/src/read/decoder.rs b/src/read/decoder.rs index 7a9c4cd..4888c9c 100644 --- a/src/read/decoder.rs +++ b/src/read/decoder.rs @@ -1,5 +1,4 @@ -use crate::{decode_config_slice, Config, DecodeError}; -use std::io::Read; +use crate::{engine::Engine, DecodeError}; use std::{cmp, fmt, io}; // This should be large, but it has to fit on the stack. @@ -16,11 +15,13 @@ const DECODED_CHUNK_SIZE: usize = 3; /// ``` /// use std::io::Read; /// use std::io::Cursor; +/// use base64::engine::general_purpose; /// /// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc. 
/// let mut wrapped_reader = Cursor::new(b"YXNkZg=="); /// let mut decoder = base64::read::DecoderReader::new( -/// &mut wrapped_reader, base64::STANDARD); +/// &mut wrapped_reader, +/// &general_purpose::STANDARD); /// /// // handle errors as you normally would /// let mut result = Vec::new(); @@ -29,10 +30,10 @@ const DECODED_CHUNK_SIZE: usize = 3; /// assert_eq!(b"asdf", &result[..]); /// /// ``` -pub struct DecoderReader<'a, R: 'a + io::Read> { - config: Config, +pub struct DecoderReader<'e, E: Engine, R: io::Read> { + engine: &'e E, /// Where b64 data is read from - r: &'a mut R, + inner: R, // Holds b64 data read from the delegate reader. b64_buffer: [u8; BUF_SIZE], @@ -54,10 +55,9 @@ pub struct DecoderReader<'a, R: 'a + io::Read> { total_b64_decoded: usize, } -impl<'a, R: io::Read> fmt::Debug for DecoderReader<'a, R> { +impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("DecoderReader") - .field("config", &self.config) .field("b64_offset", &self.b64_offset) .field("b64_len", &self.b64_len) .field("decoded_buffer", &self.decoded_buffer) @@ -68,12 +68,12 @@ impl<'a, R: io::Read> fmt::Debug for DecoderReader<'a, R> { } } -impl<'a, R: io::Read> DecoderReader<'a, R> { +impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> { /// Create a new decoder that will read from the provided reader `r`. - pub fn new(r: &'a mut R, config: Config) -> Self { + pub fn new(reader: R, engine: &'e E) -> Self { DecoderReader { - config, - r, + engine, + inner: reader, b64_buffer: [0; BUF_SIZE], b64_offset: 0, b64_len: 0, @@ -89,7 +89,7 @@ impl<'a, R: io::Read> DecoderReader<'a, R> { /// Returns a Result with the number of (decoded) bytes copied. 
fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> { debug_assert!(self.decoded_len > 0); - debug_assert!(buf.len() > 0); + debug_assert!(!buf.is_empty()); let copy_len = cmp::min(self.decoded_len, buf.len()); debug_assert!(copy_len > 0); @@ -114,13 +114,13 @@ impl<'a, R: io::Read> DecoderReader<'a, R> { debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE); let read = self - .r + .inner .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?; self.b64_len += read; debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); - return Ok(read); + Ok(read) } /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the @@ -130,23 +130,26 @@ impl<'a, R: io::Read> DecoderReader<'a, R> { fn decode_to_buf(&mut self, num_bytes: usize, buf: &mut [u8]) -> io::Result<usize> { debug_assert!(self.b64_len >= num_bytes); debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); - debug_assert!(buf.len() > 0); - - let decoded = decode_config_slice( - &self.b64_buffer[self.b64_offset..self.b64_offset + num_bytes], - self.config, - &mut buf[..], - ) - .map_err(|e| match e { - DecodeError::InvalidByte(offset, byte) => { - DecodeError::InvalidByte(self.total_b64_decoded + offset, byte) - } - DecodeError::InvalidLength => DecodeError::InvalidLength, - DecodeError::InvalidLastSymbol(offset, byte) => { - DecodeError::InvalidLastSymbol(self.total_b64_decoded + offset, byte) - } - }) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + debug_assert!(!buf.is_empty()); + + let decoded = self + .engine + .internal_decode( + &self.b64_buffer[self.b64_offset..self.b64_offset + num_bytes], + buf, + self.engine.internal_decoded_len_estimate(num_bytes), + ) + .map_err(|e| match e { + DecodeError::InvalidByte(offset, byte) => { + DecodeError::InvalidByte(self.total_b64_decoded + offset, byte) + } + DecodeError::InvalidLength => DecodeError::InvalidLength, + DecodeError::InvalidLastSymbol(offset, byte) => { + 
DecodeError::InvalidLastSymbol(self.total_b64_decoded + offset, byte) + } + DecodeError::InvalidPadding => DecodeError::InvalidPadding, + }) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; self.total_b64_decoded += num_bytes; self.b64_offset += num_bytes; @@ -156,9 +159,19 @@ impl<'a, R: io::Read> DecoderReader<'a, R> { Ok(decoded) } + + /// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded + /// input from. + /// + /// Because `DecoderReader` performs internal buffering, the state of the inner reader is + /// unspecified. This function is mainly provided because the inner reader type may provide + /// additional functionality beyond the `Read` implementation which may still be useful. + pub fn into_inner(self) -> R { + self.inner + } } -impl<'a, R: Read> Read for DecoderReader<'a, R> { +impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> { /// Decode input from the wrapped reader. /// /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes @@ -172,7 +185,7 @@ impl<'a, R: Read> Read for DecoderReader<'a, R> { /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid /// base64 are also possible, and will have `io::ErrorKind::InvalidData`. 
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { - if buf.len() == 0 { + if buf.is_empty() { return Ok(0); } diff --git a/src/read/decoder_tests.rs b/src/read/decoder_tests.rs index 265d423..65d58d8 100644 --- a/src/read/decoder_tests.rs +++ b/src/read/decoder_tests.rs @@ -1,12 +1,17 @@ -use std::io::{self, Read}; +use std::{ + cmp, + io::{self, Read as _}, + iter, +}; -use rand::{Rng, RngCore}; -use std::{cmp, iter}; +use rand::{Rng as _, RngCore as _}; use super::decoder::{DecoderReader, BUF_SIZE}; -use crate::encode::encode_config_buf; -use crate::tests::random_config; -use crate::{decode_config_buf, DecodeError, STANDARD}; +use crate::{ + engine::{general_purpose::STANDARD, Engine, GeneralPurpose}, + tests::{random_alphabet, random_config, random_engine}, + DecodeError, +}; #[test] fn simple() { @@ -27,7 +32,7 @@ fn simple() { // Read n bytes at a time. for n in 1..base64data.len() + 1 { let mut wrapped_reader = io::Cursor::new(base64data); - let mut decoder = DecoderReader::new(&mut wrapped_reader, STANDARD); + let mut decoder = DecoderReader::new(&mut wrapped_reader, &STANDARD); // handle errors as you normally would let mut text_got = Vec::new(); @@ -59,7 +64,7 @@ fn trailing_junk() { // Read n bytes at a time. 
for n in 1..base64data.len() + 1 { let mut wrapped_reader = io::Cursor::new(base64data); - let mut decoder = DecoderReader::new(&mut wrapped_reader, STANDARD); + let mut decoder = DecoderReader::new(&mut wrapped_reader, &STANDARD); // handle errors as you normally would let mut buffer = vec![0u8; n]; @@ -92,14 +97,14 @@ fn handles_short_read_from_delegate() { b64.clear(); decoded.clear(); - let size = rng.gen_range(0, 10 * BUF_SIZE); + let size = rng.gen_range(0..(10 * BUF_SIZE)); bytes.extend(iter::repeat(0).take(size)); bytes.truncate(size); rng.fill_bytes(&mut bytes[..size]); assert_eq!(size, bytes.len()); - let config = random_config(&mut rng); - encode_config_buf(&bytes[..], config, &mut b64); + let engine = random_engine(&mut rng); + engine.encode_string(&bytes[..], &mut b64); let mut wrapped_reader = io::Cursor::new(b64.as_bytes()); let mut short_reader = RandomShortRead { @@ -107,7 +112,7 @@ fn handles_short_read_from_delegate() { rng: &mut rng, }; - let mut decoder = DecoderReader::new(&mut short_reader, config); + let mut decoder = DecoderReader::new(&mut short_reader, &engine); let decoded_len = decoder.read_to_end(&mut decoded).unwrap(); assert_eq!(size, decoded_len); @@ -127,7 +132,7 @@ fn read_in_short_increments() { b64.clear(); decoded.clear(); - let size = rng.gen_range(0, 10 * BUF_SIZE); + let size = rng.gen_range(0..(10 * BUF_SIZE)); bytes.extend(iter::repeat(0).take(size)); // leave room to play around with larger buffers decoded.extend(iter::repeat(0).take(size * 3)); @@ -135,12 +140,12 @@ fn read_in_short_increments() { rng.fill_bytes(&mut bytes[..]); assert_eq!(size, bytes.len()); - let config = random_config(&mut rng); + let engine = random_engine(&mut rng); - encode_config_buf(&bytes[..], config, &mut b64); + engine.encode_string(&bytes[..], &mut b64); let mut wrapped_reader = io::Cursor::new(&b64[..]); - let mut decoder = DecoderReader::new(&mut wrapped_reader, config); + let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine); 
consume_with_short_reads_and_validate(&mut rng, &bytes[..], &mut decoded, &mut decoder); } @@ -158,7 +163,7 @@ fn read_in_short_increments_with_short_delegate_reads() { b64.clear(); decoded.clear(); - let size = rng.gen_range(0, 10 * BUF_SIZE); + let size = rng.gen_range(0..(10 * BUF_SIZE)); bytes.extend(iter::repeat(0).take(size)); // leave room to play around with larger buffers decoded.extend(iter::repeat(0).take(size * 3)); @@ -166,18 +171,23 @@ fn read_in_short_increments_with_short_delegate_reads() { rng.fill_bytes(&mut bytes[..]); assert_eq!(size, bytes.len()); - let config = random_config(&mut rng); + let engine = random_engine(&mut rng); - encode_config_buf(&bytes[..], config, &mut b64); + engine.encode_string(&bytes[..], &mut b64); let mut base_reader = io::Cursor::new(&b64[..]); - let mut decoder = DecoderReader::new(&mut base_reader, config); + let mut decoder = DecoderReader::new(&mut base_reader, &engine); let mut short_reader = RandomShortRead { delegate: &mut decoder, rng: &mut rand::thread_rng(), }; - consume_with_short_reads_and_validate(&mut rng, &bytes[..], &mut decoded, &mut short_reader) + consume_with_short_reads_and_validate( + &mut rng, + &bytes[..], + &mut decoded, + &mut short_reader, + ); } } @@ -195,32 +205,32 @@ fn reports_invalid_last_symbol_correctly() { b64.clear(); b64_bytes.clear(); - let size = rng.gen_range(1, 10 * BUF_SIZE); + let size = rng.gen_range(1..(10 * BUF_SIZE)); bytes.extend(iter::repeat(0).take(size)); decoded.extend(iter::repeat(0).take(size)); rng.fill_bytes(&mut bytes[..]); assert_eq!(size, bytes.len()); - let mut config = random_config(&mut rng); + let config = random_config(&mut rng); + let alphabet = random_alphabet(&mut rng); // changing padding will cause invalid padding errors when we twiddle the last byte - config.pad = false; - - encode_config_buf(&bytes[..], config, &mut b64); + let engine = GeneralPurpose::new(alphabet, config.with_encode_padding(false)); + engine.encode_string(&bytes[..], &mut b64); 
b64_bytes.extend(b64.bytes()); assert_eq!(b64_bytes.len(), b64.len()); // change the last character to every possible symbol. Should behave the same as bulk // decoding whether invalid or valid. - for &s1 in config.char_set.encode_table().iter() { + for &s1 in alphabet.symbols.iter() { decoded.clear(); bulk_decoded.clear(); // replace the last *b64_bytes.last_mut().unwrap() = s1; - let bulk_res = decode_config_buf(&b64_bytes[..], config, &mut bulk_decoded); + let bulk_res = engine.decode_vec(&b64_bytes[..], &mut bulk_decoded); let mut wrapped_reader = io::Cursor::new(&b64_bytes[..]); - let mut decoder = DecoderReader::new(&mut wrapped_reader, config); + let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine); let stream_res = decoder.read_to_end(&mut decoded).map(|_| ()).map_err(|e| { e.into_inner() @@ -244,20 +254,21 @@ fn reports_invalid_byte_correctly() { b64.clear(); decoded.clear(); - let size = rng.gen_range(1, 10 * BUF_SIZE); + let size = rng.gen_range(1..(10 * BUF_SIZE)); bytes.extend(iter::repeat(0).take(size)); rng.fill_bytes(&mut bytes[..size]); assert_eq!(size, bytes.len()); - let config = random_config(&mut rng); - encode_config_buf(&bytes[..], config, &mut b64); + let engine = random_engine(&mut rng); + + engine.encode_string(&bytes[..], &mut b64); // replace one byte, somewhere, with '*', which is invalid - let bad_byte_pos = rng.gen_range(0, &b64.len()); + let bad_byte_pos = rng.gen_range(0..b64.len()); let mut b64_bytes = b64.bytes().collect::<Vec<u8>>(); b64_bytes[bad_byte_pos] = b'*'; let mut wrapped_reader = io::Cursor::new(b64_bytes.clone()); - let mut decoder = DecoderReader::new(&mut wrapped_reader, config); + let mut decoder = DecoderReader::new(&mut wrapped_reader, &engine); // some gymnastics to avoid double-moving the io::Error, which is not Copy let read_decode_err = decoder @@ -273,7 +284,7 @@ fn reports_invalid_byte_correctly() { .and_then(|o| o); let mut bulk_buf = Vec::new(); - let bulk_decode_err = 
decode_config_buf(&b64_bytes[..], config, &mut bulk_buf).err(); + let bulk_decode_err = engine.decode_vec(&b64_bytes[..], &mut bulk_buf).err(); // it's tricky to predict where the invalid data's offset will be since if it's in the last // chunk it will be reported at the first padding location because it's treated as invalid @@ -285,12 +296,12 @@ fn reports_invalid_byte_correctly() { } } -fn consume_with_short_reads_and_validate<R: Read>( +fn consume_with_short_reads_and_validate<R: io::Read>( rng: &mut rand::rngs::ThreadRng, expected_bytes: &[u8], - decoded: &mut Vec<u8>, + decoded: &mut [u8], short_reader: &mut R, -) -> () { +) { let mut total_read = 0_usize; loop { assert!( @@ -302,13 +313,13 @@ fn consume_with_short_reads_and_validate<R: Read>( if total_read == expected_bytes.len() { assert_eq!(expected_bytes, &decoded[..total_read]); // should be done - assert_eq!(0, short_reader.read(&mut decoded[..]).unwrap()); + assert_eq!(0, short_reader.read(&mut *decoded).unwrap()); // didn't write anything assert_eq!(expected_bytes, &decoded[..total_read]); break; } - let decode_len = rng.gen_range(1, cmp::max(2, expected_bytes.len() * 2)); + let decode_len = rng.gen_range(1..cmp::max(2, expected_bytes.len() * 2)); let read = short_reader .read(&mut decoded[total_read..total_read + decode_len]) @@ -328,7 +339,7 @@ struct RandomShortRead<'a, 'b, R: io::Read, N: rand::Rng> { impl<'a, 'b, R: io::Read, N: rand::Rng> io::Read for RandomShortRead<'a, 'b, R, N> { fn read(&mut self, buf: &mut [u8]) -> Result<usize, io::Error> { // avoid 0 since it means EOF for non-empty buffers - let effective_len = cmp::min(self.rng.gen_range(1, 20), buf.len()); + let effective_len = cmp::min(self.rng.gen_range(1..20), buf.len()); self.delegate.read(&mut buf[..effective_len]) } diff --git a/src/tables.rs b/src/tables.rs deleted file mode 100644 index a45851c..0000000 --- a/src/tables.rs +++ /dev/null @@ -1,1957 +0,0 @@ -pub const INVALID_VALUE: u8 = 255; -#[rustfmt::skip] -pub const 
STANDARD_ENCODE: &[u8; 64] = &[ - 65, // input 0 (0x0) => 'A' (0x41) - 66, // input 1 (0x1) => 'B' (0x42) - 67, // input 2 (0x2) => 'C' (0x43) - 68, // input 3 (0x3) => 'D' (0x44) - 69, // input 4 (0x4) => 'E' (0x45) - 70, // input 5 (0x5) => 'F' (0x46) - 71, // input 6 (0x6) => 'G' (0x47) - 72, // input 7 (0x7) => 'H' (0x48) - 73, // input 8 (0x8) => 'I' (0x49) - 74, // input 9 (0x9) => 'J' (0x4A) - 75, // input 10 (0xA) => 'K' (0x4B) - 76, // input 11 (0xB) => 'L' (0x4C) - 77, // input 12 (0xC) => 'M' (0x4D) - 78, // input 13 (0xD) => 'N' (0x4E) - 79, // input 14 (0xE) => 'O' (0x4F) - 80, // input 15 (0xF) => 'P' (0x50) - 81, // input 16 (0x10) => 'Q' (0x51) - 82, // input 17 (0x11) => 'R' (0x52) - 83, // input 18 (0x12) => 'S' (0x53) - 84, // input 19 (0x13) => 'T' (0x54) - 85, // input 20 (0x14) => 'U' (0x55) - 86, // input 21 (0x15) => 'V' (0x56) - 87, // input 22 (0x16) => 'W' (0x57) - 88, // input 23 (0x17) => 'X' (0x58) - 89, // input 24 (0x18) => 'Y' (0x59) - 90, // input 25 (0x19) => 'Z' (0x5A) - 97, // input 26 (0x1A) => 'a' (0x61) - 98, // input 27 (0x1B) => 'b' (0x62) - 99, // input 28 (0x1C) => 'c' (0x63) - 100, // input 29 (0x1D) => 'd' (0x64) - 101, // input 30 (0x1E) => 'e' (0x65) - 102, // input 31 (0x1F) => 'f' (0x66) - 103, // input 32 (0x20) => 'g' (0x67) - 104, // input 33 (0x21) => 'h' (0x68) - 105, // input 34 (0x22) => 'i' (0x69) - 106, // input 35 (0x23) => 'j' (0x6A) - 107, // input 36 (0x24) => 'k' (0x6B) - 108, // input 37 (0x25) => 'l' (0x6C) - 109, // input 38 (0x26) => 'm' (0x6D) - 110, // input 39 (0x27) => 'n' (0x6E) - 111, // input 40 (0x28) => 'o' (0x6F) - 112, // input 41 (0x29) => 'p' (0x70) - 113, // input 42 (0x2A) => 'q' (0x71) - 114, // input 43 (0x2B) => 'r' (0x72) - 115, // input 44 (0x2C) => 's' (0x73) - 116, // input 45 (0x2D) => 't' (0x74) - 117, // input 46 (0x2E) => 'u' (0x75) - 118, // input 47 (0x2F) => 'v' (0x76) - 119, // input 48 (0x30) => 'w' (0x77) - 120, // input 49 (0x31) => 'x' (0x78) - 121, // input 50 
(0x32) => 'y' (0x79) - 122, // input 51 (0x33) => 'z' (0x7A) - 48, // input 52 (0x34) => '0' (0x30) - 49, // input 53 (0x35) => '1' (0x31) - 50, // input 54 (0x36) => '2' (0x32) - 51, // input 55 (0x37) => '3' (0x33) - 52, // input 56 (0x38) => '4' (0x34) - 53, // input 57 (0x39) => '5' (0x35) - 54, // input 58 (0x3A) => '6' (0x36) - 55, // input 59 (0x3B) => '7' (0x37) - 56, // input 60 (0x3C) => '8' (0x38) - 57, // input 61 (0x3D) => '9' (0x39) - 43, // input 62 (0x3E) => '+' (0x2B) - 47, // input 63 (0x3F) => '/' (0x2F) -]; -#[rustfmt::skip] -pub const STANDARD_DECODE: &[u8; 256] = &[ - INVALID_VALUE, // input 0 (0x0) - INVALID_VALUE, // input 1 (0x1) - INVALID_VALUE, // input 2 (0x2) - INVALID_VALUE, // input 3 (0x3) - INVALID_VALUE, // input 4 (0x4) - INVALID_VALUE, // input 5 (0x5) - INVALID_VALUE, // input 6 (0x6) - INVALID_VALUE, // input 7 (0x7) - INVALID_VALUE, // input 8 (0x8) - INVALID_VALUE, // input 9 (0x9) - INVALID_VALUE, // input 10 (0xA) - INVALID_VALUE, // input 11 (0xB) - INVALID_VALUE, // input 12 (0xC) - INVALID_VALUE, // input 13 (0xD) - INVALID_VALUE, // input 14 (0xE) - INVALID_VALUE, // input 15 (0xF) - INVALID_VALUE, // input 16 (0x10) - INVALID_VALUE, // input 17 (0x11) - INVALID_VALUE, // input 18 (0x12) - INVALID_VALUE, // input 19 (0x13) - INVALID_VALUE, // input 20 (0x14) - INVALID_VALUE, // input 21 (0x15) - INVALID_VALUE, // input 22 (0x16) - INVALID_VALUE, // input 23 (0x17) - INVALID_VALUE, // input 24 (0x18) - INVALID_VALUE, // input 25 (0x19) - INVALID_VALUE, // input 26 (0x1A) - INVALID_VALUE, // input 27 (0x1B) - INVALID_VALUE, // input 28 (0x1C) - INVALID_VALUE, // input 29 (0x1D) - INVALID_VALUE, // input 30 (0x1E) - INVALID_VALUE, // input 31 (0x1F) - INVALID_VALUE, // input 32 (0x20) - INVALID_VALUE, // input 33 (0x21) - INVALID_VALUE, // input 34 (0x22) - INVALID_VALUE, // input 35 (0x23) - INVALID_VALUE, // input 36 (0x24) - INVALID_VALUE, // input 37 (0x25) - INVALID_VALUE, // input 38 (0x26) - INVALID_VALUE, // input 
39 (0x27) - INVALID_VALUE, // input 40 (0x28) - INVALID_VALUE, // input 41 (0x29) - INVALID_VALUE, // input 42 (0x2A) - 62, // input 43 (0x2B char '+') => 62 (0x3E) - INVALID_VALUE, // input 44 (0x2C) - INVALID_VALUE, // input 45 (0x2D) - INVALID_VALUE, // input 46 (0x2E) - 63, // input 47 (0x2F char '/') => 63 (0x3F) - 52, // input 48 (0x30 char '0') => 52 (0x34) - 53, // input 49 (0x31 char '1') => 53 (0x35) - 54, // input 50 (0x32 char '2') => 54 (0x36) - 55, // input 51 (0x33 char '3') => 55 (0x37) - 56, // input 52 (0x34 char '4') => 56 (0x38) - 57, // input 53 (0x35 char '5') => 57 (0x39) - 58, // input 54 (0x36 char '6') => 58 (0x3A) - 59, // input 55 (0x37 char '7') => 59 (0x3B) - 60, // input 56 (0x38 char '8') => 60 (0x3C) - 61, // input 57 (0x39 char '9') => 61 (0x3D) - INVALID_VALUE, // input 58 (0x3A) - INVALID_VALUE, // input 59 (0x3B) - INVALID_VALUE, // input 60 (0x3C) - INVALID_VALUE, // input 61 (0x3D) - INVALID_VALUE, // input 62 (0x3E) - INVALID_VALUE, // input 63 (0x3F) - INVALID_VALUE, // input 64 (0x40) - 0, // input 65 (0x41 char 'A') => 0 (0x0) - 1, // input 66 (0x42 char 'B') => 1 (0x1) - 2, // input 67 (0x43 char 'C') => 2 (0x2) - 3, // input 68 (0x44 char 'D') => 3 (0x3) - 4, // input 69 (0x45 char 'E') => 4 (0x4) - 5, // input 70 (0x46 char 'F') => 5 (0x5) - 6, // input 71 (0x47 char 'G') => 6 (0x6) - 7, // input 72 (0x48 char 'H') => 7 (0x7) - 8, // input 73 (0x49 char 'I') => 8 (0x8) - 9, // input 74 (0x4A char 'J') => 9 (0x9) - 10, // input 75 (0x4B char 'K') => 10 (0xA) - 11, // input 76 (0x4C char 'L') => 11 (0xB) - 12, // input 77 (0x4D char 'M') => 12 (0xC) - 13, // input 78 (0x4E char 'N') => 13 (0xD) - 14, // input 79 (0x4F char 'O') => 14 (0xE) - 15, // input 80 (0x50 char 'P') => 15 (0xF) - 16, // input 81 (0x51 char 'Q') => 16 (0x10) - 17, // input 82 (0x52 char 'R') => 17 (0x11) - 18, // input 83 (0x53 char 'S') => 18 (0x12) - 19, // input 84 (0x54 char 'T') => 19 (0x13) - 20, // input 85 (0x55 char 'U') => 20 (0x14) - 21, 
// input 86 (0x56 char 'V') => 21 (0x15) - 22, // input 87 (0x57 char 'W') => 22 (0x16) - 23, // input 88 (0x58 char 'X') => 23 (0x17) - 24, // input 89 (0x59 char 'Y') => 24 (0x18) - 25, // input 90 (0x5A char 'Z') => 25 (0x19) - INVALID_VALUE, // input 91 (0x5B) - INVALID_VALUE, // input 92 (0x5C) - INVALID_VALUE, // input 93 (0x5D) - INVALID_VALUE, // input 94 (0x5E) - INVALID_VALUE, // input 95 (0x5F) - INVALID_VALUE, // input 96 (0x60) - 26, // input 97 (0x61 char 'a') => 26 (0x1A) - 27, // input 98 (0x62 char 'b') => 27 (0x1B) - 28, // input 99 (0x63 char 'c') => 28 (0x1C) - 29, // input 100 (0x64 char 'd') => 29 (0x1D) - 30, // input 101 (0x65 char 'e') => 30 (0x1E) - 31, // input 102 (0x66 char 'f') => 31 (0x1F) - 32, // input 103 (0x67 char 'g') => 32 (0x20) - 33, // input 104 (0x68 char 'h') => 33 (0x21) - 34, // input 105 (0x69 char 'i') => 34 (0x22) - 35, // input 106 (0x6A char 'j') => 35 (0x23) - 36, // input 107 (0x6B char 'k') => 36 (0x24) - 37, // input 108 (0x6C char 'l') => 37 (0x25) - 38, // input 109 (0x6D char 'm') => 38 (0x26) - 39, // input 110 (0x6E char 'n') => 39 (0x27) - 40, // input 111 (0x6F char 'o') => 40 (0x28) - 41, // input 112 (0x70 char 'p') => 41 (0x29) - 42, // input 113 (0x71 char 'q') => 42 (0x2A) - 43, // input 114 (0x72 char 'r') => 43 (0x2B) - 44, // input 115 (0x73 char 's') => 44 (0x2C) - 45, // input 116 (0x74 char 't') => 45 (0x2D) - 46, // input 117 (0x75 char 'u') => 46 (0x2E) - 47, // input 118 (0x76 char 'v') => 47 (0x2F) - 48, // input 119 (0x77 char 'w') => 48 (0x30) - 49, // input 120 (0x78 char 'x') => 49 (0x31) - 50, // input 121 (0x79 char 'y') => 50 (0x32) - 51, // input 122 (0x7A char 'z') => 51 (0x33) - INVALID_VALUE, // input 123 (0x7B) - INVALID_VALUE, // input 124 (0x7C) - INVALID_VALUE, // input 125 (0x7D) - INVALID_VALUE, // input 126 (0x7E) - INVALID_VALUE, // input 127 (0x7F) - INVALID_VALUE, // input 128 (0x80) - INVALID_VALUE, // input 129 (0x81) - INVALID_VALUE, // input 130 (0x82) - 
INVALID_VALUE, // input 131 (0x83) - INVALID_VALUE, // input 132 (0x84) - INVALID_VALUE, // input 133 (0x85) - INVALID_VALUE, // input 134 (0x86) - INVALID_VALUE, // input 135 (0x87) - INVALID_VALUE, // input 136 (0x88) - INVALID_VALUE, // input 137 (0x89) - INVALID_VALUE, // input 138 (0x8A) - INVALID_VALUE, // input 139 (0x8B) - INVALID_VALUE, // input 140 (0x8C) - INVALID_VALUE, // input 141 (0x8D) - INVALID_VALUE, // input 142 (0x8E) - INVALID_VALUE, // input 143 (0x8F) - INVALID_VALUE, // input 144 (0x90) - INVALID_VALUE, // input 145 (0x91) - INVALID_VALUE, // input 146 (0x92) - INVALID_VALUE, // input 147 (0x93) - INVALID_VALUE, // input 148 (0x94) - INVALID_VALUE, // input 149 (0x95) - INVALID_VALUE, // input 150 (0x96) - INVALID_VALUE, // input 151 (0x97) - INVALID_VALUE, // input 152 (0x98) - INVALID_VALUE, // input 153 (0x99) - INVALID_VALUE, // input 154 (0x9A) - INVALID_VALUE, // input 155 (0x9B) - INVALID_VALUE, // input 156 (0x9C) - INVALID_VALUE, // input 157 (0x9D) - INVALID_VALUE, // input 158 (0x9E) - INVALID_VALUE, // input 159 (0x9F) - INVALID_VALUE, // input 160 (0xA0) - INVALID_VALUE, // input 161 (0xA1) - INVALID_VALUE, // input 162 (0xA2) - INVALID_VALUE, // input 163 (0xA3) - INVALID_VALUE, // input 164 (0xA4) - INVALID_VALUE, // input 165 (0xA5) - INVALID_VALUE, // input 166 (0xA6) - INVALID_VALUE, // input 167 (0xA7) - INVALID_VALUE, // input 168 (0xA8) - INVALID_VALUE, // input 169 (0xA9) - INVALID_VALUE, // input 170 (0xAA) - INVALID_VALUE, // input 171 (0xAB) - INVALID_VALUE, // input 172 (0xAC) - INVALID_VALUE, // input 173 (0xAD) - INVALID_VALUE, // input 174 (0xAE) - INVALID_VALUE, // input 175 (0xAF) - INVALID_VALUE, // input 176 (0xB0) - INVALID_VALUE, // input 177 (0xB1) - INVALID_VALUE, // input 178 (0xB2) - INVALID_VALUE, // input 179 (0xB3) - INVALID_VALUE, // input 180 (0xB4) - INVALID_VALUE, // input 181 (0xB5) - INVALID_VALUE, // input 182 (0xB6) - INVALID_VALUE, // input 183 (0xB7) - INVALID_VALUE, // input 184 (0xB8) - 
INVALID_VALUE, // input 185 (0xB9) - INVALID_VALUE, // input 186 (0xBA) - INVALID_VALUE, // input 187 (0xBB) - INVALID_VALUE, // input 188 (0xBC) - INVALID_VALUE, // input 189 (0xBD) - INVALID_VALUE, // input 190 (0xBE) - INVALID_VALUE, // input 191 (0xBF) - INVALID_VALUE, // input 192 (0xC0) - INVALID_VALUE, // input 193 (0xC1) - INVALID_VALUE, // input 194 (0xC2) - INVALID_VALUE, // input 195 (0xC3) - INVALID_VALUE, // input 196 (0xC4) - INVALID_VALUE, // input 197 (0xC5) - INVALID_VALUE, // input 198 (0xC6) - INVALID_VALUE, // input 199 (0xC7) - INVALID_VALUE, // input 200 (0xC8) - INVALID_VALUE, // input 201 (0xC9) - INVALID_VALUE, // input 202 (0xCA) - INVALID_VALUE, // input 203 (0xCB) - INVALID_VALUE, // input 204 (0xCC) - INVALID_VALUE, // input 205 (0xCD) - INVALID_VALUE, // input 206 (0xCE) - INVALID_VALUE, // input 207 (0xCF) - INVALID_VALUE, // input 208 (0xD0) - INVALID_VALUE, // input 209 (0xD1) - INVALID_VALUE, // input 210 (0xD2) - INVALID_VALUE, // input 211 (0xD3) - INVALID_VALUE, // input 212 (0xD4) - INVALID_VALUE, // input 213 (0xD5) - INVALID_VALUE, // input 214 (0xD6) - INVALID_VALUE, // input 215 (0xD7) - INVALID_VALUE, // input 216 (0xD8) - INVALID_VALUE, // input 217 (0xD9) - INVALID_VALUE, // input 218 (0xDA) - INVALID_VALUE, // input 219 (0xDB) - INVALID_VALUE, // input 220 (0xDC) - INVALID_VALUE, // input 221 (0xDD) - INVALID_VALUE, // input 222 (0xDE) - INVALID_VALUE, // input 223 (0xDF) - INVALID_VALUE, // input 224 (0xE0) - INVALID_VALUE, // input 225 (0xE1) - INVALID_VALUE, // input 226 (0xE2) - INVALID_VALUE, // input 227 (0xE3) - INVALID_VALUE, // input 228 (0xE4) - INVALID_VALUE, // input 229 (0xE5) - INVALID_VALUE, // input 230 (0xE6) - INVALID_VALUE, // input 231 (0xE7) - INVALID_VALUE, // input 232 (0xE8) - INVALID_VALUE, // input 233 (0xE9) - INVALID_VALUE, // input 234 (0xEA) - INVALID_VALUE, // input 235 (0xEB) - INVALID_VALUE, // input 236 (0xEC) - INVALID_VALUE, // input 237 (0xED) - INVALID_VALUE, // input 238 (0xEE) - 
INVALID_VALUE, // input 239 (0xEF) - INVALID_VALUE, // input 240 (0xF0) - INVALID_VALUE, // input 241 (0xF1) - INVALID_VALUE, // input 242 (0xF2) - INVALID_VALUE, // input 243 (0xF3) - INVALID_VALUE, // input 244 (0xF4) - INVALID_VALUE, // input 245 (0xF5) - INVALID_VALUE, // input 246 (0xF6) - INVALID_VALUE, // input 247 (0xF7) - INVALID_VALUE, // input 248 (0xF8) - INVALID_VALUE, // input 249 (0xF9) - INVALID_VALUE, // input 250 (0xFA) - INVALID_VALUE, // input 251 (0xFB) - INVALID_VALUE, // input 252 (0xFC) - INVALID_VALUE, // input 253 (0xFD) - INVALID_VALUE, // input 254 (0xFE) - INVALID_VALUE, // input 255 (0xFF) -]; -#[rustfmt::skip] -pub const URL_SAFE_ENCODE: &[u8; 64] = &[ - 65, // input 0 (0x0) => 'A' (0x41) - 66, // input 1 (0x1) => 'B' (0x42) - 67, // input 2 (0x2) => 'C' (0x43) - 68, // input 3 (0x3) => 'D' (0x44) - 69, // input 4 (0x4) => 'E' (0x45) - 70, // input 5 (0x5) => 'F' (0x46) - 71, // input 6 (0x6) => 'G' (0x47) - 72, // input 7 (0x7) => 'H' (0x48) - 73, // input 8 (0x8) => 'I' (0x49) - 74, // input 9 (0x9) => 'J' (0x4A) - 75, // input 10 (0xA) => 'K' (0x4B) - 76, // input 11 (0xB) => 'L' (0x4C) - 77, // input 12 (0xC) => 'M' (0x4D) - 78, // input 13 (0xD) => 'N' (0x4E) - 79, // input 14 (0xE) => 'O' (0x4F) - 80, // input 15 (0xF) => 'P' (0x50) - 81, // input 16 (0x10) => 'Q' (0x51) - 82, // input 17 (0x11) => 'R' (0x52) - 83, // input 18 (0x12) => 'S' (0x53) - 84, // input 19 (0x13) => 'T' (0x54) - 85, // input 20 (0x14) => 'U' (0x55) - 86, // input 21 (0x15) => 'V' (0x56) - 87, // input 22 (0x16) => 'W' (0x57) - 88, // input 23 (0x17) => 'X' (0x58) - 89, // input 24 (0x18) => 'Y' (0x59) - 90, // input 25 (0x19) => 'Z' (0x5A) - 97, // input 26 (0x1A) => 'a' (0x61) - 98, // input 27 (0x1B) => 'b' (0x62) - 99, // input 28 (0x1C) => 'c' (0x63) - 100, // input 29 (0x1D) => 'd' (0x64) - 101, // input 30 (0x1E) => 'e' (0x65) - 102, // input 31 (0x1F) => 'f' (0x66) - 103, // input 32 (0x20) => 'g' (0x67) - 104, // input 33 (0x21) => 'h' (0x68) - 
105, // input 34 (0x22) => 'i' (0x69) - 106, // input 35 (0x23) => 'j' (0x6A) - 107, // input 36 (0x24) => 'k' (0x6B) - 108, // input 37 (0x25) => 'l' (0x6C) - 109, // input 38 (0x26) => 'm' (0x6D) - 110, // input 39 (0x27) => 'n' (0x6E) - 111, // input 40 (0x28) => 'o' (0x6F) - 112, // input 41 (0x29) => 'p' (0x70) - 113, // input 42 (0x2A) => 'q' (0x71) - 114, // input 43 (0x2B) => 'r' (0x72) - 115, // input 44 (0x2C) => 's' (0x73) - 116, // input 45 (0x2D) => 't' (0x74) - 117, // input 46 (0x2E) => 'u' (0x75) - 118, // input 47 (0x2F) => 'v' (0x76) - 119, // input 48 (0x30) => 'w' (0x77) - 120, // input 49 (0x31) => 'x' (0x78) - 121, // input 50 (0x32) => 'y' (0x79) - 122, // input 51 (0x33) => 'z' (0x7A) - 48, // input 52 (0x34) => '0' (0x30) - 49, // input 53 (0x35) => '1' (0x31) - 50, // input 54 (0x36) => '2' (0x32) - 51, // input 55 (0x37) => '3' (0x33) - 52, // input 56 (0x38) => '4' (0x34) - 53, // input 57 (0x39) => '5' (0x35) - 54, // input 58 (0x3A) => '6' (0x36) - 55, // input 59 (0x3B) => '7' (0x37) - 56, // input 60 (0x3C) => '8' (0x38) - 57, // input 61 (0x3D) => '9' (0x39) - 45, // input 62 (0x3E) => '-' (0x2D) - 95, // input 63 (0x3F) => '_' (0x5F) -]; -#[rustfmt::skip] -pub const URL_SAFE_DECODE: &[u8; 256] = &[ - INVALID_VALUE, // input 0 (0x0) - INVALID_VALUE, // input 1 (0x1) - INVALID_VALUE, // input 2 (0x2) - INVALID_VALUE, // input 3 (0x3) - INVALID_VALUE, // input 4 (0x4) - INVALID_VALUE, // input 5 (0x5) - INVALID_VALUE, // input 6 (0x6) - INVALID_VALUE, // input 7 (0x7) - INVALID_VALUE, // input 8 (0x8) - INVALID_VALUE, // input 9 (0x9) - INVALID_VALUE, // input 10 (0xA) - INVALID_VALUE, // input 11 (0xB) - INVALID_VALUE, // input 12 (0xC) - INVALID_VALUE, // input 13 (0xD) - INVALID_VALUE, // input 14 (0xE) - INVALID_VALUE, // input 15 (0xF) - INVALID_VALUE, // input 16 (0x10) - INVALID_VALUE, // input 17 (0x11) - INVALID_VALUE, // input 18 (0x12) - INVALID_VALUE, // input 19 (0x13) - INVALID_VALUE, // input 20 (0x14) - INVALID_VALUE, 
// input 21 (0x15) - INVALID_VALUE, // input 22 (0x16) - INVALID_VALUE, // input 23 (0x17) - INVALID_VALUE, // input 24 (0x18) - INVALID_VALUE, // input 25 (0x19) - INVALID_VALUE, // input 26 (0x1A) - INVALID_VALUE, // input 27 (0x1B) - INVALID_VALUE, // input 28 (0x1C) - INVALID_VALUE, // input 29 (0x1D) - INVALID_VALUE, // input 30 (0x1E) - INVALID_VALUE, // input 31 (0x1F) - INVALID_VALUE, // input 32 (0x20) - INVALID_VALUE, // input 33 (0x21) - INVALID_VALUE, // input 34 (0x22) - INVALID_VALUE, // input 35 (0x23) - INVALID_VALUE, // input 36 (0x24) - INVALID_VALUE, // input 37 (0x25) - INVALID_VALUE, // input 38 (0x26) - INVALID_VALUE, // input 39 (0x27) - INVALID_VALUE, // input 40 (0x28) - INVALID_VALUE, // input 41 (0x29) - INVALID_VALUE, // input 42 (0x2A) - INVALID_VALUE, // input 43 (0x2B) - INVALID_VALUE, // input 44 (0x2C) - 62, // input 45 (0x2D char '-') => 62 (0x3E) - INVALID_VALUE, // input 46 (0x2E) - INVALID_VALUE, // input 47 (0x2F) - 52, // input 48 (0x30 char '0') => 52 (0x34) - 53, // input 49 (0x31 char '1') => 53 (0x35) - 54, // input 50 (0x32 char '2') => 54 (0x36) - 55, // input 51 (0x33 char '3') => 55 (0x37) - 56, // input 52 (0x34 char '4') => 56 (0x38) - 57, // input 53 (0x35 char '5') => 57 (0x39) - 58, // input 54 (0x36 char '6') => 58 (0x3A) - 59, // input 55 (0x37 char '7') => 59 (0x3B) - 60, // input 56 (0x38 char '8') => 60 (0x3C) - 61, // input 57 (0x39 char '9') => 61 (0x3D) - INVALID_VALUE, // input 58 (0x3A) - INVALID_VALUE, // input 59 (0x3B) - INVALID_VALUE, // input 60 (0x3C) - INVALID_VALUE, // input 61 (0x3D) - INVALID_VALUE, // input 62 (0x3E) - INVALID_VALUE, // input 63 (0x3F) - INVALID_VALUE, // input 64 (0x40) - 0, // input 65 (0x41 char 'A') => 0 (0x0) - 1, // input 66 (0x42 char 'B') => 1 (0x1) - 2, // input 67 (0x43 char 'C') => 2 (0x2) - 3, // input 68 (0x44 char 'D') => 3 (0x3) - 4, // input 69 (0x45 char 'E') => 4 (0x4) - 5, // input 70 (0x46 char 'F') => 5 (0x5) - 6, // input 71 (0x47 char 'G') => 6 (0x6) - 
7, // input 72 (0x48 char 'H') => 7 (0x7) - 8, // input 73 (0x49 char 'I') => 8 (0x8) - 9, // input 74 (0x4A char 'J') => 9 (0x9) - 10, // input 75 (0x4B char 'K') => 10 (0xA) - 11, // input 76 (0x4C char 'L') => 11 (0xB) - 12, // input 77 (0x4D char 'M') => 12 (0xC) - 13, // input 78 (0x4E char 'N') => 13 (0xD) - 14, // input 79 (0x4F char 'O') => 14 (0xE) - 15, // input 80 (0x50 char 'P') => 15 (0xF) - 16, // input 81 (0x51 char 'Q') => 16 (0x10) - 17, // input 82 (0x52 char 'R') => 17 (0x11) - 18, // input 83 (0x53 char 'S') => 18 (0x12) - 19, // input 84 (0x54 char 'T') => 19 (0x13) - 20, // input 85 (0x55 char 'U') => 20 (0x14) - 21, // input 86 (0x56 char 'V') => 21 (0x15) - 22, // input 87 (0x57 char 'W') => 22 (0x16) - 23, // input 88 (0x58 char 'X') => 23 (0x17) - 24, // input 89 (0x59 char 'Y') => 24 (0x18) - 25, // input 90 (0x5A char 'Z') => 25 (0x19) - INVALID_VALUE, // input 91 (0x5B) - INVALID_VALUE, // input 92 (0x5C) - INVALID_VALUE, // input 93 (0x5D) - INVALID_VALUE, // input 94 (0x5E) - 63, // input 95 (0x5F char '_') => 63 (0x3F) - INVALID_VALUE, // input 96 (0x60) - 26, // input 97 (0x61 char 'a') => 26 (0x1A) - 27, // input 98 (0x62 char 'b') => 27 (0x1B) - 28, // input 99 (0x63 char 'c') => 28 (0x1C) - 29, // input 100 (0x64 char 'd') => 29 (0x1D) - 30, // input 101 (0x65 char 'e') => 30 (0x1E) - 31, // input 102 (0x66 char 'f') => 31 (0x1F) - 32, // input 103 (0x67 char 'g') => 32 (0x20) - 33, // input 104 (0x68 char 'h') => 33 (0x21) - 34, // input 105 (0x69 char 'i') => 34 (0x22) - 35, // input 106 (0x6A char 'j') => 35 (0x23) - 36, // input 107 (0x6B char 'k') => 36 (0x24) - 37, // input 108 (0x6C char 'l') => 37 (0x25) - 38, // input 109 (0x6D char 'm') => 38 (0x26) - 39, // input 110 (0x6E char 'n') => 39 (0x27) - 40, // input 111 (0x6F char 'o') => 40 (0x28) - 41, // input 112 (0x70 char 'p') => 41 (0x29) - 42, // input 113 (0x71 char 'q') => 42 (0x2A) - 43, // input 114 (0x72 char 'r') => 43 (0x2B) - 44, // input 115 (0x73 char 's') 
=> 44 (0x2C) - 45, // input 116 (0x74 char 't') => 45 (0x2D) - 46, // input 117 (0x75 char 'u') => 46 (0x2E) - 47, // input 118 (0x76 char 'v') => 47 (0x2F) - 48, // input 119 (0x77 char 'w') => 48 (0x30) - 49, // input 120 (0x78 char 'x') => 49 (0x31) - 50, // input 121 (0x79 char 'y') => 50 (0x32) - 51, // input 122 (0x7A char 'z') => 51 (0x33) - INVALID_VALUE, // input 123 (0x7B) - INVALID_VALUE, // input 124 (0x7C) - INVALID_VALUE, // input 125 (0x7D) - INVALID_VALUE, // input 126 (0x7E) - INVALID_VALUE, // input 127 (0x7F) - INVALID_VALUE, // input 128 (0x80) - INVALID_VALUE, // input 129 (0x81) - INVALID_VALUE, // input 130 (0x82) - INVALID_VALUE, // input 131 (0x83) - INVALID_VALUE, // input 132 (0x84) - INVALID_VALUE, // input 133 (0x85) - INVALID_VALUE, // input 134 (0x86) - INVALID_VALUE, // input 135 (0x87) - INVALID_VALUE, // input 136 (0x88) - INVALID_VALUE, // input 137 (0x89) - INVALID_VALUE, // input 138 (0x8A) - INVALID_VALUE, // input 139 (0x8B) - INVALID_VALUE, // input 140 (0x8C) - INVALID_VALUE, // input 141 (0x8D) - INVALID_VALUE, // input 142 (0x8E) - INVALID_VALUE, // input 143 (0x8F) - INVALID_VALUE, // input 144 (0x90) - INVALID_VALUE, // input 145 (0x91) - INVALID_VALUE, // input 146 (0x92) - INVALID_VALUE, // input 147 (0x93) - INVALID_VALUE, // input 148 (0x94) - INVALID_VALUE, // input 149 (0x95) - INVALID_VALUE, // input 150 (0x96) - INVALID_VALUE, // input 151 (0x97) - INVALID_VALUE, // input 152 (0x98) - INVALID_VALUE, // input 153 (0x99) - INVALID_VALUE, // input 154 (0x9A) - INVALID_VALUE, // input 155 (0x9B) - INVALID_VALUE, // input 156 (0x9C) - INVALID_VALUE, // input 157 (0x9D) - INVALID_VALUE, // input 158 (0x9E) - INVALID_VALUE, // input 159 (0x9F) - INVALID_VALUE, // input 160 (0xA0) - INVALID_VALUE, // input 161 (0xA1) - INVALID_VALUE, // input 162 (0xA2) - INVALID_VALUE, // input 163 (0xA3) - INVALID_VALUE, // input 164 (0xA4) - INVALID_VALUE, // input 165 (0xA5) - INVALID_VALUE, // input 166 (0xA6) - INVALID_VALUE, // 
input 167 (0xA7) - INVALID_VALUE, // input 168 (0xA8) - INVALID_VALUE, // input 169 (0xA9) - INVALID_VALUE, // input 170 (0xAA) - INVALID_VALUE, // input 171 (0xAB) - INVALID_VALUE, // input 172 (0xAC) - INVALID_VALUE, // input 173 (0xAD) - INVALID_VALUE, // input 174 (0xAE) - INVALID_VALUE, // input 175 (0xAF) - INVALID_VALUE, // input 176 (0xB0) - INVALID_VALUE, // input 177 (0xB1) - INVALID_VALUE, // input 178 (0xB2) - INVALID_VALUE, // input 179 (0xB3) - INVALID_VALUE, // input 180 (0xB4) - INVALID_VALUE, // input 181 (0xB5) - INVALID_VALUE, // input 182 (0xB6) - INVALID_VALUE, // input 183 (0xB7) - INVALID_VALUE, // input 184 (0xB8) - INVALID_VALUE, // input 185 (0xB9) - INVALID_VALUE, // input 186 (0xBA) - INVALID_VALUE, // input 187 (0xBB) - INVALID_VALUE, // input 188 (0xBC) - INVALID_VALUE, // input 189 (0xBD) - INVALID_VALUE, // input 190 (0xBE) - INVALID_VALUE, // input 191 (0xBF) - INVALID_VALUE, // input 192 (0xC0) - INVALID_VALUE, // input 193 (0xC1) - INVALID_VALUE, // input 194 (0xC2) - INVALID_VALUE, // input 195 (0xC3) - INVALID_VALUE, // input 196 (0xC4) - INVALID_VALUE, // input 197 (0xC5) - INVALID_VALUE, // input 198 (0xC6) - INVALID_VALUE, // input 199 (0xC7) - INVALID_VALUE, // input 200 (0xC8) - INVALID_VALUE, // input 201 (0xC9) - INVALID_VALUE, // input 202 (0xCA) - INVALID_VALUE, // input 203 (0xCB) - INVALID_VALUE, // input 204 (0xCC) - INVALID_VALUE, // input 205 (0xCD) - INVALID_VALUE, // input 206 (0xCE) - INVALID_VALUE, // input 207 (0xCF) - INVALID_VALUE, // input 208 (0xD0) - INVALID_VALUE, // input 209 (0xD1) - INVALID_VALUE, // input 210 (0xD2) - INVALID_VALUE, // input 211 (0xD3) - INVALID_VALUE, // input 212 (0xD4) - INVALID_VALUE, // input 213 (0xD5) - INVALID_VALUE, // input 214 (0xD6) - INVALID_VALUE, // input 215 (0xD7) - INVALID_VALUE, // input 216 (0xD8) - INVALID_VALUE, // input 217 (0xD9) - INVALID_VALUE, // input 218 (0xDA) - INVALID_VALUE, // input 219 (0xDB) - INVALID_VALUE, // input 220 (0xDC) - INVALID_VALUE, // 
input 221 (0xDD) - INVALID_VALUE, // input 222 (0xDE) - INVALID_VALUE, // input 223 (0xDF) - INVALID_VALUE, // input 224 (0xE0) - INVALID_VALUE, // input 225 (0xE1) - INVALID_VALUE, // input 226 (0xE2) - INVALID_VALUE, // input 227 (0xE3) - INVALID_VALUE, // input 228 (0xE4) - INVALID_VALUE, // input 229 (0xE5) - INVALID_VALUE, // input 230 (0xE6) - INVALID_VALUE, // input 231 (0xE7) - INVALID_VALUE, // input 232 (0xE8) - INVALID_VALUE, // input 233 (0xE9) - INVALID_VALUE, // input 234 (0xEA) - INVALID_VALUE, // input 235 (0xEB) - INVALID_VALUE, // input 236 (0xEC) - INVALID_VALUE, // input 237 (0xED) - INVALID_VALUE, // input 238 (0xEE) - INVALID_VALUE, // input 239 (0xEF) - INVALID_VALUE, // input 240 (0xF0) - INVALID_VALUE, // input 241 (0xF1) - INVALID_VALUE, // input 242 (0xF2) - INVALID_VALUE, // input 243 (0xF3) - INVALID_VALUE, // input 244 (0xF4) - INVALID_VALUE, // input 245 (0xF5) - INVALID_VALUE, // input 246 (0xF6) - INVALID_VALUE, // input 247 (0xF7) - INVALID_VALUE, // input 248 (0xF8) - INVALID_VALUE, // input 249 (0xF9) - INVALID_VALUE, // input 250 (0xFA) - INVALID_VALUE, // input 251 (0xFB) - INVALID_VALUE, // input 252 (0xFC) - INVALID_VALUE, // input 253 (0xFD) - INVALID_VALUE, // input 254 (0xFE) - INVALID_VALUE, // input 255 (0xFF) -]; -#[rustfmt::skip] -pub const CRYPT_ENCODE: &[u8; 64] = &[ - 46, // input 0 (0x0) => '.' 
(0x2E) - 47, // input 1 (0x1) => '/' (0x2F) - 48, // input 2 (0x2) => '0' (0x30) - 49, // input 3 (0x3) => '1' (0x31) - 50, // input 4 (0x4) => '2' (0x32) - 51, // input 5 (0x5) => '3' (0x33) - 52, // input 6 (0x6) => '4' (0x34) - 53, // input 7 (0x7) => '5' (0x35) - 54, // input 8 (0x8) => '6' (0x36) - 55, // input 9 (0x9) => '7' (0x37) - 56, // input 10 (0xA) => '8' (0x38) - 57, // input 11 (0xB) => '9' (0x39) - 65, // input 12 (0xC) => 'A' (0x41) - 66, // input 13 (0xD) => 'B' (0x42) - 67, // input 14 (0xE) => 'C' (0x43) - 68, // input 15 (0xF) => 'D' (0x44) - 69, // input 16 (0x10) => 'E' (0x45) - 70, // input 17 (0x11) => 'F' (0x46) - 71, // input 18 (0x12) => 'G' (0x47) - 72, // input 19 (0x13) => 'H' (0x48) - 73, // input 20 (0x14) => 'I' (0x49) - 74, // input 21 (0x15) => 'J' (0x4A) - 75, // input 22 (0x16) => 'K' (0x4B) - 76, // input 23 (0x17) => 'L' (0x4C) - 77, // input 24 (0x18) => 'M' (0x4D) - 78, // input 25 (0x19) => 'N' (0x4E) - 79, // input 26 (0x1A) => 'O' (0x4F) - 80, // input 27 (0x1B) => 'P' (0x50) - 81, // input 28 (0x1C) => 'Q' (0x51) - 82, // input 29 (0x1D) => 'R' (0x52) - 83, // input 30 (0x1E) => 'S' (0x53) - 84, // input 31 (0x1F) => 'T' (0x54) - 85, // input 32 (0x20) => 'U' (0x55) - 86, // input 33 (0x21) => 'V' (0x56) - 87, // input 34 (0x22) => 'W' (0x57) - 88, // input 35 (0x23) => 'X' (0x58) - 89, // input 36 (0x24) => 'Y' (0x59) - 90, // input 37 (0x25) => 'Z' (0x5A) - 97, // input 38 (0x26) => 'a' (0x61) - 98, // input 39 (0x27) => 'b' (0x62) - 99, // input 40 (0x28) => 'c' (0x63) - 100, // input 41 (0x29) => 'd' (0x64) - 101, // input 42 (0x2A) => 'e' (0x65) - 102, // input 43 (0x2B) => 'f' (0x66) - 103, // input 44 (0x2C) => 'g' (0x67) - 104, // input 45 (0x2D) => 'h' (0x68) - 105, // input 46 (0x2E) => 'i' (0x69) - 106, // input 47 (0x2F) => 'j' (0x6A) - 107, // input 48 (0x30) => 'k' (0x6B) - 108, // input 49 (0x31) => 'l' (0x6C) - 109, // input 50 (0x32) => 'm' (0x6D) - 110, // input 51 (0x33) => 'n' (0x6E) - 111, // input 
52 (0x34) => 'o' (0x6F) - 112, // input 53 (0x35) => 'p' (0x70) - 113, // input 54 (0x36) => 'q' (0x71) - 114, // input 55 (0x37) => 'r' (0x72) - 115, // input 56 (0x38) => 's' (0x73) - 116, // input 57 (0x39) => 't' (0x74) - 117, // input 58 (0x3A) => 'u' (0x75) - 118, // input 59 (0x3B) => 'v' (0x76) - 119, // input 60 (0x3C) => 'w' (0x77) - 120, // input 61 (0x3D) => 'x' (0x78) - 121, // input 62 (0x3E) => 'y' (0x79) - 122, // input 63 (0x3F) => 'z' (0x7A) -]; -#[rustfmt::skip] -pub const CRYPT_DECODE: &[u8; 256] = &[ - INVALID_VALUE, // input 0 (0x0) - INVALID_VALUE, // input 1 (0x1) - INVALID_VALUE, // input 2 (0x2) - INVALID_VALUE, // input 3 (0x3) - INVALID_VALUE, // input 4 (0x4) - INVALID_VALUE, // input 5 (0x5) - INVALID_VALUE, // input 6 (0x6) - INVALID_VALUE, // input 7 (0x7) - INVALID_VALUE, // input 8 (0x8) - INVALID_VALUE, // input 9 (0x9) - INVALID_VALUE, // input 10 (0xA) - INVALID_VALUE, // input 11 (0xB) - INVALID_VALUE, // input 12 (0xC) - INVALID_VALUE, // input 13 (0xD) - INVALID_VALUE, // input 14 (0xE) - INVALID_VALUE, // input 15 (0xF) - INVALID_VALUE, // input 16 (0x10) - INVALID_VALUE, // input 17 (0x11) - INVALID_VALUE, // input 18 (0x12) - INVALID_VALUE, // input 19 (0x13) - INVALID_VALUE, // input 20 (0x14) - INVALID_VALUE, // input 21 (0x15) - INVALID_VALUE, // input 22 (0x16) - INVALID_VALUE, // input 23 (0x17) - INVALID_VALUE, // input 24 (0x18) - INVALID_VALUE, // input 25 (0x19) - INVALID_VALUE, // input 26 (0x1A) - INVALID_VALUE, // input 27 (0x1B) - INVALID_VALUE, // input 28 (0x1C) - INVALID_VALUE, // input 29 (0x1D) - INVALID_VALUE, // input 30 (0x1E) - INVALID_VALUE, // input 31 (0x1F) - INVALID_VALUE, // input 32 (0x20) - INVALID_VALUE, // input 33 (0x21) - INVALID_VALUE, // input 34 (0x22) - INVALID_VALUE, // input 35 (0x23) - INVALID_VALUE, // input 36 (0x24) - INVALID_VALUE, // input 37 (0x25) - INVALID_VALUE, // input 38 (0x26) - INVALID_VALUE, // input 39 (0x27) - INVALID_VALUE, // input 40 (0x28) - INVALID_VALUE, // 
input 41 (0x29) - INVALID_VALUE, // input 42 (0x2A) - INVALID_VALUE, // input 43 (0x2B) - INVALID_VALUE, // input 44 (0x2C) - INVALID_VALUE, // input 45 (0x2D) - 0, // input 46 (0x2E char '.') => 0 (0x0) - 1, // input 47 (0x2F char '/') => 1 (0x1) - 2, // input 48 (0x30 char '0') => 2 (0x2) - 3, // input 49 (0x31 char '1') => 3 (0x3) - 4, // input 50 (0x32 char '2') => 4 (0x4) - 5, // input 51 (0x33 char '3') => 5 (0x5) - 6, // input 52 (0x34 char '4') => 6 (0x6) - 7, // input 53 (0x35 char '5') => 7 (0x7) - 8, // input 54 (0x36 char '6') => 8 (0x8) - 9, // input 55 (0x37 char '7') => 9 (0x9) - 10, // input 56 (0x38 char '8') => 10 (0xA) - 11, // input 57 (0x39 char '9') => 11 (0xB) - INVALID_VALUE, // input 58 (0x3A) - INVALID_VALUE, // input 59 (0x3B) - INVALID_VALUE, // input 60 (0x3C) - INVALID_VALUE, // input 61 (0x3D) - INVALID_VALUE, // input 62 (0x3E) - INVALID_VALUE, // input 63 (0x3F) - INVALID_VALUE, // input 64 (0x40) - 12, // input 65 (0x41 char 'A') => 12 (0xC) - 13, // input 66 (0x42 char 'B') => 13 (0xD) - 14, // input 67 (0x43 char 'C') => 14 (0xE) - 15, // input 68 (0x44 char 'D') => 15 (0xF) - 16, // input 69 (0x45 char 'E') => 16 (0x10) - 17, // input 70 (0x46 char 'F') => 17 (0x11) - 18, // input 71 (0x47 char 'G') => 18 (0x12) - 19, // input 72 (0x48 char 'H') => 19 (0x13) - 20, // input 73 (0x49 char 'I') => 20 (0x14) - 21, // input 74 (0x4A char 'J') => 21 (0x15) - 22, // input 75 (0x4B char 'K') => 22 (0x16) - 23, // input 76 (0x4C char 'L') => 23 (0x17) - 24, // input 77 (0x4D char 'M') => 24 (0x18) - 25, // input 78 (0x4E char 'N') => 25 (0x19) - 26, // input 79 (0x4F char 'O') => 26 (0x1A) - 27, // input 80 (0x50 char 'P') => 27 (0x1B) - 28, // input 81 (0x51 char 'Q') => 28 (0x1C) - 29, // input 82 (0x52 char 'R') => 29 (0x1D) - 30, // input 83 (0x53 char 'S') => 30 (0x1E) - 31, // input 84 (0x54 char 'T') => 31 (0x1F) - 32, // input 85 (0x55 char 'U') => 32 (0x20) - 33, // input 86 (0x56 char 'V') => 33 (0x21) - 34, // input 87 (0x57 
char 'W') => 34 (0x22) - 35, // input 88 (0x58 char 'X') => 35 (0x23) - 36, // input 89 (0x59 char 'Y') => 36 (0x24) - 37, // input 90 (0x5A char 'Z') => 37 (0x25) - INVALID_VALUE, // input 91 (0x5B) - INVALID_VALUE, // input 92 (0x5C) - INVALID_VALUE, // input 93 (0x5D) - INVALID_VALUE, // input 94 (0x5E) - INVALID_VALUE, // input 95 (0x5F) - INVALID_VALUE, // input 96 (0x60) - 38, // input 97 (0x61 char 'a') => 38 (0x26) - 39, // input 98 (0x62 char 'b') => 39 (0x27) - 40, // input 99 (0x63 char 'c') => 40 (0x28) - 41, // input 100 (0x64 char 'd') => 41 (0x29) - 42, // input 101 (0x65 char 'e') => 42 (0x2A) - 43, // input 102 (0x66 char 'f') => 43 (0x2B) - 44, // input 103 (0x67 char 'g') => 44 (0x2C) - 45, // input 104 (0x68 char 'h') => 45 (0x2D) - 46, // input 105 (0x69 char 'i') => 46 (0x2E) - 47, // input 106 (0x6A char 'j') => 47 (0x2F) - 48, // input 107 (0x6B char 'k') => 48 (0x30) - 49, // input 108 (0x6C char 'l') => 49 (0x31) - 50, // input 109 (0x6D char 'm') => 50 (0x32) - 51, // input 110 (0x6E char 'n') => 51 (0x33) - 52, // input 111 (0x6F char 'o') => 52 (0x34) - 53, // input 112 (0x70 char 'p') => 53 (0x35) - 54, // input 113 (0x71 char 'q') => 54 (0x36) - 55, // input 114 (0x72 char 'r') => 55 (0x37) - 56, // input 115 (0x73 char 's') => 56 (0x38) - 57, // input 116 (0x74 char 't') => 57 (0x39) - 58, // input 117 (0x75 char 'u') => 58 (0x3A) - 59, // input 118 (0x76 char 'v') => 59 (0x3B) - 60, // input 119 (0x77 char 'w') => 60 (0x3C) - 61, // input 120 (0x78 char 'x') => 61 (0x3D) - 62, // input 121 (0x79 char 'y') => 62 (0x3E) - 63, // input 122 (0x7A char 'z') => 63 (0x3F) - INVALID_VALUE, // input 123 (0x7B) - INVALID_VALUE, // input 124 (0x7C) - INVALID_VALUE, // input 125 (0x7D) - INVALID_VALUE, // input 126 (0x7E) - INVALID_VALUE, // input 127 (0x7F) - INVALID_VALUE, // input 128 (0x80) - INVALID_VALUE, // input 129 (0x81) - INVALID_VALUE, // input 130 (0x82) - INVALID_VALUE, // input 131 (0x83) - INVALID_VALUE, // input 132 (0x84) - 
INVALID_VALUE, // input 133 (0x85) - INVALID_VALUE, // input 134 (0x86) - INVALID_VALUE, // input 135 (0x87) - INVALID_VALUE, // input 136 (0x88) - INVALID_VALUE, // input 137 (0x89) - INVALID_VALUE, // input 138 (0x8A) - INVALID_VALUE, // input 139 (0x8B) - INVALID_VALUE, // input 140 (0x8C) - INVALID_VALUE, // input 141 (0x8D) - INVALID_VALUE, // input 142 (0x8E) - INVALID_VALUE, // input 143 (0x8F) - INVALID_VALUE, // input 144 (0x90) - INVALID_VALUE, // input 145 (0x91) - INVALID_VALUE, // input 146 (0x92) - INVALID_VALUE, // input 147 (0x93) - INVALID_VALUE, // input 148 (0x94) - INVALID_VALUE, // input 149 (0x95) - INVALID_VALUE, // input 150 (0x96) - INVALID_VALUE, // input 151 (0x97) - INVALID_VALUE, // input 152 (0x98) - INVALID_VALUE, // input 153 (0x99) - INVALID_VALUE, // input 154 (0x9A) - INVALID_VALUE, // input 155 (0x9B) - INVALID_VALUE, // input 156 (0x9C) - INVALID_VALUE, // input 157 (0x9D) - INVALID_VALUE, // input 158 (0x9E) - INVALID_VALUE, // input 159 (0x9F) - INVALID_VALUE, // input 160 (0xA0) - INVALID_VALUE, // input 161 (0xA1) - INVALID_VALUE, // input 162 (0xA2) - INVALID_VALUE, // input 163 (0xA3) - INVALID_VALUE, // input 164 (0xA4) - INVALID_VALUE, // input 165 (0xA5) - INVALID_VALUE, // input 166 (0xA6) - INVALID_VALUE, // input 167 (0xA7) - INVALID_VALUE, // input 168 (0xA8) - INVALID_VALUE, // input 169 (0xA9) - INVALID_VALUE, // input 170 (0xAA) - INVALID_VALUE, // input 171 (0xAB) - INVALID_VALUE, // input 172 (0xAC) - INVALID_VALUE, // input 173 (0xAD) - INVALID_VALUE, // input 174 (0xAE) - INVALID_VALUE, // input 175 (0xAF) - INVALID_VALUE, // input 176 (0xB0) - INVALID_VALUE, // input 177 (0xB1) - INVALID_VALUE, // input 178 (0xB2) - INVALID_VALUE, // input 179 (0xB3) - INVALID_VALUE, // input 180 (0xB4) - INVALID_VALUE, // input 181 (0xB5) - INVALID_VALUE, // input 182 (0xB6) - INVALID_VALUE, // input 183 (0xB7) - INVALID_VALUE, // input 184 (0xB8) - INVALID_VALUE, // input 185 (0xB9) - INVALID_VALUE, // input 186 (0xBA) - 
INVALID_VALUE, // input 187 (0xBB) - INVALID_VALUE, // input 188 (0xBC) - INVALID_VALUE, // input 189 (0xBD) - INVALID_VALUE, // input 190 (0xBE) - INVALID_VALUE, // input 191 (0xBF) - INVALID_VALUE, // input 192 (0xC0) - INVALID_VALUE, // input 193 (0xC1) - INVALID_VALUE, // input 194 (0xC2) - INVALID_VALUE, // input 195 (0xC3) - INVALID_VALUE, // input 196 (0xC4) - INVALID_VALUE, // input 197 (0xC5) - INVALID_VALUE, // input 198 (0xC6) - INVALID_VALUE, // input 199 (0xC7) - INVALID_VALUE, // input 200 (0xC8) - INVALID_VALUE, // input 201 (0xC9) - INVALID_VALUE, // input 202 (0xCA) - INVALID_VALUE, // input 203 (0xCB) - INVALID_VALUE, // input 204 (0xCC) - INVALID_VALUE, // input 205 (0xCD) - INVALID_VALUE, // input 206 (0xCE) - INVALID_VALUE, // input 207 (0xCF) - INVALID_VALUE, // input 208 (0xD0) - INVALID_VALUE, // input 209 (0xD1) - INVALID_VALUE, // input 210 (0xD2) - INVALID_VALUE, // input 211 (0xD3) - INVALID_VALUE, // input 212 (0xD4) - INVALID_VALUE, // input 213 (0xD5) - INVALID_VALUE, // input 214 (0xD6) - INVALID_VALUE, // input 215 (0xD7) - INVALID_VALUE, // input 216 (0xD8) - INVALID_VALUE, // input 217 (0xD9) - INVALID_VALUE, // input 218 (0xDA) - INVALID_VALUE, // input 219 (0xDB) - INVALID_VALUE, // input 220 (0xDC) - INVALID_VALUE, // input 221 (0xDD) - INVALID_VALUE, // input 222 (0xDE) - INVALID_VALUE, // input 223 (0xDF) - INVALID_VALUE, // input 224 (0xE0) - INVALID_VALUE, // input 225 (0xE1) - INVALID_VALUE, // input 226 (0xE2) - INVALID_VALUE, // input 227 (0xE3) - INVALID_VALUE, // input 228 (0xE4) - INVALID_VALUE, // input 229 (0xE5) - INVALID_VALUE, // input 230 (0xE6) - INVALID_VALUE, // input 231 (0xE7) - INVALID_VALUE, // input 232 (0xE8) - INVALID_VALUE, // input 233 (0xE9) - INVALID_VALUE, // input 234 (0xEA) - INVALID_VALUE, // input 235 (0xEB) - INVALID_VALUE, // input 236 (0xEC) - INVALID_VALUE, // input 237 (0xED) - INVALID_VALUE, // input 238 (0xEE) - INVALID_VALUE, // input 239 (0xEF) - INVALID_VALUE, // input 240 (0xF0) - 
INVALID_VALUE, // input 241 (0xF1) - INVALID_VALUE, // input 242 (0xF2) - INVALID_VALUE, // input 243 (0xF3) - INVALID_VALUE, // input 244 (0xF4) - INVALID_VALUE, // input 245 (0xF5) - INVALID_VALUE, // input 246 (0xF6) - INVALID_VALUE, // input 247 (0xF7) - INVALID_VALUE, // input 248 (0xF8) - INVALID_VALUE, // input 249 (0xF9) - INVALID_VALUE, // input 250 (0xFA) - INVALID_VALUE, // input 251 (0xFB) - INVALID_VALUE, // input 252 (0xFC) - INVALID_VALUE, // input 253 (0xFD) - INVALID_VALUE, // input 254 (0xFE) - INVALID_VALUE, // input 255 (0xFF) -]; -#[rustfmt::skip] -pub const BCRYPT_ENCODE: &[u8; 64] = &[ - 46, // input 0 (0x0) => '.' (0x2E) - 47, // input 1 (0x1) => '/' (0x2F) - 65, // input 2 (0x2) => 'A' (0x41) - 66, // input 3 (0x3) => 'B' (0x42) - 67, // input 4 (0x4) => 'C' (0x43) - 68, // input 5 (0x5) => 'D' (0x44) - 69, // input 6 (0x6) => 'E' (0x45) - 70, // input 7 (0x7) => 'F' (0x46) - 71, // input 8 (0x8) => 'G' (0x47) - 72, // input 9 (0x9) => 'H' (0x48) - 73, // input 10 (0xA) => 'I' (0x49) - 74, // input 11 (0xB) => 'J' (0x4A) - 75, // input 12 (0xC) => 'K' (0x4B) - 76, // input 13 (0xD) => 'L' (0x4C) - 77, // input 14 (0xE) => 'M' (0x4D) - 78, // input 15 (0xF) => 'N' (0x4E) - 79, // input 16 (0x10) => 'O' (0x4F) - 80, // input 17 (0x11) => 'P' (0x50) - 81, // input 18 (0x12) => 'Q' (0x51) - 82, // input 19 (0x13) => 'R' (0x52) - 83, // input 20 (0x14) => 'S' (0x53) - 84, // input 21 (0x15) => 'T' (0x54) - 85, // input 22 (0x16) => 'U' (0x55) - 86, // input 23 (0x17) => 'V' (0x56) - 87, // input 24 (0x18) => 'W' (0x57) - 88, // input 25 (0x19) => 'X' (0x58) - 89, // input 26 (0x1A) => 'Y' (0x59) - 90, // input 27 (0x1B) => 'Z' (0x5A) - 97, // input 28 (0x1C) => 'a' (0x61) - 98, // input 29 (0x1D) => 'b' (0x62) - 99, // input 30 (0x1E) => 'c' (0x63) - 100, // input 31 (0x1F) => 'd' (0x64) - 101, // input 32 (0x20) => 'e' (0x65) - 102, // input 33 (0x21) => 'f' (0x66) - 103, // input 34 (0x22) => 'g' (0x67) - 104, // input 35 (0x23) => 'h' (0x68) 
- 105, // input 36 (0x24) => 'i' (0x69) - 106, // input 37 (0x25) => 'j' (0x6A) - 107, // input 38 (0x26) => 'k' (0x6B) - 108, // input 39 (0x27) => 'l' (0x6C) - 109, // input 40 (0x28) => 'm' (0x6D) - 110, // input 41 (0x29) => 'n' (0x6E) - 111, // input 42 (0x2A) => 'o' (0x6F) - 112, // input 43 (0x2B) => 'p' (0x70) - 113, // input 44 (0x2C) => 'q' (0x71) - 114, // input 45 (0x2D) => 'r' (0x72) - 115, // input 46 (0x2E) => 's' (0x73) - 116, // input 47 (0x2F) => 't' (0x74) - 117, // input 48 (0x30) => 'u' (0x75) - 118, // input 49 (0x31) => 'v' (0x76) - 119, // input 50 (0x32) => 'w' (0x77) - 120, // input 51 (0x33) => 'x' (0x78) - 121, // input 52 (0x34) => 'y' (0x79) - 122, // input 53 (0x35) => 'z' (0x7A) - 48, // input 54 (0x36) => '0' (0x30) - 49, // input 55 (0x37) => '1' (0x31) - 50, // input 56 (0x38) => '2' (0x32) - 51, // input 57 (0x39) => '3' (0x33) - 52, // input 58 (0x3A) => '4' (0x34) - 53, // input 59 (0x3B) => '5' (0x35) - 54, // input 60 (0x3C) => '6' (0x36) - 55, // input 61 (0x3D) => '7' (0x37) - 56, // input 62 (0x3E) => '8' (0x38) - 57, // input 63 (0x3F) => '9' (0x39) -]; -#[rustfmt::skip] -pub const BCRYPT_DECODE: &[u8; 256] = &[ - INVALID_VALUE, // input 0 (0x0) - INVALID_VALUE, // input 1 (0x1) - INVALID_VALUE, // input 2 (0x2) - INVALID_VALUE, // input 3 (0x3) - INVALID_VALUE, // input 4 (0x4) - INVALID_VALUE, // input 5 (0x5) - INVALID_VALUE, // input 6 (0x6) - INVALID_VALUE, // input 7 (0x7) - INVALID_VALUE, // input 8 (0x8) - INVALID_VALUE, // input 9 (0x9) - INVALID_VALUE, // input 10 (0xA) - INVALID_VALUE, // input 11 (0xB) - INVALID_VALUE, // input 12 (0xC) - INVALID_VALUE, // input 13 (0xD) - INVALID_VALUE, // input 14 (0xE) - INVALID_VALUE, // input 15 (0xF) - INVALID_VALUE, // input 16 (0x10) - INVALID_VALUE, // input 17 (0x11) - INVALID_VALUE, // input 18 (0x12) - INVALID_VALUE, // input 19 (0x13) - INVALID_VALUE, // input 20 (0x14) - INVALID_VALUE, // input 21 (0x15) - INVALID_VALUE, // input 22 (0x16) - INVALID_VALUE, // 
input 23 (0x17) - INVALID_VALUE, // input 24 (0x18) - INVALID_VALUE, // input 25 (0x19) - INVALID_VALUE, // input 26 (0x1A) - INVALID_VALUE, // input 27 (0x1B) - INVALID_VALUE, // input 28 (0x1C) - INVALID_VALUE, // input 29 (0x1D) - INVALID_VALUE, // input 30 (0x1E) - INVALID_VALUE, // input 31 (0x1F) - INVALID_VALUE, // input 32 (0x20) - INVALID_VALUE, // input 33 (0x21) - INVALID_VALUE, // input 34 (0x22) - INVALID_VALUE, // input 35 (0x23) - INVALID_VALUE, // input 36 (0x24) - INVALID_VALUE, // input 37 (0x25) - INVALID_VALUE, // input 38 (0x26) - INVALID_VALUE, // input 39 (0x27) - INVALID_VALUE, // input 40 (0x28) - INVALID_VALUE, // input 41 (0x29) - INVALID_VALUE, // input 42 (0x2A) - INVALID_VALUE, // input 43 (0x2B) - INVALID_VALUE, // input 44 (0x2C) - INVALID_VALUE, // input 45 (0x2D) - 0, // input 46 (0x2E char '.') => 0 (0x0) - 1, // input 47 (0x2F char '/') => 1 (0x1) - 54, // input 48 (0x30 char '0') => 54 (0x36) - 55, // input 49 (0x31 char '1') => 55 (0x37) - 56, // input 50 (0x32 char '2') => 56 (0x38) - 57, // input 51 (0x33 char '3') => 57 (0x39) - 58, // input 52 (0x34 char '4') => 58 (0x3A) - 59, // input 53 (0x35 char '5') => 59 (0x3B) - 60, // input 54 (0x36 char '6') => 60 (0x3C) - 61, // input 55 (0x37 char '7') => 61 (0x3D) - 62, // input 56 (0x38 char '8') => 62 (0x3E) - 63, // input 57 (0x39 char '9') => 63 (0x3F) - INVALID_VALUE, // input 58 (0x3A) - INVALID_VALUE, // input 59 (0x3B) - INVALID_VALUE, // input 60 (0x3C) - INVALID_VALUE, // input 61 (0x3D) - INVALID_VALUE, // input 62 (0x3E) - INVALID_VALUE, // input 63 (0x3F) - INVALID_VALUE, // input 64 (0x40) - 2, // input 65 (0x41 char 'A') => 2 (0x2) - 3, // input 66 (0x42 char 'B') => 3 (0x3) - 4, // input 67 (0x43 char 'C') => 4 (0x4) - 5, // input 68 (0x44 char 'D') => 5 (0x5) - 6, // input 69 (0x45 char 'E') => 6 (0x6) - 7, // input 70 (0x46 char 'F') => 7 (0x7) - 8, // input 71 (0x47 char 'G') => 8 (0x8) - 9, // input 72 (0x48 char 'H') => 9 (0x9) - 10, // input 73 (0x49 char 
'I') => 10 (0xA) - 11, // input 74 (0x4A char 'J') => 11 (0xB) - 12, // input 75 (0x4B char 'K') => 12 (0xC) - 13, // input 76 (0x4C char 'L') => 13 (0xD) - 14, // input 77 (0x4D char 'M') => 14 (0xE) - 15, // input 78 (0x4E char 'N') => 15 (0xF) - 16, // input 79 (0x4F char 'O') => 16 (0x10) - 17, // input 80 (0x50 char 'P') => 17 (0x11) - 18, // input 81 (0x51 char 'Q') => 18 (0x12) - 19, // input 82 (0x52 char 'R') => 19 (0x13) - 20, // input 83 (0x53 char 'S') => 20 (0x14) - 21, // input 84 (0x54 char 'T') => 21 (0x15) - 22, // input 85 (0x55 char 'U') => 22 (0x16) - 23, // input 86 (0x56 char 'V') => 23 (0x17) - 24, // input 87 (0x57 char 'W') => 24 (0x18) - 25, // input 88 (0x58 char 'X') => 25 (0x19) - 26, // input 89 (0x59 char 'Y') => 26 (0x1A) - 27, // input 90 (0x5A char 'Z') => 27 (0x1B) - INVALID_VALUE, // input 91 (0x5B) - INVALID_VALUE, // input 92 (0x5C) - INVALID_VALUE, // input 93 (0x5D) - INVALID_VALUE, // input 94 (0x5E) - INVALID_VALUE, // input 95 (0x5F) - INVALID_VALUE, // input 96 (0x60) - 28, // input 97 (0x61 char 'a') => 28 (0x1C) - 29, // input 98 (0x62 char 'b') => 29 (0x1D) - 30, // input 99 (0x63 char 'c') => 30 (0x1E) - 31, // input 100 (0x64 char 'd') => 31 (0x1F) - 32, // input 101 (0x65 char 'e') => 32 (0x20) - 33, // input 102 (0x66 char 'f') => 33 (0x21) - 34, // input 103 (0x67 char 'g') => 34 (0x22) - 35, // input 104 (0x68 char 'h') => 35 (0x23) - 36, // input 105 (0x69 char 'i') => 36 (0x24) - 37, // input 106 (0x6A char 'j') => 37 (0x25) - 38, // input 107 (0x6B char 'k') => 38 (0x26) - 39, // input 108 (0x6C char 'l') => 39 (0x27) - 40, // input 109 (0x6D char 'm') => 40 (0x28) - 41, // input 110 (0x6E char 'n') => 41 (0x29) - 42, // input 111 (0x6F char 'o') => 42 (0x2A) - 43, // input 112 (0x70 char 'p') => 43 (0x2B) - 44, // input 113 (0x71 char 'q') => 44 (0x2C) - 45, // input 114 (0x72 char 'r') => 45 (0x2D) - 46, // input 115 (0x73 char 's') => 46 (0x2E) - 47, // input 116 (0x74 char 't') => 47 (0x2F) - 48, // input 
117 (0x75 char 'u') => 48 (0x30) - 49, // input 118 (0x76 char 'v') => 49 (0x31) - 50, // input 119 (0x77 char 'w') => 50 (0x32) - 51, // input 120 (0x78 char 'x') => 51 (0x33) - 52, // input 121 (0x79 char 'y') => 52 (0x34) - 53, // input 122 (0x7A char 'z') => 53 (0x35) - INVALID_VALUE, // input 123 (0x7B) - INVALID_VALUE, // input 124 (0x7C) - INVALID_VALUE, // input 125 (0x7D) - INVALID_VALUE, // input 126 (0x7E) - INVALID_VALUE, // input 127 (0x7F) - INVALID_VALUE, // input 128 (0x80) - INVALID_VALUE, // input 129 (0x81) - INVALID_VALUE, // input 130 (0x82) - INVALID_VALUE, // input 131 (0x83) - INVALID_VALUE, // input 132 (0x84) - INVALID_VALUE, // input 133 (0x85) - INVALID_VALUE, // input 134 (0x86) - INVALID_VALUE, // input 135 (0x87) - INVALID_VALUE, // input 136 (0x88) - INVALID_VALUE, // input 137 (0x89) - INVALID_VALUE, // input 138 (0x8A) - INVALID_VALUE, // input 139 (0x8B) - INVALID_VALUE, // input 140 (0x8C) - INVALID_VALUE, // input 141 (0x8D) - INVALID_VALUE, // input 142 (0x8E) - INVALID_VALUE, // input 143 (0x8F) - INVALID_VALUE, // input 144 (0x90) - INVALID_VALUE, // input 145 (0x91) - INVALID_VALUE, // input 146 (0x92) - INVALID_VALUE, // input 147 (0x93) - INVALID_VALUE, // input 148 (0x94) - INVALID_VALUE, // input 149 (0x95) - INVALID_VALUE, // input 150 (0x96) - INVALID_VALUE, // input 151 (0x97) - INVALID_VALUE, // input 152 (0x98) - INVALID_VALUE, // input 153 (0x99) - INVALID_VALUE, // input 154 (0x9A) - INVALID_VALUE, // input 155 (0x9B) - INVALID_VALUE, // input 156 (0x9C) - INVALID_VALUE, // input 157 (0x9D) - INVALID_VALUE, // input 158 (0x9E) - INVALID_VALUE, // input 159 (0x9F) - INVALID_VALUE, // input 160 (0xA0) - INVALID_VALUE, // input 161 (0xA1) - INVALID_VALUE, // input 162 (0xA2) - INVALID_VALUE, // input 163 (0xA3) - INVALID_VALUE, // input 164 (0xA4) - INVALID_VALUE, // input 165 (0xA5) - INVALID_VALUE, // input 166 (0xA6) - INVALID_VALUE, // input 167 (0xA7) - INVALID_VALUE, // input 168 (0xA8) - INVALID_VALUE, // 
input 169 (0xA9) - INVALID_VALUE, // input 170 (0xAA) - INVALID_VALUE, // input 171 (0xAB) - INVALID_VALUE, // input 172 (0xAC) - INVALID_VALUE, // input 173 (0xAD) - INVALID_VALUE, // input 174 (0xAE) - INVALID_VALUE, // input 175 (0xAF) - INVALID_VALUE, // input 176 (0xB0) - INVALID_VALUE, // input 177 (0xB1) - INVALID_VALUE, // input 178 (0xB2) - INVALID_VALUE, // input 179 (0xB3) - INVALID_VALUE, // input 180 (0xB4) - INVALID_VALUE, // input 181 (0xB5) - INVALID_VALUE, // input 182 (0xB6) - INVALID_VALUE, // input 183 (0xB7) - INVALID_VALUE, // input 184 (0xB8) - INVALID_VALUE, // input 185 (0xB9) - INVALID_VALUE, // input 186 (0xBA) - INVALID_VALUE, // input 187 (0xBB) - INVALID_VALUE, // input 188 (0xBC) - INVALID_VALUE, // input 189 (0xBD) - INVALID_VALUE, // input 190 (0xBE) - INVALID_VALUE, // input 191 (0xBF) - INVALID_VALUE, // input 192 (0xC0) - INVALID_VALUE, // input 193 (0xC1) - INVALID_VALUE, // input 194 (0xC2) - INVALID_VALUE, // input 195 (0xC3) - INVALID_VALUE, // input 196 (0xC4) - INVALID_VALUE, // input 197 (0xC5) - INVALID_VALUE, // input 198 (0xC6) - INVALID_VALUE, // input 199 (0xC7) - INVALID_VALUE, // input 200 (0xC8) - INVALID_VALUE, // input 201 (0xC9) - INVALID_VALUE, // input 202 (0xCA) - INVALID_VALUE, // input 203 (0xCB) - INVALID_VALUE, // input 204 (0xCC) - INVALID_VALUE, // input 205 (0xCD) - INVALID_VALUE, // input 206 (0xCE) - INVALID_VALUE, // input 207 (0xCF) - INVALID_VALUE, // input 208 (0xD0) - INVALID_VALUE, // input 209 (0xD1) - INVALID_VALUE, // input 210 (0xD2) - INVALID_VALUE, // input 211 (0xD3) - INVALID_VALUE, // input 212 (0xD4) - INVALID_VALUE, // input 213 (0xD5) - INVALID_VALUE, // input 214 (0xD6) - INVALID_VALUE, // input 215 (0xD7) - INVALID_VALUE, // input 216 (0xD8) - INVALID_VALUE, // input 217 (0xD9) - INVALID_VALUE, // input 218 (0xDA) - INVALID_VALUE, // input 219 (0xDB) - INVALID_VALUE, // input 220 (0xDC) - INVALID_VALUE, // input 221 (0xDD) - INVALID_VALUE, // input 222 (0xDE) - INVALID_VALUE, // 
input 223 (0xDF) - INVALID_VALUE, // input 224 (0xE0) - INVALID_VALUE, // input 225 (0xE1) - INVALID_VALUE, // input 226 (0xE2) - INVALID_VALUE, // input 227 (0xE3) - INVALID_VALUE, // input 228 (0xE4) - INVALID_VALUE, // input 229 (0xE5) - INVALID_VALUE, // input 230 (0xE6) - INVALID_VALUE, // input 231 (0xE7) - INVALID_VALUE, // input 232 (0xE8) - INVALID_VALUE, // input 233 (0xE9) - INVALID_VALUE, // input 234 (0xEA) - INVALID_VALUE, // input 235 (0xEB) - INVALID_VALUE, // input 236 (0xEC) - INVALID_VALUE, // input 237 (0xED) - INVALID_VALUE, // input 238 (0xEE) - INVALID_VALUE, // input 239 (0xEF) - INVALID_VALUE, // input 240 (0xF0) - INVALID_VALUE, // input 241 (0xF1) - INVALID_VALUE, // input 242 (0xF2) - INVALID_VALUE, // input 243 (0xF3) - INVALID_VALUE, // input 244 (0xF4) - INVALID_VALUE, // input 245 (0xF5) - INVALID_VALUE, // input 246 (0xF6) - INVALID_VALUE, // input 247 (0xF7) - INVALID_VALUE, // input 248 (0xF8) - INVALID_VALUE, // input 249 (0xF9) - INVALID_VALUE, // input 250 (0xFA) - INVALID_VALUE, // input 251 (0xFB) - INVALID_VALUE, // input 252 (0xFC) - INVALID_VALUE, // input 253 (0xFD) - INVALID_VALUE, // input 254 (0xFE) - INVALID_VALUE, // input 255 (0xFF) -]; -#[rustfmt::skip] -pub const IMAP_MUTF7_ENCODE: &[u8; 64] = &[ - 65, // input 0 (0x0) => 'A' (0x41) - 66, // input 1 (0x1) => 'B' (0x42) - 67, // input 2 (0x2) => 'C' (0x43) - 68, // input 3 (0x3) => 'D' (0x44) - 69, // input 4 (0x4) => 'E' (0x45) - 70, // input 5 (0x5) => 'F' (0x46) - 71, // input 6 (0x6) => 'G' (0x47) - 72, // input 7 (0x7) => 'H' (0x48) - 73, // input 8 (0x8) => 'I' (0x49) - 74, // input 9 (0x9) => 'J' (0x4A) - 75, // input 10 (0xA) => 'K' (0x4B) - 76, // input 11 (0xB) => 'L' (0x4C) - 77, // input 12 (0xC) => 'M' (0x4D) - 78, // input 13 (0xD) => 'N' (0x4E) - 79, // input 14 (0xE) => 'O' (0x4F) - 80, // input 15 (0xF) => 'P' (0x50) - 81, // input 16 (0x10) => 'Q' (0x51) - 82, // input 17 (0x11) => 'R' (0x52) - 83, // input 18 (0x12) => 'S' (0x53) - 84, // input 
19 (0x13) => 'T' (0x54) - 85, // input 20 (0x14) => 'U' (0x55) - 86, // input 21 (0x15) => 'V' (0x56) - 87, // input 22 (0x16) => 'W' (0x57) - 88, // input 23 (0x17) => 'X' (0x58) - 89, // input 24 (0x18) => 'Y' (0x59) - 90, // input 25 (0x19) => 'Z' (0x5A) - 97, // input 26 (0x1A) => 'a' (0x61) - 98, // input 27 (0x1B) => 'b' (0x62) - 99, // input 28 (0x1C) => 'c' (0x63) - 100, // input 29 (0x1D) => 'd' (0x64) - 101, // input 30 (0x1E) => 'e' (0x65) - 102, // input 31 (0x1F) => 'f' (0x66) - 103, // input 32 (0x20) => 'g' (0x67) - 104, // input 33 (0x21) => 'h' (0x68) - 105, // input 34 (0x22) => 'i' (0x69) - 106, // input 35 (0x23) => 'j' (0x6A) - 107, // input 36 (0x24) => 'k' (0x6B) - 108, // input 37 (0x25) => 'l' (0x6C) - 109, // input 38 (0x26) => 'm' (0x6D) - 110, // input 39 (0x27) => 'n' (0x6E) - 111, // input 40 (0x28) => 'o' (0x6F) - 112, // input 41 (0x29) => 'p' (0x70) - 113, // input 42 (0x2A) => 'q' (0x71) - 114, // input 43 (0x2B) => 'r' (0x72) - 115, // input 44 (0x2C) => 's' (0x73) - 116, // input 45 (0x2D) => 't' (0x74) - 117, // input 46 (0x2E) => 'u' (0x75) - 118, // input 47 (0x2F) => 'v' (0x76) - 119, // input 48 (0x30) => 'w' (0x77) - 120, // input 49 (0x31) => 'x' (0x78) - 121, // input 50 (0x32) => 'y' (0x79) - 122, // input 51 (0x33) => 'z' (0x7A) - 48, // input 52 (0x34) => '0' (0x30) - 49, // input 53 (0x35) => '1' (0x31) - 50, // input 54 (0x36) => '2' (0x32) - 51, // input 55 (0x37) => '3' (0x33) - 52, // input 56 (0x38) => '4' (0x34) - 53, // input 57 (0x39) => '5' (0x35) - 54, // input 58 (0x3A) => '6' (0x36) - 55, // input 59 (0x3B) => '7' (0x37) - 56, // input 60 (0x3C) => '8' (0x38) - 57, // input 61 (0x3D) => '9' (0x39) - 43, // input 62 (0x3E) => '+' (0x2B) - 44, // input 63 (0x3F) => ',' (0x2C) -]; -#[rustfmt::skip] -pub const IMAP_MUTF7_DECODE: &[u8; 256] = &[ - INVALID_VALUE, // input 0 (0x0) - INVALID_VALUE, // input 1 (0x1) - INVALID_VALUE, // input 2 (0x2) - INVALID_VALUE, // input 3 (0x3) - INVALID_VALUE, // input 4 
(0x4) - INVALID_VALUE, // input 5 (0x5) - INVALID_VALUE, // input 6 (0x6) - INVALID_VALUE, // input 7 (0x7) - INVALID_VALUE, // input 8 (0x8) - INVALID_VALUE, // input 9 (0x9) - INVALID_VALUE, // input 10 (0xA) - INVALID_VALUE, // input 11 (0xB) - INVALID_VALUE, // input 12 (0xC) - INVALID_VALUE, // input 13 (0xD) - INVALID_VALUE, // input 14 (0xE) - INVALID_VALUE, // input 15 (0xF) - INVALID_VALUE, // input 16 (0x10) - INVALID_VALUE, // input 17 (0x11) - INVALID_VALUE, // input 18 (0x12) - INVALID_VALUE, // input 19 (0x13) - INVALID_VALUE, // input 20 (0x14) - INVALID_VALUE, // input 21 (0x15) - INVALID_VALUE, // input 22 (0x16) - INVALID_VALUE, // input 23 (0x17) - INVALID_VALUE, // input 24 (0x18) - INVALID_VALUE, // input 25 (0x19) - INVALID_VALUE, // input 26 (0x1A) - INVALID_VALUE, // input 27 (0x1B) - INVALID_VALUE, // input 28 (0x1C) - INVALID_VALUE, // input 29 (0x1D) - INVALID_VALUE, // input 30 (0x1E) - INVALID_VALUE, // input 31 (0x1F) - INVALID_VALUE, // input 32 (0x20) - INVALID_VALUE, // input 33 (0x21) - INVALID_VALUE, // input 34 (0x22) - INVALID_VALUE, // input 35 (0x23) - INVALID_VALUE, // input 36 (0x24) - INVALID_VALUE, // input 37 (0x25) - INVALID_VALUE, // input 38 (0x26) - INVALID_VALUE, // input 39 (0x27) - INVALID_VALUE, // input 40 (0x28) - INVALID_VALUE, // input 41 (0x29) - INVALID_VALUE, // input 42 (0x2A) - 62, // input 43 (0x2B char '+') => 62 (0x3E) - 63, // input 44 (0x2C char ',') => 63 (0x3F) - INVALID_VALUE, // input 45 (0x2D) - INVALID_VALUE, // input 46 (0x2E) - INVALID_VALUE, // input 47 (0x2F) - 52, // input 48 (0x30 char '0') => 52 (0x34) - 53, // input 49 (0x31 char '1') => 53 (0x35) - 54, // input 50 (0x32 char '2') => 54 (0x36) - 55, // input 51 (0x33 char '3') => 55 (0x37) - 56, // input 52 (0x34 char '4') => 56 (0x38) - 57, // input 53 (0x35 char '5') => 57 (0x39) - 58, // input 54 (0x36 char '6') => 58 (0x3A) - 59, // input 55 (0x37 char '7') => 59 (0x3B) - 60, // input 56 (0x38 char '8') => 60 (0x3C) - 61, // input 
57 (0x39 char '9') => 61 (0x3D) - INVALID_VALUE, // input 58 (0x3A) - INVALID_VALUE, // input 59 (0x3B) - INVALID_VALUE, // input 60 (0x3C) - INVALID_VALUE, // input 61 (0x3D) - INVALID_VALUE, // input 62 (0x3E) - INVALID_VALUE, // input 63 (0x3F) - INVALID_VALUE, // input 64 (0x40) - 0, // input 65 (0x41 char 'A') => 0 (0x0) - 1, // input 66 (0x42 char 'B') => 1 (0x1) - 2, // input 67 (0x43 char 'C') => 2 (0x2) - 3, // input 68 (0x44 char 'D') => 3 (0x3) - 4, // input 69 (0x45 char 'E') => 4 (0x4) - 5, // input 70 (0x46 char 'F') => 5 (0x5) - 6, // input 71 (0x47 char 'G') => 6 (0x6) - 7, // input 72 (0x48 char 'H') => 7 (0x7) - 8, // input 73 (0x49 char 'I') => 8 (0x8) - 9, // input 74 (0x4A char 'J') => 9 (0x9) - 10, // input 75 (0x4B char 'K') => 10 (0xA) - 11, // input 76 (0x4C char 'L') => 11 (0xB) - 12, // input 77 (0x4D char 'M') => 12 (0xC) - 13, // input 78 (0x4E char 'N') => 13 (0xD) - 14, // input 79 (0x4F char 'O') => 14 (0xE) - 15, // input 80 (0x50 char 'P') => 15 (0xF) - 16, // input 81 (0x51 char 'Q') => 16 (0x10) - 17, // input 82 (0x52 char 'R') => 17 (0x11) - 18, // input 83 (0x53 char 'S') => 18 (0x12) - 19, // input 84 (0x54 char 'T') => 19 (0x13) - 20, // input 85 (0x55 char 'U') => 20 (0x14) - 21, // input 86 (0x56 char 'V') => 21 (0x15) - 22, // input 87 (0x57 char 'W') => 22 (0x16) - 23, // input 88 (0x58 char 'X') => 23 (0x17) - 24, // input 89 (0x59 char 'Y') => 24 (0x18) - 25, // input 90 (0x5A char 'Z') => 25 (0x19) - INVALID_VALUE, // input 91 (0x5B) - INVALID_VALUE, // input 92 (0x5C) - INVALID_VALUE, // input 93 (0x5D) - INVALID_VALUE, // input 94 (0x5E) - INVALID_VALUE, // input 95 (0x5F) - INVALID_VALUE, // input 96 (0x60) - 26, // input 97 (0x61 char 'a') => 26 (0x1A) - 27, // input 98 (0x62 char 'b') => 27 (0x1B) - 28, // input 99 (0x63 char 'c') => 28 (0x1C) - 29, // input 100 (0x64 char 'd') => 29 (0x1D) - 30, // input 101 (0x65 char 'e') => 30 (0x1E) - 31, // input 102 (0x66 char 'f') => 31 (0x1F) - 32, // input 103 (0x67 
char 'g') => 32 (0x20) - 33, // input 104 (0x68 char 'h') => 33 (0x21) - 34, // input 105 (0x69 char 'i') => 34 (0x22) - 35, // input 106 (0x6A char 'j') => 35 (0x23) - 36, // input 107 (0x6B char 'k') => 36 (0x24) - 37, // input 108 (0x6C char 'l') => 37 (0x25) - 38, // input 109 (0x6D char 'm') => 38 (0x26) - 39, // input 110 (0x6E char 'n') => 39 (0x27) - 40, // input 111 (0x6F char 'o') => 40 (0x28) - 41, // input 112 (0x70 char 'p') => 41 (0x29) - 42, // input 113 (0x71 char 'q') => 42 (0x2A) - 43, // input 114 (0x72 char 'r') => 43 (0x2B) - 44, // input 115 (0x73 char 's') => 44 (0x2C) - 45, // input 116 (0x74 char 't') => 45 (0x2D) - 46, // input 117 (0x75 char 'u') => 46 (0x2E) - 47, // input 118 (0x76 char 'v') => 47 (0x2F) - 48, // input 119 (0x77 char 'w') => 48 (0x30) - 49, // input 120 (0x78 char 'x') => 49 (0x31) - 50, // input 121 (0x79 char 'y') => 50 (0x32) - 51, // input 122 (0x7A char 'z') => 51 (0x33) - INVALID_VALUE, // input 123 (0x7B) - INVALID_VALUE, // input 124 (0x7C) - INVALID_VALUE, // input 125 (0x7D) - INVALID_VALUE, // input 126 (0x7E) - INVALID_VALUE, // input 127 (0x7F) - INVALID_VALUE, // input 128 (0x80) - INVALID_VALUE, // input 129 (0x81) - INVALID_VALUE, // input 130 (0x82) - INVALID_VALUE, // input 131 (0x83) - INVALID_VALUE, // input 132 (0x84) - INVALID_VALUE, // input 133 (0x85) - INVALID_VALUE, // input 134 (0x86) - INVALID_VALUE, // input 135 (0x87) - INVALID_VALUE, // input 136 (0x88) - INVALID_VALUE, // input 137 (0x89) - INVALID_VALUE, // input 138 (0x8A) - INVALID_VALUE, // input 139 (0x8B) - INVALID_VALUE, // input 140 (0x8C) - INVALID_VALUE, // input 141 (0x8D) - INVALID_VALUE, // input 142 (0x8E) - INVALID_VALUE, // input 143 (0x8F) - INVALID_VALUE, // input 144 (0x90) - INVALID_VALUE, // input 145 (0x91) - INVALID_VALUE, // input 146 (0x92) - INVALID_VALUE, // input 147 (0x93) - INVALID_VALUE, // input 148 (0x94) - INVALID_VALUE, // input 149 (0x95) - INVALID_VALUE, // input 150 (0x96) - INVALID_VALUE, // input 
151 (0x97) - INVALID_VALUE, // input 152 (0x98) - INVALID_VALUE, // input 153 (0x99) - INVALID_VALUE, // input 154 (0x9A) - INVALID_VALUE, // input 155 (0x9B) - INVALID_VALUE, // input 156 (0x9C) - INVALID_VALUE, // input 157 (0x9D) - INVALID_VALUE, // input 158 (0x9E) - INVALID_VALUE, // input 159 (0x9F) - INVALID_VALUE, // input 160 (0xA0) - INVALID_VALUE, // input 161 (0xA1) - INVALID_VALUE, // input 162 (0xA2) - INVALID_VALUE, // input 163 (0xA3) - INVALID_VALUE, // input 164 (0xA4) - INVALID_VALUE, // input 165 (0xA5) - INVALID_VALUE, // input 166 (0xA6) - INVALID_VALUE, // input 167 (0xA7) - INVALID_VALUE, // input 168 (0xA8) - INVALID_VALUE, // input 169 (0xA9) - INVALID_VALUE, // input 170 (0xAA) - INVALID_VALUE, // input 171 (0xAB) - INVALID_VALUE, // input 172 (0xAC) - INVALID_VALUE, // input 173 (0xAD) - INVALID_VALUE, // input 174 (0xAE) - INVALID_VALUE, // input 175 (0xAF) - INVALID_VALUE, // input 176 (0xB0) - INVALID_VALUE, // input 177 (0xB1) - INVALID_VALUE, // input 178 (0xB2) - INVALID_VALUE, // input 179 (0xB3) - INVALID_VALUE, // input 180 (0xB4) - INVALID_VALUE, // input 181 (0xB5) - INVALID_VALUE, // input 182 (0xB6) - INVALID_VALUE, // input 183 (0xB7) - INVALID_VALUE, // input 184 (0xB8) - INVALID_VALUE, // input 185 (0xB9) - INVALID_VALUE, // input 186 (0xBA) - INVALID_VALUE, // input 187 (0xBB) - INVALID_VALUE, // input 188 (0xBC) - INVALID_VALUE, // input 189 (0xBD) - INVALID_VALUE, // input 190 (0xBE) - INVALID_VALUE, // input 191 (0xBF) - INVALID_VALUE, // input 192 (0xC0) - INVALID_VALUE, // input 193 (0xC1) - INVALID_VALUE, // input 194 (0xC2) - INVALID_VALUE, // input 195 (0xC3) - INVALID_VALUE, // input 196 (0xC4) - INVALID_VALUE, // input 197 (0xC5) - INVALID_VALUE, // input 198 (0xC6) - INVALID_VALUE, // input 199 (0xC7) - INVALID_VALUE, // input 200 (0xC8) - INVALID_VALUE, // input 201 (0xC9) - INVALID_VALUE, // input 202 (0xCA) - INVALID_VALUE, // input 203 (0xCB) - INVALID_VALUE, // input 204 (0xCC) - INVALID_VALUE, // input 
205 (0xCD) - INVALID_VALUE, // input 206 (0xCE) - INVALID_VALUE, // input 207 (0xCF) - INVALID_VALUE, // input 208 (0xD0) - INVALID_VALUE, // input 209 (0xD1) - INVALID_VALUE, // input 210 (0xD2) - INVALID_VALUE, // input 211 (0xD3) - INVALID_VALUE, // input 212 (0xD4) - INVALID_VALUE, // input 213 (0xD5) - INVALID_VALUE, // input 214 (0xD6) - INVALID_VALUE, // input 215 (0xD7) - INVALID_VALUE, // input 216 (0xD8) - INVALID_VALUE, // input 217 (0xD9) - INVALID_VALUE, // input 218 (0xDA) - INVALID_VALUE, // input 219 (0xDB) - INVALID_VALUE, // input 220 (0xDC) - INVALID_VALUE, // input 221 (0xDD) - INVALID_VALUE, // input 222 (0xDE) - INVALID_VALUE, // input 223 (0xDF) - INVALID_VALUE, // input 224 (0xE0) - INVALID_VALUE, // input 225 (0xE1) - INVALID_VALUE, // input 226 (0xE2) - INVALID_VALUE, // input 227 (0xE3) - INVALID_VALUE, // input 228 (0xE4) - INVALID_VALUE, // input 229 (0xE5) - INVALID_VALUE, // input 230 (0xE6) - INVALID_VALUE, // input 231 (0xE7) - INVALID_VALUE, // input 232 (0xE8) - INVALID_VALUE, // input 233 (0xE9) - INVALID_VALUE, // input 234 (0xEA) - INVALID_VALUE, // input 235 (0xEB) - INVALID_VALUE, // input 236 (0xEC) - INVALID_VALUE, // input 237 (0xED) - INVALID_VALUE, // input 238 (0xEE) - INVALID_VALUE, // input 239 (0xEF) - INVALID_VALUE, // input 240 (0xF0) - INVALID_VALUE, // input 241 (0xF1) - INVALID_VALUE, // input 242 (0xF2) - INVALID_VALUE, // input 243 (0xF3) - INVALID_VALUE, // input 244 (0xF4) - INVALID_VALUE, // input 245 (0xF5) - INVALID_VALUE, // input 246 (0xF6) - INVALID_VALUE, // input 247 (0xF7) - INVALID_VALUE, // input 248 (0xF8) - INVALID_VALUE, // input 249 (0xF9) - INVALID_VALUE, // input 250 (0xFA) - INVALID_VALUE, // input 251 (0xFB) - INVALID_VALUE, // input 252 (0xFC) - INVALID_VALUE, // input 253 (0xFD) - INVALID_VALUE, // input 254 (0xFE) - INVALID_VALUE, // input 255 (0xFF) -]; -#[rustfmt::skip] -pub const BINHEX_ENCODE: &[u8; 64] = &[ - 33, // input 0 (0x0) => '!' 
(0x21) - 34, // input 1 (0x1) => '"' (0x22) - 35, // input 2 (0x2) => '#' (0x23) - 36, // input 3 (0x3) => '$' (0x24) - 37, // input 4 (0x4) => '%' (0x25) - 38, // input 5 (0x5) => '&' (0x26) - 39, // input 6 (0x6) => ''' (0x27) - 40, // input 7 (0x7) => '(' (0x28) - 41, // input 8 (0x8) => ')' (0x29) - 42, // input 9 (0x9) => '*' (0x2A) - 43, // input 10 (0xA) => '+' (0x2B) - 44, // input 11 (0xB) => ',' (0x2C) - 45, // input 12 (0xC) => '-' (0x2D) - 48, // input 13 (0xD) => '0' (0x30) - 49, // input 14 (0xE) => '1' (0x31) - 50, // input 15 (0xF) => '2' (0x32) - 51, // input 16 (0x10) => '3' (0x33) - 52, // input 17 (0x11) => '4' (0x34) - 53, // input 18 (0x12) => '5' (0x35) - 54, // input 19 (0x13) => '6' (0x36) - 55, // input 20 (0x14) => '7' (0x37) - 56, // input 21 (0x15) => '8' (0x38) - 57, // input 22 (0x16) => '9' (0x39) - 64, // input 23 (0x17) => '@' (0x40) - 65, // input 24 (0x18) => 'A' (0x41) - 66, // input 25 (0x19) => 'B' (0x42) - 67, // input 26 (0x1A) => 'C' (0x43) - 68, // input 27 (0x1B) => 'D' (0x44) - 69, // input 28 (0x1C) => 'E' (0x45) - 70, // input 29 (0x1D) => 'F' (0x46) - 71, // input 30 (0x1E) => 'G' (0x47) - 72, // input 31 (0x1F) => 'H' (0x48) - 73, // input 32 (0x20) => 'I' (0x49) - 74, // input 33 (0x21) => 'J' (0x4A) - 75, // input 34 (0x22) => 'K' (0x4B) - 76, // input 35 (0x23) => 'L' (0x4C) - 77, // input 36 (0x24) => 'M' (0x4D) - 78, // input 37 (0x25) => 'N' (0x4E) - 80, // input 38 (0x26) => 'P' (0x50) - 81, // input 39 (0x27) => 'Q' (0x51) - 82, // input 40 (0x28) => 'R' (0x52) - 83, // input 41 (0x29) => 'S' (0x53) - 84, // input 42 (0x2A) => 'T' (0x54) - 85, // input 43 (0x2B) => 'U' (0x55) - 86, // input 44 (0x2C) => 'V' (0x56) - 88, // input 45 (0x2D) => 'X' (0x58) - 89, // input 46 (0x2E) => 'Y' (0x59) - 90, // input 47 (0x2F) => 'Z' (0x5A) - 91, // input 48 (0x30) => '[' (0x5B) - 96, // input 49 (0x31) => '`' (0x60) - 97, // input 50 (0x32) => 'a' (0x61) - 98, // input 51 (0x33) => 'b' (0x62) - 99, // input 52 (0x34) => 
'c' (0x63) - 100, // input 53 (0x35) => 'd' (0x64) - 101, // input 54 (0x36) => 'e' (0x65) - 104, // input 55 (0x37) => 'h' (0x68) - 105, // input 56 (0x38) => 'i' (0x69) - 106, // input 57 (0x39) => 'j' (0x6A) - 107, // input 58 (0x3A) => 'k' (0x6B) - 108, // input 59 (0x3B) => 'l' (0x6C) - 109, // input 60 (0x3C) => 'm' (0x6D) - 112, // input 61 (0x3D) => 'p' (0x70) - 113, // input 62 (0x3E) => 'q' (0x71) - 114, // input 63 (0x3F) => 'r' (0x72) -]; -#[rustfmt::skip] -pub const BINHEX_DECODE: &[u8; 256] = &[ - INVALID_VALUE, // input 0 (0x0) - INVALID_VALUE, // input 1 (0x1) - INVALID_VALUE, // input 2 (0x2) - INVALID_VALUE, // input 3 (0x3) - INVALID_VALUE, // input 4 (0x4) - INVALID_VALUE, // input 5 (0x5) - INVALID_VALUE, // input 6 (0x6) - INVALID_VALUE, // input 7 (0x7) - INVALID_VALUE, // input 8 (0x8) - INVALID_VALUE, // input 9 (0x9) - INVALID_VALUE, // input 10 (0xA) - INVALID_VALUE, // input 11 (0xB) - INVALID_VALUE, // input 12 (0xC) - INVALID_VALUE, // input 13 (0xD) - INVALID_VALUE, // input 14 (0xE) - INVALID_VALUE, // input 15 (0xF) - INVALID_VALUE, // input 16 (0x10) - INVALID_VALUE, // input 17 (0x11) - INVALID_VALUE, // input 18 (0x12) - INVALID_VALUE, // input 19 (0x13) - INVALID_VALUE, // input 20 (0x14) - INVALID_VALUE, // input 21 (0x15) - INVALID_VALUE, // input 22 (0x16) - INVALID_VALUE, // input 23 (0x17) - INVALID_VALUE, // input 24 (0x18) - INVALID_VALUE, // input 25 (0x19) - INVALID_VALUE, // input 26 (0x1A) - INVALID_VALUE, // input 27 (0x1B) - INVALID_VALUE, // input 28 (0x1C) - INVALID_VALUE, // input 29 (0x1D) - INVALID_VALUE, // input 30 (0x1E) - INVALID_VALUE, // input 31 (0x1F) - INVALID_VALUE, // input 32 (0x20) - 0, // input 33 (0x21 char '!') => 0 (0x0) - 1, // input 34 (0x22 char '"') => 1 (0x1) - 2, // input 35 (0x23 char '#') => 2 (0x2) - 3, // input 36 (0x24 char '$') => 3 (0x3) - 4, // input 37 (0x25 char '%') => 4 (0x4) - 5, // input 38 (0x26 char '&') => 5 (0x5) - 6, // input 39 (0x27 char ''') => 6 (0x6) - 7, // input 
40 (0x28 char '(') => 7 (0x7) - 8, // input 41 (0x29 char ')') => 8 (0x8) - 9, // input 42 (0x2A char '*') => 9 (0x9) - 10, // input 43 (0x2B char '+') => 10 (0xA) - 11, // input 44 (0x2C char ',') => 11 (0xB) - 12, // input 45 (0x2D char '-') => 12 (0xC) - INVALID_VALUE, // input 46 (0x2E) - INVALID_VALUE, // input 47 (0x2F) - 13, // input 48 (0x30 char '0') => 13 (0xD) - 14, // input 49 (0x31 char '1') => 14 (0xE) - 15, // input 50 (0x32 char '2') => 15 (0xF) - 16, // input 51 (0x33 char '3') => 16 (0x10) - 17, // input 52 (0x34 char '4') => 17 (0x11) - 18, // input 53 (0x35 char '5') => 18 (0x12) - 19, // input 54 (0x36 char '6') => 19 (0x13) - 20, // input 55 (0x37 char '7') => 20 (0x14) - 21, // input 56 (0x38 char '8') => 21 (0x15) - 22, // input 57 (0x39 char '9') => 22 (0x16) - INVALID_VALUE, // input 58 (0x3A) - INVALID_VALUE, // input 59 (0x3B) - INVALID_VALUE, // input 60 (0x3C) - INVALID_VALUE, // input 61 (0x3D) - INVALID_VALUE, // input 62 (0x3E) - INVALID_VALUE, // input 63 (0x3F) - 23, // input 64 (0x40 char '@') => 23 (0x17) - 24, // input 65 (0x41 char 'A') => 24 (0x18) - 25, // input 66 (0x42 char 'B') => 25 (0x19) - 26, // input 67 (0x43 char 'C') => 26 (0x1A) - 27, // input 68 (0x44 char 'D') => 27 (0x1B) - 28, // input 69 (0x45 char 'E') => 28 (0x1C) - 29, // input 70 (0x46 char 'F') => 29 (0x1D) - 30, // input 71 (0x47 char 'G') => 30 (0x1E) - 31, // input 72 (0x48 char 'H') => 31 (0x1F) - 32, // input 73 (0x49 char 'I') => 32 (0x20) - 33, // input 74 (0x4A char 'J') => 33 (0x21) - 34, // input 75 (0x4B char 'K') => 34 (0x22) - 35, // input 76 (0x4C char 'L') => 35 (0x23) - 36, // input 77 (0x4D char 'M') => 36 (0x24) - 37, // input 78 (0x4E char 'N') => 37 (0x25) - INVALID_VALUE, // input 79 (0x4F) - 38, // input 80 (0x50 char 'P') => 38 (0x26) - 39, // input 81 (0x51 char 'Q') => 39 (0x27) - 40, // input 82 (0x52 char 'R') => 40 (0x28) - 41, // input 83 (0x53 char 'S') => 41 (0x29) - 42, // input 84 (0x54 char 'T') => 42 (0x2A) - 43, // 
input 85 (0x55 char 'U') => 43 (0x2B) - 44, // input 86 (0x56 char 'V') => 44 (0x2C) - INVALID_VALUE, // input 87 (0x57) - 45, // input 88 (0x58 char 'X') => 45 (0x2D) - 46, // input 89 (0x59 char 'Y') => 46 (0x2E) - 47, // input 90 (0x5A char 'Z') => 47 (0x2F) - 48, // input 91 (0x5B char '[') => 48 (0x30) - INVALID_VALUE, // input 92 (0x5C) - INVALID_VALUE, // input 93 (0x5D) - INVALID_VALUE, // input 94 (0x5E) - INVALID_VALUE, // input 95 (0x5F) - 49, // input 96 (0x60 char '`') => 49 (0x31) - 50, // input 97 (0x61 char 'a') => 50 (0x32) - 51, // input 98 (0x62 char 'b') => 51 (0x33) - 52, // input 99 (0x63 char 'c') => 52 (0x34) - 53, // input 100 (0x64 char 'd') => 53 (0x35) - 54, // input 101 (0x65 char 'e') => 54 (0x36) - INVALID_VALUE, // input 102 (0x66) - INVALID_VALUE, // input 103 (0x67) - 55, // input 104 (0x68 char 'h') => 55 (0x37) - 56, // input 105 (0x69 char 'i') => 56 (0x38) - 57, // input 106 (0x6A char 'j') => 57 (0x39) - 58, // input 107 (0x6B char 'k') => 58 (0x3A) - 59, // input 108 (0x6C char 'l') => 59 (0x3B) - 60, // input 109 (0x6D char 'm') => 60 (0x3C) - INVALID_VALUE, // input 110 (0x6E) - INVALID_VALUE, // input 111 (0x6F) - 61, // input 112 (0x70 char 'p') => 61 (0x3D) - 62, // input 113 (0x71 char 'q') => 62 (0x3E) - 63, // input 114 (0x72 char 'r') => 63 (0x3F) - INVALID_VALUE, // input 115 (0x73) - INVALID_VALUE, // input 116 (0x74) - INVALID_VALUE, // input 117 (0x75) - INVALID_VALUE, // input 118 (0x76) - INVALID_VALUE, // input 119 (0x77) - INVALID_VALUE, // input 120 (0x78) - INVALID_VALUE, // input 121 (0x79) - INVALID_VALUE, // input 122 (0x7A) - INVALID_VALUE, // input 123 (0x7B) - INVALID_VALUE, // input 124 (0x7C) - INVALID_VALUE, // input 125 (0x7D) - INVALID_VALUE, // input 126 (0x7E) - INVALID_VALUE, // input 127 (0x7F) - INVALID_VALUE, // input 128 (0x80) - INVALID_VALUE, // input 129 (0x81) - INVALID_VALUE, // input 130 (0x82) - INVALID_VALUE, // input 131 (0x83) - INVALID_VALUE, // input 132 (0x84) - INVALID_VALUE, 
// input 133 (0x85) - INVALID_VALUE, // input 134 (0x86) - INVALID_VALUE, // input 135 (0x87) - INVALID_VALUE, // input 136 (0x88) - INVALID_VALUE, // input 137 (0x89) - INVALID_VALUE, // input 138 (0x8A) - INVALID_VALUE, // input 139 (0x8B) - INVALID_VALUE, // input 140 (0x8C) - INVALID_VALUE, // input 141 (0x8D) - INVALID_VALUE, // input 142 (0x8E) - INVALID_VALUE, // input 143 (0x8F) - INVALID_VALUE, // input 144 (0x90) - INVALID_VALUE, // input 145 (0x91) - INVALID_VALUE, // input 146 (0x92) - INVALID_VALUE, // input 147 (0x93) - INVALID_VALUE, // input 148 (0x94) - INVALID_VALUE, // input 149 (0x95) - INVALID_VALUE, // input 150 (0x96) - INVALID_VALUE, // input 151 (0x97) - INVALID_VALUE, // input 152 (0x98) - INVALID_VALUE, // input 153 (0x99) - INVALID_VALUE, // input 154 (0x9A) - INVALID_VALUE, // input 155 (0x9B) - INVALID_VALUE, // input 156 (0x9C) - INVALID_VALUE, // input 157 (0x9D) - INVALID_VALUE, // input 158 (0x9E) - INVALID_VALUE, // input 159 (0x9F) - INVALID_VALUE, // input 160 (0xA0) - INVALID_VALUE, // input 161 (0xA1) - INVALID_VALUE, // input 162 (0xA2) - INVALID_VALUE, // input 163 (0xA3) - INVALID_VALUE, // input 164 (0xA4) - INVALID_VALUE, // input 165 (0xA5) - INVALID_VALUE, // input 166 (0xA6) - INVALID_VALUE, // input 167 (0xA7) - INVALID_VALUE, // input 168 (0xA8) - INVALID_VALUE, // input 169 (0xA9) - INVALID_VALUE, // input 170 (0xAA) - INVALID_VALUE, // input 171 (0xAB) - INVALID_VALUE, // input 172 (0xAC) - INVALID_VALUE, // input 173 (0xAD) - INVALID_VALUE, // input 174 (0xAE) - INVALID_VALUE, // input 175 (0xAF) - INVALID_VALUE, // input 176 (0xB0) - INVALID_VALUE, // input 177 (0xB1) - INVALID_VALUE, // input 178 (0xB2) - INVALID_VALUE, // input 179 (0xB3) - INVALID_VALUE, // input 180 (0xB4) - INVALID_VALUE, // input 181 (0xB5) - INVALID_VALUE, // input 182 (0xB6) - INVALID_VALUE, // input 183 (0xB7) - INVALID_VALUE, // input 184 (0xB8) - INVALID_VALUE, // input 185 (0xB9) - INVALID_VALUE, // input 186 (0xBA) - INVALID_VALUE, 
// input 187 (0xBB) - INVALID_VALUE, // input 188 (0xBC) - INVALID_VALUE, // input 189 (0xBD) - INVALID_VALUE, // input 190 (0xBE) - INVALID_VALUE, // input 191 (0xBF) - INVALID_VALUE, // input 192 (0xC0) - INVALID_VALUE, // input 193 (0xC1) - INVALID_VALUE, // input 194 (0xC2) - INVALID_VALUE, // input 195 (0xC3) - INVALID_VALUE, // input 196 (0xC4) - INVALID_VALUE, // input 197 (0xC5) - INVALID_VALUE, // input 198 (0xC6) - INVALID_VALUE, // input 199 (0xC7) - INVALID_VALUE, // input 200 (0xC8) - INVALID_VALUE, // input 201 (0xC9) - INVALID_VALUE, // input 202 (0xCA) - INVALID_VALUE, // input 203 (0xCB) - INVALID_VALUE, // input 204 (0xCC) - INVALID_VALUE, // input 205 (0xCD) - INVALID_VALUE, // input 206 (0xCE) - INVALID_VALUE, // input 207 (0xCF) - INVALID_VALUE, // input 208 (0xD0) - INVALID_VALUE, // input 209 (0xD1) - INVALID_VALUE, // input 210 (0xD2) - INVALID_VALUE, // input 211 (0xD3) - INVALID_VALUE, // input 212 (0xD4) - INVALID_VALUE, // input 213 (0xD5) - INVALID_VALUE, // input 214 (0xD6) - INVALID_VALUE, // input 215 (0xD7) - INVALID_VALUE, // input 216 (0xD8) - INVALID_VALUE, // input 217 (0xD9) - INVALID_VALUE, // input 218 (0xDA) - INVALID_VALUE, // input 219 (0xDB) - INVALID_VALUE, // input 220 (0xDC) - INVALID_VALUE, // input 221 (0xDD) - INVALID_VALUE, // input 222 (0xDE) - INVALID_VALUE, // input 223 (0xDF) - INVALID_VALUE, // input 224 (0xE0) - INVALID_VALUE, // input 225 (0xE1) - INVALID_VALUE, // input 226 (0xE2) - INVALID_VALUE, // input 227 (0xE3) - INVALID_VALUE, // input 228 (0xE4) - INVALID_VALUE, // input 229 (0xE5) - INVALID_VALUE, // input 230 (0xE6) - INVALID_VALUE, // input 231 (0xE7) - INVALID_VALUE, // input 232 (0xE8) - INVALID_VALUE, // input 233 (0xE9) - INVALID_VALUE, // input 234 (0xEA) - INVALID_VALUE, // input 235 (0xEB) - INVALID_VALUE, // input 236 (0xEC) - INVALID_VALUE, // input 237 (0xED) - INVALID_VALUE, // input 238 (0xEE) - INVALID_VALUE, // input 239 (0xEF) - INVALID_VALUE, // input 240 (0xF0) - INVALID_VALUE, 
// input 241 (0xF1) - INVALID_VALUE, // input 242 (0xF2) - INVALID_VALUE, // input 243 (0xF3) - INVALID_VALUE, // input 244 (0xF4) - INVALID_VALUE, // input 245 (0xF5) - INVALID_VALUE, // input 246 (0xF6) - INVALID_VALUE, // input 247 (0xF7) - INVALID_VALUE, // input 248 (0xF8) - INVALID_VALUE, // input 249 (0xF9) - INVALID_VALUE, // input 250 (0xFA) - INVALID_VALUE, // input 251 (0xFB) - INVALID_VALUE, // input 252 (0xFC) - INVALID_VALUE, // input 253 (0xFD) - INVALID_VALUE, // input 254 (0xFE) - INVALID_VALUE, // input 255 (0xFF) -]; diff --git a/src/tests.rs b/src/tests.rs index 88748de..7083b54 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -1,11 +1,19 @@ -use crate::{decode_config, encode::encoded_size, encode_config_buf, CharacterSet, Config}; - use std::str; use rand::{ - distributions::{Distribution, Uniform}, + distributions, + distributions::{Distribution as _, Uniform}, seq::SliceRandom, - FromEntropy, Rng, + Rng, SeedableRng, +}; + +use crate::{ + alphabet, + encode::encoded_len, + engine::{ + general_purpose::{GeneralPurpose, GeneralPurposeConfig}, + Config, DecodePaddingMode, Engine, + }, }; #[test] @@ -19,10 +27,10 @@ fn roundtrip_random_config_long() { roundtrip_random_config(Uniform::new(0, 1000), 10_000); } -pub fn assert_encode_sanity(encoded: &str, config: Config, input_len: usize) { +pub fn assert_encode_sanity(encoded: &str, padded: bool, input_len: usize) { let input_rem = input_len % 3; let expected_padding_len = if input_rem > 0 { - if config.pad { + if padded { 3 - input_rem } else { 0 @@ -31,7 +39,7 @@ pub fn assert_encode_sanity(encoded: &str, config: Config, input_len: usize) { 0 }; - let expected_encoded_len = encoded_size(input_len, config).unwrap(); + let expected_encoded_len = encoded_len(input_len, padded).unwrap(); assert_eq!(expected_encoded_len, encoded.len()); @@ -53,29 +61,57 @@ fn roundtrip_random_config(input_len_range: Uniform<usize>, iterations: u32) { let input_len = input_len_range.sample(&mut rng); - let config = 
random_config(&mut rng); + let engine = random_engine(&mut rng); for _ in 0..input_len { input_buf.push(rng.gen()); } - encode_config_buf(&input_buf, config, &mut encoded_buf); + engine.encode_string(&input_buf, &mut encoded_buf); + + assert_encode_sanity(&encoded_buf, engine.config().encode_padding(), input_len); + + assert_eq!(input_buf, engine.decode(&encoded_buf).unwrap()); + } +} - assert_encode_sanity(&encoded_buf, config, input_len); +pub fn random_config<R: Rng>(rng: &mut R) -> GeneralPurposeConfig { + let mode = rng.gen(); + GeneralPurposeConfig::new() + .with_encode_padding(match mode { + DecodePaddingMode::Indifferent => rng.gen(), + DecodePaddingMode::RequireCanonical => true, + DecodePaddingMode::RequireNone => false, + }) + .with_decode_padding_mode(mode) + .with_decode_allow_trailing_bits(rng.gen()) +} - assert_eq!(input_buf, decode_config(&encoded_buf, config).unwrap()); +impl distributions::Distribution<DecodePaddingMode> for distributions::Standard { + fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> DecodePaddingMode { + match rng.gen_range(0..=2) { + 0 => DecodePaddingMode::Indifferent, + 1 => DecodePaddingMode::RequireCanonical, + _ => DecodePaddingMode::RequireNone, + } } } -pub fn random_config<R: Rng>(rng: &mut R) -> Config { - const CHARSETS: &[CharacterSet] = &[ - CharacterSet::UrlSafe, - CharacterSet::Standard, - CharacterSet::Crypt, - CharacterSet::ImapMutf7, - CharacterSet::BinHex, - ]; - let charset = *CHARSETS.choose(rng).unwrap(); - - Config::new(charset, rng.gen()) +pub fn random_alphabet<R: Rng>(rng: &mut R) -> &'static alphabet::Alphabet { + ALPHABETS.choose(rng).unwrap() } + +pub fn random_engine<R: Rng>(rng: &mut R) -> GeneralPurpose { + let alphabet = random_alphabet(rng); + let config = random_config(rng); + GeneralPurpose::new(alphabet, config) +} + +const ALPHABETS: &[alphabet::Alphabet] = &[ + alphabet::URL_SAFE, + alphabet::STANDARD, + alphabet::CRYPT, + alphabet::BCRYPT, + alphabet::IMAP_MUTF7, + alphabet::BIN_HEX, +]; 
diff --git a/src/write/encoder.rs b/src/write/encoder.rs index 4bb57eb..1c19bb4 100644 --- a/src/write/encoder.rs +++ b/src/write/encoder.rs @@ -1,8 +1,7 @@ -use crate::encode::encode_to_slice; -use crate::{encode_config_slice, Config}; +use crate::engine::Engine; use std::{ - cmp, fmt, - io::{ErrorKind, Result, Write}, + cmp, fmt, io, + io::{ErrorKind, Result}, }; pub(crate) const BUF_SIZE: usize = 1024; @@ -23,9 +22,10 @@ const MIN_ENCODE_CHUNK_SIZE: usize = 3; /// /// ``` /// use std::io::Write; +/// use base64::engine::general_purpose; /// /// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc. -/// let mut enc = base64::write::EncoderWriter::new(Vec::new(), base64::STANDARD); +/// let mut enc = base64::write::EncoderWriter::new(Vec::new(), &general_purpose::STANDARD); /// /// // handle errors as you normally would /// enc.write_all(b"asdf").unwrap(); @@ -53,8 +53,15 @@ const MIN_ENCODE_CHUNK_SIZE: usize = 3; /// /// It has some minor performance loss compared to encoding slices (a couple percent). /// It does not do any heap allocation. -pub struct EncoderWriter<W: Write> { - config: Config, +/// +/// # Limitations +/// +/// Owing to the specification of the `write` and `flush` methods on the `Write` trait and their +/// implications for a buffering implementation, these methods may not behave as expected. In +/// particular, calling `write_all` on this interface may fail with `io::ErrorKind::WriteZero`. +/// See the documentation of the `Write` trait implementation for further details. +pub struct EncoderWriter<'e, E: Engine, W: io::Write> { + engine: &'e E, /// Where encoded data is written to. It's an Option as it's None immediately before Drop is /// called so that finish() can return the underlying writer. None implies that finish() has /// been called successfully. 
@@ -73,7 +80,7 @@ pub struct EncoderWriter<W: Write> { panicked: bool, } -impl<W: Write> fmt::Debug for EncoderWriter<W> { +impl<'e, E: Engine, W: io::Write> fmt::Debug for EncoderWriter<'e, E, W> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, @@ -86,12 +93,12 @@ impl<W: Write> fmt::Debug for EncoderWriter<W> { } } -impl<W: Write> EncoderWriter<W> { - /// Create a new encoder that will write to the provided delegate writer `w`. - pub fn new(w: W, config: Config) -> EncoderWriter<W> { +impl<'e, E: Engine, W: io::Write> EncoderWriter<'e, E, W> { + /// Create a new encoder that will write to the provided delegate writer. + pub fn new(delegate: W, engine: &'e E) -> EncoderWriter<'e, E, W> { EncoderWriter { - config, - delegate: Some(w), + engine, + delegate: Some(delegate), extra_input: [0u8; MIN_ENCODE_CHUNK_SIZE], extra_input_occupied_len: 0, output: [0u8; BUF_SIZE], @@ -120,7 +127,7 @@ impl<W: Write> EncoderWriter<W> { // If we could consume self in finish(), we wouldn't have to worry about this case, but // finish() is retryable in the face of I/O errors, so we can't consume here. 
if self.delegate.is_none() { - panic!("Encoder has already had finish() called") + panic!("Encoder has already had finish() called"); }; self.write_final_leftovers()?; @@ -141,11 +148,13 @@ impl<W: Write> EncoderWriter<W> { self.write_all_encoded_output()?; if self.extra_input_occupied_len > 0 { - let encoded_len = encode_config_slice( - &self.extra_input[..self.extra_input_occupied_len], - self.config, - &mut self.output[..], - ); + let encoded_len = self + .engine + .encode_slice( + &self.extra_input[..self.extra_input_occupied_len], + &mut self.output[..], + ) + .expect("buffer is large enough"); self.output_occupied_len = encoded_len; @@ -182,7 +191,7 @@ impl<W: Write> EncoderWriter<W> { self.output_occupied_len = current_output_len.checked_sub(consumed).unwrap(); // If we're blocking on I/O, the minor inefficiency of copying bytes to the // start of the buffer is the least of our concerns... - // Rotate moves more than we need to, but copy_within isn't stabilized yet. + // TODO Rotate moves more than we need to; copy_within now stable. self.output.rotate_left(consumed); } else { self.output_occupied_len = 0; @@ -215,9 +224,28 @@ impl<W: Write> EncoderWriter<W> { debug_assert_eq!(0, self.output_occupied_len); Ok(()) } + + /// Unwraps this `EncoderWriter`, returning the base writer it writes base64 encoded output + /// to. + /// + /// Normally this method should not be needed, since `finish()` returns the inner writer if + /// it completes successfully. That will also ensure all data has been flushed, which the + /// `into_inner()` function does *not* do. + /// + /// Calling this method after `finish()` has completed successfully will panic, since the + /// writer has already been returned. + /// + /// This method may be useful if the writer implements additional APIs beyond the `Write` + /// trait. Note that the inner writer might be in an error state or have an incomplete + /// base64 string written to it. 
+ pub fn into_inner(mut self) -> W { + self.delegate + .take() + .expect("Encoder has already had finish() called") + } } -impl<W: Write> Write for EncoderWriter<W> { +impl<'e, E: Engine, W: io::Write> io::Write for EncoderWriter<'e, E, W> { /// Encode input and then write to the delegate writer. /// /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes @@ -286,10 +314,9 @@ impl<W: Write> Write for EncoderWriter<W> { self.extra_input[self.extra_input_occupied_len..MIN_ENCODE_CHUNK_SIZE] .copy_from_slice(&input[0..extra_input_read_len]); - let len = encode_to_slice( + let len = self.engine.internal_encode( &self.extra_input[0..MIN_ENCODE_CHUNK_SIZE], &mut self.output[..], - self.config.char_set.encode_table(), ); debug_assert_eq!(4, len); @@ -335,10 +362,9 @@ impl<W: Write> Write for EncoderWriter<W> { debug_assert_eq!(0, max_input_len % MIN_ENCODE_CHUNK_SIZE); debug_assert_eq!(0, input_chunks_to_encode_len % MIN_ENCODE_CHUNK_SIZE); - encoded_size += encode_to_slice( + encoded_size += self.engine.internal_encode( &input[..(input_chunks_to_encode_len)], &mut self.output[encoded_size..], - self.config.char_set.encode_table(), ); // not updating `self.output_occupied_len` here because if the below write fails, it should @@ -371,7 +397,7 @@ impl<W: Write> Write for EncoderWriter<W> { } } -impl<W: Write> Drop for EncoderWriter<W> { +impl<'e, E: Engine, W: io::Write> Drop for EncoderWriter<'e, E, W> { fn drop(&mut self) { if !self.panicked { // like `BufWriter`, ignore errors during drop diff --git a/src/write/encoder_string_writer.rs b/src/write/encoder_string_writer.rs index 58b1c0a..9394dc9 100644 --- a/src/write/encoder_string_writer.rs +++ b/src/write/encoder_string_writer.rs @@ -1,10 +1,10 @@ use super::encoder::EncoderWriter; -use crate::Config; +use crate::engine::Engine; use std::io; -use std::io::Write; /// A `Write` implementation that base64-encodes data using the provided config and accumulates the -/// resulting base64 
in memory, which is then exposed as a String via `into_inner()`. +/// resulting base64 utf8 `&str` in a [StrConsumer] implementation (typically `String`), which is +/// then exposed via `into_inner()`. /// /// # Examples /// @@ -12,8 +12,9 @@ use std::io::Write; /// /// ``` /// use std::io::Write; +/// use base64::engine::general_purpose; /// -/// let mut enc = base64::write::EncoderStringWriter::new(base64::STANDARD); +/// let mut enc = base64::write::EncoderStringWriter::new(&general_purpose::STANDARD); /// /// enc.write_all(b"asdf").unwrap(); /// @@ -23,14 +24,17 @@ use std::io::Write; /// assert_eq!("YXNkZg==", &b64_string); /// ``` /// -/// Or, append to an existing String: +/// Or, append to an existing `String`, which implements `StrConsumer`: /// /// ``` /// use std::io::Write; +/// use base64::engine::general_purpose; /// /// let mut buf = String::from("base64: "); /// -/// let mut enc = base64::write::EncoderStringWriter::from(&mut buf, base64::STANDARD); +/// let mut enc = base64::write::EncoderStringWriter::from_consumer( +/// &mut buf, +/// &general_purpose::STANDARD); /// /// enc.write_all(b"asdf").unwrap(); /// @@ -49,40 +53,38 @@ use std::io::Write; /// /// Because it has to validate that the base64 is UTF-8, it is about 80% as fast as writing plain /// bytes to a `io::Write`. -pub struct EncoderStringWriter<S: StrConsumer> { - encoder: EncoderWriter<Utf8SingleCodeUnitWriter<S>>, +pub struct EncoderStringWriter<'e, E: Engine, S: StrConsumer> { + encoder: EncoderWriter<'e, E, Utf8SingleCodeUnitWriter<S>>, } -impl<S: StrConsumer> EncoderStringWriter<S> { +impl<'e, E: Engine, S: StrConsumer> EncoderStringWriter<'e, E, S> { /// Create a EncoderStringWriter that will append to the provided `StrConsumer`. 
- pub fn from(str_consumer: S, config: Config) -> Self { + pub fn from_consumer(str_consumer: S, engine: &'e E) -> Self { EncoderStringWriter { - encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_consumer }, config), + encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_consumer }, engine), } } /// Encode all remaining buffered data, including any trailing incomplete input triples and /// associated padding. /// - /// Once this succeeds, no further writes or calls to this method are allowed. - /// /// Returns the base64-encoded form of the accumulated written data. pub fn into_inner(mut self) -> S { self.encoder .finish() - .expect("Writing to a Vec<u8> should never fail") + .expect("Writing to a consumer should never fail") .str_consumer } } -impl EncoderStringWriter<String> { - /// Create a EncoderStringWriter that will encode into a new String with the provided config. - pub fn new(config: Config) -> Self { - EncoderStringWriter::from(String::new(), config) +impl<'e, E: Engine> EncoderStringWriter<'e, E, String> { + /// Create a EncoderStringWriter that will encode into a new `String` with the provided config. + pub fn new(engine: &'e E) -> Self { + EncoderStringWriter::from_consumer(String::new(), engine) } } -impl<S: StrConsumer> Write for EncoderStringWriter<S> { +impl<'e, E: Engine, S: StrConsumer> io::Write for EncoderStringWriter<'e, E, S> { fn write(&mut self, buf: &[u8]) -> io::Result<usize> { self.encoder.write(buf) } @@ -101,14 +103,14 @@ pub trait StrConsumer { /// As for io::Write, `StrConsumer` is implemented automatically for `&mut S`. 
impl<S: StrConsumer + ?Sized> StrConsumer for &mut S { fn consume(&mut self, buf: &str) { - (**self).consume(buf) + (**self).consume(buf); } } /// Pushes the str onto the end of the String impl StrConsumer for String { fn consume(&mut self, buf: &str) { - self.push_str(buf) + self.push_str(buf); } } @@ -138,9 +140,9 @@ impl<S: StrConsumer> io::Write for Utf8SingleCodeUnitWriter<S> { #[cfg(test)] mod tests { - use crate::encode_config_buf; - use crate::tests::random_config; - use crate::write::encoder_string_writer::EncoderStringWriter; + use crate::{ + engine::Engine, tests::random_engine, write::encoder_string_writer::EncoderStringWriter, + }; use rand::Rng; use std::io::Write; @@ -160,10 +162,10 @@ mod tests { orig_data.push(rng.gen()); } - let config = random_config(&mut rng); - encode_config_buf(&orig_data, config, &mut normal_encoded); + let engine = random_engine(&mut rng); + engine.encode_string(&orig_data, &mut normal_encoded); - let mut stream_encoder = EncoderStringWriter::new(config); + let mut stream_encoder = EncoderStringWriter::new(&engine); // Write the first i bytes, then the rest stream_encoder.write_all(&orig_data[0..i]).unwrap(); stream_encoder.write_all(&orig_data[i..]).unwrap(); diff --git a/src/write/encoder_tests.rs b/src/write/encoder_tests.rs index 09b4d3a..ce76d63 100644 --- a/src/write/encoder_tests.rs +++ b/src/write/encoder_tests.rs @@ -1,29 +1,39 @@ -use super::EncoderWriter; -use crate::tests::random_config; -use crate::{encode_config, encode_config_buf, STANDARD_NO_PAD, URL_SAFE}; - use std::io::{Cursor, Write}; use std::{cmp, io, str}; use rand::Rng; +use crate::{ + alphabet::{STANDARD, URL_SAFE}, + engine::{ + general_purpose::{GeneralPurpose, NO_PAD, PAD}, + Engine, + }, + tests::random_engine, +}; + +use super::EncoderWriter; + +const URL_SAFE_ENGINE: GeneralPurpose = GeneralPurpose::new(&URL_SAFE, PAD); +const NO_PAD_ENGINE: GeneralPurpose = GeneralPurpose::new(&STANDARD, NO_PAD); + #[test] fn encode_three_bytes() { let mut c = 
Cursor::new(Vec::new()); { - let mut enc = EncoderWriter::new(&mut c, URL_SAFE); + let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE); let sz = enc.write(b"abc").unwrap(); assert_eq!(sz, 3); } - assert_eq!(&c.get_ref()[..], encode_config("abc", URL_SAFE).as_bytes()); + assert_eq!(&c.get_ref()[..], URL_SAFE_ENGINE.encode("abc").as_bytes()); } #[test] fn encode_nine_bytes_two_writes() { let mut c = Cursor::new(Vec::new()); { - let mut enc = EncoderWriter::new(&mut c, URL_SAFE); + let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE); let sz = enc.write(b"abcdef").unwrap(); assert_eq!(sz, 6); @@ -32,7 +42,7 @@ fn encode_nine_bytes_two_writes() { } assert_eq!( &c.get_ref()[..], - encode_config("abcdefghi", URL_SAFE).as_bytes() + URL_SAFE_ENGINE.encode("abcdefghi").as_bytes() ); } @@ -40,21 +50,21 @@ fn encode_nine_bytes_two_writes() { fn encode_one_then_two_bytes() { let mut c = Cursor::new(Vec::new()); { - let mut enc = EncoderWriter::new(&mut c, URL_SAFE); + let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE); let sz = enc.write(b"a").unwrap(); assert_eq!(sz, 1); let sz = enc.write(b"bc").unwrap(); assert_eq!(sz, 2); } - assert_eq!(&c.get_ref()[..], encode_config("abc", URL_SAFE).as_bytes()); + assert_eq!(&c.get_ref()[..], URL_SAFE_ENGINE.encode("abc").as_bytes()); } #[test] fn encode_one_then_five_bytes() { let mut c = Cursor::new(Vec::new()); { - let mut enc = EncoderWriter::new(&mut c, URL_SAFE); + let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE); let sz = enc.write(b"a").unwrap(); assert_eq!(sz, 1); @@ -63,7 +73,7 @@ fn encode_one_then_five_bytes() { } assert_eq!( &c.get_ref()[..], - encode_config("abcdef", URL_SAFE).as_bytes() + URL_SAFE_ENGINE.encode("abcdef").as_bytes() ); } @@ -71,7 +81,7 @@ fn encode_one_then_five_bytes() { fn encode_1_2_3_bytes() { let mut c = Cursor::new(Vec::new()); { - let mut enc = EncoderWriter::new(&mut c, URL_SAFE); + let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE); let sz = 
enc.write(b"a").unwrap(); assert_eq!(sz, 1); @@ -82,7 +92,7 @@ fn encode_1_2_3_bytes() { } assert_eq!( &c.get_ref()[..], - encode_config("abcdef", URL_SAFE).as_bytes() + URL_SAFE_ENGINE.encode("abcdef").as_bytes() ); } @@ -90,20 +100,20 @@ fn encode_1_2_3_bytes() { fn encode_with_padding() { let mut c = Cursor::new(Vec::new()); { - let mut enc = EncoderWriter::new(&mut c, URL_SAFE); + let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE); enc.write_all(b"abcd").unwrap(); enc.flush().unwrap(); } - assert_eq!(&c.get_ref()[..], encode_config("abcd", URL_SAFE).as_bytes()); + assert_eq!(&c.get_ref()[..], URL_SAFE_ENGINE.encode("abcd").as_bytes()); } #[test] fn encode_with_padding_multiple_writes() { let mut c = Cursor::new(Vec::new()); { - let mut enc = EncoderWriter::new(&mut c, URL_SAFE); + let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE); assert_eq!(1, enc.write(b"a").unwrap()); assert_eq!(2, enc.write(b"bc").unwrap()); @@ -114,7 +124,7 @@ fn encode_with_padding_multiple_writes() { } assert_eq!( &c.get_ref()[..], - encode_config("abcdefg", URL_SAFE).as_bytes() + URL_SAFE_ENGINE.encode("abcdefg").as_bytes() ); } @@ -122,7 +132,7 @@ fn encode_with_padding_multiple_writes() { fn finish_writes_extra_byte() { let mut c = Cursor::new(Vec::new()); { - let mut enc = EncoderWriter::new(&mut c, URL_SAFE); + let mut enc = EncoderWriter::new(&mut c, &URL_SAFE_ENGINE); assert_eq!(6, enc.write(b"abcdef").unwrap()); @@ -134,7 +144,7 @@ fn finish_writes_extra_byte() { } assert_eq!( &c.get_ref()[..], - encode_config("abcdefg", URL_SAFE).as_bytes() + URL_SAFE_ENGINE.encode("abcdefg").as_bytes() ); } @@ -142,17 +152,14 @@ fn finish_writes_extra_byte() { fn write_partial_chunk_encodes_partial_chunk() { let mut c = Cursor::new(Vec::new()); { - let mut enc = EncoderWriter::new(&mut c, STANDARD_NO_PAD); + let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE); // nothing encoded yet assert_eq!(2, enc.write(b"ab").unwrap()); // encoded here let _ = enc.finish().unwrap(); } 
- assert_eq!( - &c.get_ref()[..], - encode_config("ab", STANDARD_NO_PAD).as_bytes() - ); + assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("ab").as_bytes()); assert_eq!(3, c.get_ref().len()); } @@ -160,15 +167,12 @@ fn write_partial_chunk_encodes_partial_chunk() { fn write_1_chunk_encodes_complete_chunk() { let mut c = Cursor::new(Vec::new()); { - let mut enc = EncoderWriter::new(&mut c, STANDARD_NO_PAD); + let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE); assert_eq!(3, enc.write(b"abc").unwrap()); let _ = enc.finish().unwrap(); } - assert_eq!( - &c.get_ref()[..], - encode_config("abc", STANDARD_NO_PAD).as_bytes() - ); + assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abc").as_bytes()); assert_eq!(4, c.get_ref().len()); } @@ -176,16 +180,13 @@ fn write_1_chunk_encodes_complete_chunk() { fn write_1_chunk_and_partial_encodes_only_complete_chunk() { let mut c = Cursor::new(Vec::new()); { - let mut enc = EncoderWriter::new(&mut c, STANDARD_NO_PAD); + let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE); - // "d" not written + // "d" not consumed since it's not a full chunk assert_eq!(3, enc.write(b"abcd").unwrap()); let _ = enc.finish().unwrap(); } - assert_eq!( - &c.get_ref()[..], - encode_config("abc", STANDARD_NO_PAD).as_bytes() - ); + assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abc").as_bytes()); assert_eq!(4, c.get_ref().len()); } @@ -193,16 +194,13 @@ fn write_1_chunk_and_partial_encodes_only_complete_chunk() { fn write_2_partials_to_exactly_complete_chunk_encodes_complete_chunk() { let mut c = Cursor::new(Vec::new()); { - let mut enc = EncoderWriter::new(&mut c, STANDARD_NO_PAD); + let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE); assert_eq!(1, enc.write(b"a").unwrap()); assert_eq!(2, enc.write(b"bc").unwrap()); let _ = enc.finish().unwrap(); } - assert_eq!( - &c.get_ref()[..], - encode_config("abc", STANDARD_NO_PAD).as_bytes() - ); + assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abc").as_bytes()); assert_eq!(4, 
c.get_ref().len()); } @@ -211,17 +209,14 @@ fn write_partial_then_enough_to_complete_chunk_but_not_complete_another_chunk_en ) { let mut c = Cursor::new(Vec::new()); { - let mut enc = EncoderWriter::new(&mut c, STANDARD_NO_PAD); + let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE); assert_eq!(1, enc.write(b"a").unwrap()); // doesn't consume "d" assert_eq!(2, enc.write(b"bcd").unwrap()); let _ = enc.finish().unwrap(); } - assert_eq!( - &c.get_ref()[..], - encode_config("abc", STANDARD_NO_PAD).as_bytes() - ); + assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abc").as_bytes()); assert_eq!(4, c.get_ref().len()); } @@ -229,17 +224,14 @@ fn write_partial_then_enough_to_complete_chunk_but_not_complete_another_chunk_en fn write_partial_then_enough_to_complete_chunk_and_another_chunk_encodes_complete_chunks() { let mut c = Cursor::new(Vec::new()); { - let mut enc = EncoderWriter::new(&mut c, STANDARD_NO_PAD); + let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE); assert_eq!(1, enc.write(b"a").unwrap()); // completes partial chunk, and another chunk assert_eq!(5, enc.write(b"bcdef").unwrap()); let _ = enc.finish().unwrap(); } - assert_eq!( - &c.get_ref()[..], - encode_config("abcdef", STANDARD_NO_PAD).as_bytes() - ); + assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abcdef").as_bytes()); assert_eq!(8, c.get_ref().len()); } @@ -248,7 +240,7 @@ fn write_partial_then_enough_to_complete_chunk_and_another_chunk_and_another_par ) { let mut c = Cursor::new(Vec::new()); { - let mut enc = EncoderWriter::new(&mut c, STANDARD_NO_PAD); + let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE); assert_eq!(1, enc.write(b"a").unwrap()); // completes partial chunk, and another chunk, with one more partial chunk that's not @@ -256,10 +248,7 @@ fn write_partial_then_enough_to_complete_chunk_and_another_chunk_and_another_par assert_eq!(5, enc.write(b"bcdefe").unwrap()); let _ = enc.finish().unwrap(); } - assert_eq!( - &c.get_ref()[..], - encode_config("abcdef", 
STANDARD_NO_PAD).as_bytes() - ); + assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("abcdef").as_bytes()); assert_eq!(8, c.get_ref().len()); } @@ -267,13 +256,10 @@ fn write_partial_then_enough_to_complete_chunk_and_another_chunk_and_another_par fn drop_calls_finish_for_you() { let mut c = Cursor::new(Vec::new()); { - let mut enc = EncoderWriter::new(&mut c, STANDARD_NO_PAD); + let mut enc = EncoderWriter::new(&mut c, &NO_PAD_ENGINE); assert_eq!(1, enc.write(b"a").unwrap()); } - assert_eq!( - &c.get_ref()[..], - encode_config("a", STANDARD_NO_PAD).as_bytes() - ); + assert_eq!(&c.get_ref()[..], NO_PAD_ENGINE.encode("a").as_bytes()); assert_eq!(2, c.get_ref().len()); } @@ -295,11 +281,11 @@ fn every_possible_split_of_input() { orig_data.push(rng.gen()); } - let config = random_config(&mut rng); - encode_config_buf(&orig_data, config, &mut normal_encoded); + let engine = random_engine(&mut rng); + engine.encode_string(&orig_data, &mut normal_encoded); { - let mut stream_encoder = EncoderWriter::new(&mut stream_encoded, config); + let mut stream_encoder = EncoderWriter::new(&mut stream_encoded, &engine); // Write the first i bytes, then the rest stream_encoder.write_all(&orig_data[0..i]).unwrap(); stream_encoder.write_all(&orig_data[i..]).unwrap(); @@ -312,12 +298,12 @@ fn every_possible_split_of_input() { #[test] fn encode_random_config_matches_normal_encode_reasonable_input_len() { // choose up to 2 * buf size, so ~half the time it'll use a full buffer - do_encode_random_config_matches_normal_encode(super::encoder::BUF_SIZE * 2) + do_encode_random_config_matches_normal_encode(super::encoder::BUF_SIZE * 2); } #[test] fn encode_random_config_matches_normal_encode_tiny_input_len() { - do_encode_random_config_matches_normal_encode(10) + do_encode_random_config_matches_normal_encode(10); } #[test] @@ -332,14 +318,14 @@ fn retrying_writes_that_error_with_interrupted_works() { stream_encoded.clear(); normal_encoded.clear(); - let orig_len: usize = rng.gen_range(100, 
20_000); + let orig_len: usize = rng.gen_range(100..20_000); for _ in 0..orig_len { orig_data.push(rng.gen()); } // encode the normal way - let config = random_config(&mut rng); - encode_config_buf(&orig_data, config, &mut normal_encoded); + let engine = random_engine(&mut rng); + engine.encode_string(&orig_data, &mut normal_encoded); // encode via the stream encoder { @@ -350,12 +336,12 @@ fn retrying_writes_that_error_with_interrupted_works() { fraction: 0.8, }; - let mut stream_encoder = EncoderWriter::new(&mut interrupting_writer, config); + let mut stream_encoder = EncoderWriter::new(&mut interrupting_writer, &engine); let mut bytes_consumed = 0; while bytes_consumed < orig_len { // use short inputs since we want to use `extra` a lot as that's what needs rollback // when errors occur - let input_len: usize = cmp::min(rng.gen_range(0, 10), orig_len - bytes_consumed); + let input_len: usize = cmp::min(rng.gen_range(0..10), orig_len - bytes_consumed); retry_interrupted_write_all( &mut stream_encoder, @@ -396,14 +382,14 @@ fn writes_that_only_write_part_of_input_and_sometimes_interrupt_produce_correct_ stream_encoded.clear(); normal_encoded.clear(); - let orig_len: usize = rng.gen_range(100, 20_000); + let orig_len: usize = rng.gen_range(100..20_000); for _ in 0..orig_len { orig_data.push(rng.gen()); } // encode the normal way - let config = random_config(&mut rng); - encode_config_buf(&orig_data, config, &mut normal_encoded); + let engine = random_engine(&mut rng); + engine.encode_string(&orig_data, &mut normal_encoded); // encode via the stream encoder { @@ -415,11 +401,11 @@ fn writes_that_only_write_part_of_input_and_sometimes_interrupt_produce_correct_ no_interrupt_fraction: 0.1, }; - let mut stream_encoder = EncoderWriter::new(&mut partial_writer, config); + let mut stream_encoder = EncoderWriter::new(&mut partial_writer, &engine); let mut bytes_consumed = 0; while bytes_consumed < orig_len { // use at most medium-length inputs to exercise retry logic more 
aggressively - let input_len: usize = cmp::min(rng.gen_range(0, 100), orig_len - bytes_consumed); + let input_len: usize = cmp::min(rng.gen_range(0..100), orig_len - bytes_consumed); let res = stream_encoder.write(&orig_data[bytes_consumed..bytes_consumed + input_len]); @@ -475,22 +461,22 @@ fn do_encode_random_config_matches_normal_encode(max_input_len: usize) { stream_encoded.clear(); normal_encoded.clear(); - let orig_len: usize = rng.gen_range(100, 20_000); + let orig_len: usize = rng.gen_range(100..20_000); for _ in 0..orig_len { orig_data.push(rng.gen()); } // encode the normal way - let config = random_config(&mut rng); - encode_config_buf(&orig_data, config, &mut normal_encoded); + let engine = random_engine(&mut rng); + engine.encode_string(&orig_data, &mut normal_encoded); // encode via the stream encoder { - let mut stream_encoder = EncoderWriter::new(&mut stream_encoded, config); + let mut stream_encoder = EncoderWriter::new(&mut stream_encoded, &engine); let mut bytes_consumed = 0; while bytes_consumed < orig_len { let input_len: usize = - cmp::min(rng.gen_range(0, max_input_len), orig_len - bytes_consumed); + cmp::min(rng.gen_range(0..max_input_len), orig_len - bytes_consumed); // write a little bit of the data stream_encoder @@ -520,7 +506,7 @@ struct InterruptingWriter<'a, W: 'a + Write, R: 'a + Rng> { impl<'a, W: Write, R: Rng> Write for InterruptingWriter<'a, W, R> { fn write(&mut self, buf: &[u8]) -> io::Result<usize> { - if self.rng.gen_range(0.0, 1.0) <= self.fraction { + if self.rng.gen_range(0.0..1.0) <= self.fraction { return Err(io::Error::new(io::ErrorKind::Interrupted, "interrupted")); } @@ -528,7 +514,7 @@ impl<'a, W: Write, R: Rng> Write for InterruptingWriter<'a, W, R> { } fn flush(&mut self) -> io::Result<()> { - if self.rng.gen_range(0.0, 1.0) <= self.fraction { + if self.rng.gen_range(0.0..1.0) <= self.fraction { return Err(io::Error::new(io::ErrorKind::Interrupted, "interrupted")); } @@ -548,17 +534,17 @@ struct 
PartialInterruptingWriter<'a, W: 'a + Write, R: 'a + Rng> { impl<'a, W: Write, R: Rng> Write for PartialInterruptingWriter<'a, W, R> { fn write(&mut self, buf: &[u8]) -> io::Result<usize> { - if self.rng.gen_range(0.0, 1.0) > self.no_interrupt_fraction { + if self.rng.gen_range(0.0..1.0) > self.no_interrupt_fraction { return Err(io::Error::new(io::ErrorKind::Interrupted, "interrupted")); } - if self.rng.gen_range(0.0, 1.0) <= self.full_input_fraction || buf.len() == 0 { + if self.rng.gen_range(0.0..1.0) <= self.full_input_fraction || buf.is_empty() { // pass through the buf untouched self.w.write(buf) } else { // only use a prefix of it self.w - .write(&buf[0..(self.rng.gen_range(0, buf.len() - 1))]) + .write(&buf[0..(self.rng.gen_range(0..(buf.len() - 1)))]) } } diff --git a/src/write/mod.rs b/src/write/mod.rs index 98cb48c..2a617db 100644 --- a/src/write/mod.rs +++ b/src/write/mod.rs @@ -1,8 +1,11 @@ //! Implementations of `io::Write` to transparently handle base64. mod encoder; mod encoder_string_writer; -pub use self::encoder::EncoderWriter; -pub use self::encoder_string_writer::EncoderStringWriter; + +pub use self::{ + encoder::EncoderWriter, + encoder_string_writer::{EncoderStringWriter, StrConsumer}, +}; #[cfg(test)] mod encoder_tests; diff --git a/tests/decode.rs b/tests/decode.rs deleted file mode 100644 index 282bccd..0000000 --- a/tests/decode.rs +++ /dev/null @@ -1,330 +0,0 @@ -extern crate base64; - -use base64::*; - -mod helpers; - -use self::helpers::*; - -#[test] -fn decode_rfc4648_0() { - compare_decode("", ""); -} - -#[test] -fn decode_rfc4648_1() { - compare_decode("f", "Zg=="); -} - -#[test] -fn decode_rfc4648_1_just_a_bit_of_padding() { - // allows less padding than required - compare_decode("f", "Zg="); -} - -#[test] -fn decode_rfc4648_1_no_padding() { - compare_decode("f", "Zg"); -} - -#[test] -fn decode_rfc4648_2() { - compare_decode("fo", "Zm8="); -} - -#[test] -fn decode_rfc4648_2_no_padding() { - compare_decode("fo", "Zm8"); -} - -#[test] 
-fn decode_rfc4648_3() { - compare_decode("foo", "Zm9v"); -} - -#[test] -fn decode_rfc4648_4() { - compare_decode("foob", "Zm9vYg=="); -} - -#[test] -fn decode_rfc4648_4_no_padding() { - compare_decode("foob", "Zm9vYg"); -} - -#[test] -fn decode_rfc4648_5() { - compare_decode("fooba", "Zm9vYmE="); -} - -#[test] -fn decode_rfc4648_5_no_padding() { - compare_decode("fooba", "Zm9vYmE"); -} - -#[test] -fn decode_rfc4648_6() { - compare_decode("foobar", "Zm9vYmFy"); -} - -#[test] -fn decode_reject_null() { - assert_eq!( - DecodeError::InvalidByte(3, 0x0), - decode_config("YWx\0pY2U==", config_std_pad()).unwrap_err() - ); -} - -#[test] -fn decode_single_pad_byte_after_2_chars_in_trailing_quad_ok() { - for num_quads in 0..25 { - let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); - s.push_str("Zg="); - - let input_len = num_quads * 3 + 1; - - // Since there are 3 bytes in the trailing quad, want to be sure this allows for the fact - // that it could be bad padding rather than assuming that it will decode to 2 bytes and - // therefore allow 1 extra round of fast decode logic (stage 1 / 2). - - let mut decoded = Vec::new(); - decoded.resize(input_len, 0); - - assert_eq!( - input_len, - decode_config_slice(&s, STANDARD, &mut decoded).unwrap() - ); - } -} - -//this is a MAY in the rfc: https://tools.ietf.org/html/rfc4648#section-3.3 -#[test] -fn decode_1_pad_byte_in_fast_loop_then_extra_padding_chunk_error() { - for num_quads in 0..25 { - let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); - s.push_str("YWxpY2U====="); - - // since the first 8 bytes are handled in stage 1 or 2, the padding is detected as a - // generic invalid byte, not specifcally a padding issue. 
- // Could argue that the *next* padding byte (in the next quad) is technically the first - // erroneous one, but reporting that accurately is more complex and probably nobody cares - assert_eq!( - DecodeError::InvalidByte(num_quads * 4 + 7, b'='), - decode(&s).unwrap_err() - ); - } -} - -#[test] -fn decode_2_pad_bytes_in_leftovers_then_extra_padding_chunk_error() { - for num_quads in 0..25 { - let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); - s.push_str("YWxpY2UABB===="); - - // 6 bytes (4 padding) after last 8-byte chunk, so it's decoded by stage 4. - // First padding byte is invalid. - assert_eq!( - DecodeError::InvalidByte(num_quads * 4 + 10, b'='), - decode(&s).unwrap_err() - ); - } -} - -#[test] -fn decode_valid_bytes_after_padding_in_leftovers_error() { - for num_quads in 0..25 { - let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); - s.push_str("YWxpY2UABB=B"); - - // 4 bytes after last 8-byte chunk, so it's decoded by stage 4. - // First (and only) padding byte is invalid. - assert_eq!( - DecodeError::InvalidByte(num_quads * 4 + 10, b'='), - decode(&s).unwrap_err() - ); - } -} - -#[test] -fn decode_absurd_pad_error() { - for num_quads in 0..25 { - let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); - s.push_str("==Y=Wx===pY=2U====="); - - // Plenty of remaining bytes, so handled by stage 1 or 2. 
- // first padding byte - assert_eq!( - DecodeError::InvalidByte(num_quads * 4, b'='), - decode(&s).unwrap_err() - ); - } -} - -#[test] -fn decode_extra_padding_after_1_pad_bytes_in_trailing_quad_returns_error() { - for num_quads in 0..25 { - let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); - s.push_str("EEE==="); - - // handled by stage 1, 2, or 4 depending on length - // first padding byte -- which would be legal if it was the only padding - assert_eq!( - DecodeError::InvalidByte(num_quads * 4 + 3, b'='), - decode(&s).unwrap_err() - ); - } -} - -#[test] -fn decode_extra_padding_after_2_pad_bytes_in_trailing_quad_2_returns_error() { - for num_quads in 0..25 { - let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); - s.push_str("EE===="); - - // handled by stage 1, 2, or 4 depending on length - // first padding byte -- which would be legal if it was by itself - assert_eq!( - DecodeError::InvalidByte(num_quads * 4 + 2, b'='), - decode(&s).unwrap_err() - ); - } -} - -#[test] -fn decode_start_quad_with_padding_returns_error() { - for num_quads in 0..25 { - // add enough padding to ensure that we'll hit all 4 stages at the different lengths - for pad_bytes in 1..32 { - let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); - let padding: String = std::iter::repeat("=").take(pad_bytes).collect(); - s.push_str(&padding); - - if pad_bytes % 4 == 1 { - // detected in early length check - assert_eq!(DecodeError::InvalidLength, decode(&s).unwrap_err()); - } else { - // padding lengths 2 - 8 are handled by stage 4 - // padding length >= 8 will hit at least one chunk at stages 1, 2, 3 at different - // prefix lengths - assert_eq!( - DecodeError::InvalidByte(num_quads * 4, b'='), - decode(&s).unwrap_err() - ); - } - } - } -} - -#[test] -fn decode_padding_followed_by_non_padding_returns_error() { - for num_quads in 0..25 { - for pad_bytes in 0..31 { - let mut s: String = 
std::iter::repeat("ABCD").take(num_quads).collect(); - let padding: String = std::iter::repeat("=").take(pad_bytes).collect(); - s.push_str(&padding); - s.push_str("E"); - - if pad_bytes % 4 == 0 { - assert_eq!(DecodeError::InvalidLength, decode(&s).unwrap_err()); - } else { - // pad len 1 - 8 will be handled by stage 4 - // pad len 9 (suffix len 10) will have 8 bytes of padding handled by stage 3 - // first padding byte - assert_eq!( - DecodeError::InvalidByte(num_quads * 4, b'='), - decode(&s).unwrap_err() - ); - } - } - } -} - -#[test] -fn decode_one_char_in_quad_with_padding_error() { - for num_quads in 0..25 { - let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); - s.push_str("E="); - - assert_eq!( - DecodeError::InvalidByte(num_quads * 4 + 1, b'='), - decode(&s).unwrap_err() - ); - - // more padding doesn't change the error - s.push_str("="); - assert_eq!( - DecodeError::InvalidByte(num_quads * 4 + 1, b'='), - decode(&s).unwrap_err() - ); - - s.push_str("="); - assert_eq!( - DecodeError::InvalidByte(num_quads * 4 + 1, b'='), - decode(&s).unwrap_err() - ); - } -} - -#[test] -fn decode_one_char_in_quad_without_padding_error() { - for num_quads in 0..25 { - let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); - s.push('E'); - - assert_eq!(DecodeError::InvalidLength, decode(&s).unwrap_err()); - } -} - -#[test] -fn decode_reject_invalid_bytes_with_correct_error() { - for length in 1..100 { - for index in 0_usize..length { - for invalid_byte in " \t\n\r\x0C\x0B\x00%*.".bytes() { - let prefix: String = std::iter::repeat("A").take(index).collect(); - let suffix: String = std::iter::repeat("B").take(length - index - 1).collect(); - - let input = prefix + &String::from_utf8(vec![invalid_byte]).unwrap() + &suffix; - assert_eq!( - length, - input.len(), - "length {} error position {}", - length, - index - ); - - if length % 4 == 1 && !suffix.is_empty() { - assert_eq!(DecodeError::InvalidLength, decode(&input).unwrap_err()); - } 
else { - assert_eq!( - DecodeError::InvalidByte(index, invalid_byte), - decode(&input).unwrap_err() - ); - } - } - } - } -} - -#[test] -fn decode_imap() { - assert_eq!( - decode_config(b"+,,+", crate::IMAP_MUTF7), - decode_config(b"+//+", crate::STANDARD_NO_PAD) - ); -} - -#[test] -fn decode_invalid_trailing_bytes() { - // The case of trailing newlines is common enough to warrant a test for a good error - // message. - assert_eq!( - Err(DecodeError::InvalidByte(8, b'\n')), - decode(b"Zm9vCg==\n") - ); - // extra padding, however, is still InvalidLength - assert_eq!(Err(DecodeError::InvalidLength), decode(b"Zm9vCg===")); -} - -fn config_std_pad() -> Config { - Config::new(CharacterSet::Standard, true) -} diff --git a/tests/encode.rs b/tests/encode.rs index 0004be0..2e1f893 100644 --- a/tests/encode.rs +++ b/tests/encode.rs @@ -1,44 +1,9 @@ -extern crate base64; - -use base64::*; +use base64::{ + alphabet::URL_SAFE, engine::general_purpose::PAD, engine::general_purpose::STANDARD, *, +}; fn compare_encode(expected: &str, target: &[u8]) { - assert_eq!(expected, encode(target)); -} - -#[test] -fn encode_rfc4648_0() { - compare_encode("", b""); -} - -#[test] -fn encode_rfc4648_1() { - compare_encode("Zg==", b"f"); -} - -#[test] -fn encode_rfc4648_2() { - compare_encode("Zm8=", b"fo"); -} - -#[test] -fn encode_rfc4648_3() { - compare_encode("Zm9v", b"foo"); -} - -#[test] -fn encode_rfc4648_4() { - compare_encode("Zm9vYg==", b"foob"); -} - -#[test] -fn encode_rfc4648_5() { - compare_encode("Zm9vYmE=", b"fooba"); -} - -#[test] -fn encode_rfc4648_6() { - compare_encode("Zm9vYmFy", b"foobar"); + assert_eq!(expected, STANDARD.encode(target)); } #[test] @@ -90,16 +55,6 @@ fn encode_all_bytes_url() { -AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq\ -wsbKztLW2t7i5uru8vb6_wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t_g4eLj5OXm5-jp6uvs7e7v8PHy\ 8_T19vf4-fr7_P3-_w==", - encode_config(&bytes, URL_SAFE) - ); -} - -#[test] -fn encode_url_safe_without_padding() { - let 
encoded = encode_config(b"alice", URL_SAFE_NO_PAD); - assert_eq!(&encoded, "YWxpY2U"); - assert_eq!( - String::from_utf8(decode(&encoded).unwrap()).unwrap(), - "alice" + &engine::GeneralPurpose::new(&URL_SAFE, PAD).encode(&bytes) ); } diff --git a/tests/helpers.rs b/tests/helpers.rs deleted file mode 100644 index 5144988..0000000 --- a/tests/helpers.rs +++ /dev/null @@ -1,14 +0,0 @@ -extern crate base64; - -use base64::*; - -pub fn compare_decode(expected: &str, target: &str) { - assert_eq!( - expected, - String::from_utf8(decode(target).unwrap()).unwrap() - ); - assert_eq!( - expected, - String::from_utf8(decode(target.as_bytes()).unwrap()).unwrap() - ); -} diff --git a/tests/tests.rs b/tests/tests.rs index 11fed96..eceff40 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1,18 +1,15 @@ -extern crate base64; -extern crate rand; - -use rand::{FromEntropy, Rng}; +use rand::{Rng, SeedableRng}; +use base64::engine::{general_purpose::STANDARD, Engine}; use base64::*; -mod helpers; -use self::helpers::*; +use base64::engine::general_purpose::{GeneralPurpose, NO_PAD}; // generate random contents of the specified length and test encode/decode roundtrip -fn roundtrip_random( +fn roundtrip_random<E: Engine>( byte_buf: &mut Vec<u8>, str_buf: &mut String, - config: Config, + engine: &E, byte_len: usize, approx_values_per_byte: u8, max_rounds: u64, @@ -30,8 +27,8 @@ fn roundtrip_random( byte_buf.push(r.gen::<u8>()); } - encode_config_buf(&byte_buf, config, str_buf); - decode_config_buf(&str_buf, config, &mut decode_buf).unwrap(); + engine.encode_string(&byte_buf, str_buf); + engine.decode_vec(&str_buf, &mut decode_buf).unwrap(); assert_eq!(byte_buf, &decode_buf); } @@ -52,17 +49,13 @@ fn calculate_number_of_rounds(byte_len: usize, approx_values_per_byte: u8, max: prod } -fn no_pad_config() -> Config { - Config::new(CharacterSet::Standard, false) -} - #[test] fn roundtrip_random_short_standard() { let mut byte_buf: Vec<u8> = Vec::new(); let mut str_buf = String::new(); for 
input_len in 0..40 { - roundtrip_random(&mut byte_buf, &mut str_buf, STANDARD, input_len, 4, 10000); + roundtrip_random(&mut byte_buf, &mut str_buf, &STANDARD, input_len, 4, 10000); } } @@ -72,7 +65,7 @@ fn roundtrip_random_with_fast_loop_standard() { let mut str_buf = String::new(); for input_len in 40..100 { - roundtrip_random(&mut byte_buf, &mut str_buf, STANDARD, input_len, 4, 1000); + roundtrip_random(&mut byte_buf, &mut str_buf, &STANDARD, input_len, 4, 1000); } } @@ -81,15 +74,9 @@ fn roundtrip_random_short_no_padding() { let mut byte_buf: Vec<u8> = Vec::new(); let mut str_buf = String::new(); + let engine = GeneralPurpose::new(&alphabet::STANDARD, NO_PAD); for input_len in 0..40 { - roundtrip_random( - &mut byte_buf, - &mut str_buf, - no_pad_config(), - input_len, - 4, - 10000, - ); + roundtrip_random(&mut byte_buf, &mut str_buf, &engine, input_len, 4, 10000); } } @@ -98,15 +85,10 @@ fn roundtrip_random_no_padding() { let mut byte_buf: Vec<u8> = Vec::new(); let mut str_buf = String::new(); + let engine = GeneralPurpose::new(&alphabet::STANDARD, NO_PAD); + for input_len in 40..100 { - roundtrip_random( - &mut byte_buf, - &mut str_buf, - no_pad_config(), - input_len, - 4, - 1000, - ); + roundtrip_random(&mut byte_buf, &mut str_buf, &engine, input_len, 4, 1000); } } @@ -120,13 +102,14 @@ fn roundtrip_decode_trailing_10_bytes() { // to handle that case. 
for num_quads in 0..25 { - let mut s: String = std::iter::repeat("ABCD").take(num_quads).collect(); + let mut s: String = "ABCD".repeat(num_quads); s.push_str("EFGHIJKLZg"); - let decoded = decode(&s).unwrap(); + let engine = GeneralPurpose::new(&alphabet::STANDARD, NO_PAD); + let decoded = engine.decode(&s).unwrap(); assert_eq!(num_quads * 3 + 7, decoded.len()); - assert_eq!(s, encode_config(&decoded, STANDARD_NO_PAD)); + assert_eq!(s, engine.encode(&decoded)); } } @@ -140,55 +123,39 @@ fn display_wrapper_matches_normal_encode() { bytes.push(255); assert_eq!( - encode(&bytes), - format!( - "{}", - base64::display::Base64Display::with_config(&bytes, STANDARD) - ) + STANDARD.encode(&bytes), + format!("{}", display::Base64Display::new(&bytes, &STANDARD)) ); } #[test] -fn because_we_can() { - compare_decode("alice", "YWxpY2U="); - compare_decode("alice", &encode(b"alice")); - compare_decode("alice", &encode(&decode(&encode(b"alice")).unwrap())); -} - -#[test] -fn encode_config_slice_can_use_inline_buffer() { - let mut buf: [u8; 22] = [0; 22]; - let mut larger_buf: [u8; 24] = [0; 24]; - let mut input: [u8; 16] = [0; 16]; - - let mut rng = rand::rngs::SmallRng::from_entropy(); - for elt in &mut input { - *elt = rng.gen(); - } - - assert_eq!(22, encode_config_slice(&input, STANDARD_NO_PAD, &mut buf)); - let decoded = decode_config(&buf, STANDARD_NO_PAD).unwrap(); - - assert_eq!(decoded, input); - - // let's try it again with padding - - assert_eq!(24, encode_config_slice(&input, STANDARD, &mut larger_buf)); - let decoded = decode_config(&buf, STANDARD).unwrap(); - - assert_eq!(decoded, input); -} - -#[test] -#[should_panic(expected = "index 24 out of range for slice of length 22")] -fn encode_config_slice_panics_when_buffer_too_small() { - let mut buf: [u8; 22] = [0; 22]; - let mut input: [u8; 16] = [0; 16]; - - let mut rng = rand::rngs::SmallRng::from_entropy(); - for elt in &mut input { - *elt = rng.gen(); +fn encode_engine_slice_error_when_buffer_too_small() { + for 
num_triples in 1..100 { + let input = "AAA".repeat(num_triples); + let mut vec = vec![0; (num_triples - 1) * 4]; + assert_eq!( + EncodeSliceError::OutputSliceTooSmall, + STANDARD.encode_slice(&input, &mut vec).unwrap_err() + ); + vec.push(0); + assert_eq!( + EncodeSliceError::OutputSliceTooSmall, + STANDARD.encode_slice(&input, &mut vec).unwrap_err() + ); + vec.push(0); + assert_eq!( + EncodeSliceError::OutputSliceTooSmall, + STANDARD.encode_slice(&input, &mut vec).unwrap_err() + ); + vec.push(0); + assert_eq!( + EncodeSliceError::OutputSliceTooSmall, + STANDARD.encode_slice(&input, &mut vec).unwrap_err() + ); + vec.push(0); + assert_eq!( + num_triples * 4, + STANDARD.encode_slice(&input, &mut vec).unwrap() + ); } - - encode_config_slice(&input, STANDARD, &mut buf); } |