untrusted crate v0.7.1

Bug: 155855709 Test: n/a Change-Id: I3a4cd835839a800d4a091bf99964bf0f09883168
author: Jeff Vander Stoep <jeffv@google.com> 2020-12-04 12:38:08 +0100
committer: Jeff Vander Stoep <jeffv@google.com> 2020-12-04 12:38:08 +0100
commit: 10c89c58562f8e90c25e7573096e71dc83dc4786 (patch)
tree: 30b3efa938d801f0479e5ea2c6c647c74e60afa1 /src/untrusted.rs
parent: 8ecf69645e5dc06b2d864b8228d1b5b19cefaaf6 (diff)
download: untrusted-10c89c58562f8e90c25e7573096e71dc83dc4786.tar.gz
1 files changed, 374 insertions, 0 deletions
diff --git a/src/untrusted.rs b/src/untrusted.rs
new file mode 100644
index 0000000..2f88bb4
--- /dev/null
+++ b/src/untrusted.rs
@@ -0,0 +1,374 @@
+// Copyright 2015-2016 Brian Smith.
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+//! untrusted.rs: Safe, fast, zero-panic, zero-crashing, zero-allocation
+//! parsing of untrusted inputs in Rust.
+//!
+//! <code>git clone https://github.com/briansmith/untrusted</code>
+//!
+//! untrusted.rs goes beyond Rust's normal safety guarantees by also
+//! guaranteeing that parsing will be panic-free, as long as
+//! `untrusted::Input::as_slice_less_safe()` is not used. It avoids copying
+//! data and heap allocation and strives to prevent common pitfalls such as
+//! accidentally parsing input bytes multiple times. In order to meet these
+//! goals, untrusted.rs is limited in functionality such that it works best for
+//! input languages with a small fixed amount of lookahead such as ASN.1, TLS,
+//! TCP/IP, and many other networking, IPC, and related protocols. Languages
+//! that require more lookahead and/or backtracking require some significant
+//! contortions to parse using this framework. It would not be realistic to use
+//! it for parsing programming language code, for example.
+//!
+//! The overall pattern for using untrusted.rs is:
+//!
+//! 1. Write a recursive-descent-style parser for the input language, where the
+//!    input data is given as a `&mut untrusted::Reader` parameter to each
+//!    function. Each function should have a return type of `Result<V, E>` for
+//!    some value type `V` and some error type `E`, either or both of which may
+//!    be `()`. Functions for parsing the lowest-level language constructs
+//!    should be defined. Those lowest-level functions will parse their inputs
+//!    using `Reader::read_byte()`, `Reader::peek()`, and similar functions.
+//!    Higher-level language constructs are then parsed by calling the
+//!    lower-level functions in sequence.
+//!
+//! 2. Wrap the top-most functions of your recursive-descent parser in
+//!    functions that take their input data as an `untrusted::Input`. The
+//!    wrapper functions should call the `Input`'s `read_all` (or a variant
+//!    thereof) method. The wrapper functions are the only ones that should be
+//!    exposed outside the parser's module.
+//!
+//! 3. After receiving the input data to parse, wrap it in an `untrusted::Input`
+//!    using `untrusted::Input::from()` as early as possible. Pass the
+//!    `untrusted::Input` to the wrapper functions when it needs to be parsed.
+//!
+//! In general parsers built using `untrusted::Reader` do not need to explicitly
+//! check for end-of-input unless they are parsing optional constructs, because
+//! `Reader::read_byte()` will return `Err(EndOfInput)` on end-of-input.
+//! Similarly, parsers using `untrusted::Reader` generally don't need to check
+//! for extra junk at the end of the input as long as the parser's API uses the
+//! pattern described above, as `read_all` and its variants automatically check
+//! for trailing junk. `Reader::skip_to_end()` must be used when any remaining
+//! unread input should be ignored without triggering an error.
+//!
+//! untrusted.rs works best when all processing of the input data is done
+//! through the `untrusted::Input` and `untrusted::Reader` types. In
+//! particular, avoid trying to parse input data using functions that take
+//! byte slices. However, when you need to access a part of the input data as
+//! a slice to use a function that isn't written using untrusted.rs,
+//! `Input::as_slice_less_safe()` can be used.
+//!
+//! It is recommended to use `use untrusted;` and then `untrusted::Input`,
+//! `untrusted::Reader`, etc., instead of using `use untrusted::*`. Qualifying
+//! the names with `untrusted` helps remind the reader of the code that it is
+//! dealing with *untrusted* input.
+//!
+//! # Examples
+//!
+//! [*ring*](https://github.com/briansmith/ring)'s parser for the subset of
+//! ASN.1 DER it needs to understand,
+//! [`ring::der`](https://github.com/briansmith/ring/blob/master/src/der.rs),
+//! is built on top of untrusted.rs. *ring* also uses untrusted.rs to parse ECC
+//! public keys, RSA PKCS#1 1.5 padding, and for all other parsing it does.
+//!
+//! All of [webpki](https://github.com/briansmith/webpki)'s parsing of X.509
+//! certificates (also ASN.1 DER) is done using untrusted.rs.
+
+#![doc(html_root_url = "https://briansmith.org/rustdoc/")]
+// `#[derive(...)]` uses `#[allow(unused_qualifications)]` internally.
+#![deny(unused_qualifications)]
+#![forbid(
+    anonymous_parameters,
+    box_pointers,
+    missing_docs,
+    trivial_casts,
+    trivial_numeric_casts,
+    unsafe_code,
+    unstable_features,
+    unused_extern_crates,
+    unused_import_braces,
+    unused_results,
+    variant_size_differences,
+    warnings
+)]
+#![no_std]
+
+/// A wrapper around `&'a [u8]` that helps in writing panic-free code.
+///
+/// `Input` is `Copy`, and no methods of `Input` will ever panic.
+#[derive(Clone, Copy, Debug, Eq)]
+pub struct Input<'a> {
+    value: no_panic::Slice<'a>, // the wrapped bytes, behind a panic-free API
+}
+
+impl<'a> Input<'a> {
+    /// Wraps the given `bytes` in a new `Input`.
+    pub const fn from(bytes: &'a [u8]) -> Self {
+        // No length limit needs to be enforced here to avoid integer
+        // overflow. `Reader` assumes that `i + 1 > i` whenever
+        // `input.value.get(i)` does not return `None`; that holds because,
+        // according to the Rust language reference, the maximum object size
+        // is `core::isize::MAX`, and in practice it is impossible to create
+        // an object of size `core::usize::MAX` or larger.
+        Self { value: no_panic::Slice::new(bytes) }
+    }
+
+    /// Returns `true` when the input holds no bytes, and `false` otherwise.
+    #[inline]
+    pub fn is_empty(&self) -> bool { self.value.is_empty() }
+
+    /// Returns the number of bytes in the `Input`.
+    #[inline]
+    pub fn len(&self) -> usize { self.value.len() }
+
+    /// Parses the whole input: `read` is called with a `Reader` over this
+    /// input and must consume all of it. An `Err` from `read` is returned
+    /// as-is; if `read` succeeds but leaves input unread, the result is
+    /// `Err(incomplete_read)`.
+    pub fn read_all<F, R, E>(&self, incomplete_read: E, read: F) -> Result<R, E>
+    where
+        F: FnOnce(&mut Reader<'a>) -> Result<R, E>,
+    {
+        let mut reader = Reader::new(*self);
+        let parsed = read(&mut reader)?;
+        if !reader.at_end() {
+            return Err(incomplete_read);
+        }
+        Ok(parsed)
+    }
+
+    /// Exposes the input as a plain slice so it can be processed by functions
+    /// that are not written using the Input/Reader framework. Code working on
+    /// that slice is no longer covered by the crate's panic-free guarantee.
+    #[inline]
+    pub fn as_slice_less_safe(&self) -> &'a [u8] { self.value.as_slice_less_safe() }
+}
+
+impl<'a> From<&'a [u8]> for Input<'a> {
+    #[inline] // builds the same value as the inherent `Input::from`
+    fn from(value: &'a [u8]) -> Self { Self { value: no_panic::Slice::new(value)} }
+}
+
+// Hand-written: #[derive(PartialEq)] would result in lifetime bounds that are
+// unnecessarily restrictive; see https://github.com/rust-lang/rust/issues/26925.
+impl PartialEq<Input<'_>> for Input<'_> {
+    #[inline]
+    fn eq(&self, other: &Input) -> bool {
+        let (lhs, rhs) = (self.as_slice_less_safe(), other.as_slice_less_safe());
+        lhs == rhs
+    }
+}
+
+impl PartialEq<[u8]> for Input<'_> {
+    #[inline] // compares the wrapped bytes with a plain byte slice
+    fn eq(&self, other: &[u8]) -> bool { other == self.as_slice_less_safe() }
+}
+
+impl PartialEq<Input<'_>> for [u8] {
+    #[inline] // mirror image of `Input == [u8]`, so either operand order works
+    fn eq(&self, other: &Input) -> bool { self == other.as_slice_less_safe() }
+}
+
+/// Like `Input::read_all`, but for input that may be absent. When `input` is
+/// `Some`, `read` receives a `Reader` over it and must consume all of it, or
+/// `Err(incomplete_read)` results. When `input` is `None`, `read` gets `None`.
+pub fn read_all_optional<'a, F, R, E>(
+    input: Option<Input<'a>>, incomplete_read: E, read: F,
+) -> Result<R, E>
+where
+    F: FnOnce(Option<&mut Reader<'a>>) -> Result<R, E>,
+{
+    // Absent input: nothing can be left unread, so `read` alone decides.
+    let present = match input {
+        Some(present) => present,
+        None => return read(None),
+    };
+    let mut reader = Reader::new(present);
+    let parsed = read(Some(&mut reader))?;
+    if !reader.at_end() {
+        // `read` succeeded but stopped short of the end of the input.
+        return Err(incomplete_read);
+    }
+    Ok(parsed)
+}
+
+/// A read-only, forward-only* cursor into the data in an `Input`.
+///
+/// Using `Reader` to parse input helps to ensure that no byte of the input
+/// will be accidentally processed more than once. Using `Reader` in
+/// conjunction with `read_all` and `read_all_optional` helps ensure that no
+/// byte of the input is accidentally left unprocessed. The methods of `Reader`
+/// never panic, so `Reader` also assists the writing of panic-free code.
+///
+/// \* `Reader` is not strictly forward-only because of the method
+/// `get_input_between_marks`, which is provided mainly to support calculating
+/// digests over parsed data.
+#[derive(Debug)]
+pub struct Reader<'a> {
+    input: no_panic::Slice<'a>, // all of the bytes being parsed
+    i: usize, // index of the next unread byte; `i == input.len()` is the end
+}
+
+/// An index into the already-parsed input of a `Reader`.
+pub struct Mark {
+    i: usize, // the `Reader`'s position at the time `Reader::mark` was called
+}
+
+impl<'a> Reader<'a> {
+    /// Construct a new Reader for the given input, positioned at its first
+    /// byte. Use `read_all` or `read_all_optional` instead whenever possible.
+    #[inline]
+    pub fn new(input: Input<'a>) -> Self {
+        Self {
+            input: input.value,
+            i: 0,
+        }
+    }
+
+    /// Returns `true` if the reader is at the end of the input, else `false`.
+    #[inline]
+    pub fn at_end(&self) -> bool { self.i == self.input.len() }
+
+    /// Returns an `Input` for the already-parsed bytes from `mark1` up to (not
+    /// including) `mark2`, or `Err(EndOfInput)` if they are not a valid range.
+    #[inline]
+    pub fn get_input_between_marks(
+        &self, mark1: Mark, mark2: Mark,
+    ) -> Result<Input<'a>, EndOfInput> {
+        self.input
+            .subslice(mark1.i..mark2.i)
+            .map(|subslice| Input { value: subslice })
+            .ok_or(EndOfInput)
+    }
+
+    /// Returns the current position for a later `get_input_between_marks` call.
+    #[inline]
+    pub fn mark(&self) -> Mark { Mark { i: self.i } }
+
+    /// Returns `true` if there is at least one more byte in the input and that
+    /// byte is equal to `b`, and false otherwise. The byte is not consumed.
+    #[inline]
+    pub fn peek(&self, b: u8) -> bool {
+        match self.input.get(self.i) {
+            Some(actual_b) => b == *actual_b,
+            None => false,
+        }
+    }
+
+    /// Reads the next input byte.
+    ///
+    /// Returns `Ok(b)` where `b` is the next input byte, or `Err(EndOfInput)`
+    /// if the `Reader` is at the end of the input.
+    #[inline]
+    pub fn read_byte(&mut self) -> Result<u8, EndOfInput> {
+        match self.input.get(self.i) {
+            Some(b) => {
+                self.i += 1; // safe from overflow; see Input::from().
+                Ok(*b)
+            },
+            None => Err(EndOfInput),
+        }
+    }
+
+    /// Skips `num_bytes` of the input, returning the skipped input as an
+    /// `Input`.
+    ///
+    /// Returns `Ok(input)` if there are at least `num_bytes` of input remaining;
+    /// otherwise returns `Err(EndOfInput)` and leaves the position unchanged.
+    #[inline]
+    pub fn read_bytes(&mut self, num_bytes: usize) -> Result<Input<'a>, EndOfInput> {
+        let new_i = self.i.checked_add(num_bytes).ok_or(EndOfInput)?;
+        let ret = self
+            .input
+            .subslice(self.i..new_i)
+            .map(|subslice| Input { value: subslice })
+            .ok_or(EndOfInput)?;
+        self.i = new_i;
+        Ok(ret)
+    }
+
+    /// Skips the reader to the end of the input, returning the skipped input
+    /// as an `Input`.
+    #[inline]
+    pub fn read_bytes_to_end(&mut self) -> Input<'a> {
+        let to_skip = self.input.len() - self.i;
+        self.read_bytes(to_skip).unwrap() // exactly `to_skip` bytes remain
+    }
+
+    /// Calls `read()` with this `Reader`. On success, returns a pair
+    /// `(bytes_read, r)` where `bytes_read` is what `read()` consumed and `r`
+    /// is `read()`'s return value; an error from `read()` is passed through.
+    pub fn read_partial<F, R, E>(&mut self, read: F) -> Result<(Input<'a>, R), E>
+    where
+        F: FnOnce(&mut Reader<'a>) -> Result<R, E>,
+    {
+        let start = self.i;
+        let r = read(self)?;
+        // NOTE(review): `unwrap` assumes `read` only advanced this reader; if
+        // `read` replaces the `Reader` it is given, this could panic (confirm).
+        let bytes_read = Input {
+            value: self.input.subslice(start..self.i).unwrap()
+        };
+        Ok((bytes_read, r))
+    }
+
+    /// Skips `num_bytes` of the input.
+    ///
+    /// Returns `Ok(())` if there are at least `num_bytes` of input remaining,
+    /// and `Err(EndOfInput)` otherwise.
+    #[inline]
+    pub fn skip(&mut self, num_bytes: usize) -> Result<(), EndOfInput> {
+        self.read_bytes(num_bytes).map(|_| ())
+    }
+
+    /// Skips the reader to the end of the input, discarding the skipped bytes.
+    #[inline]
+    pub fn skip_to_end(&mut self) -> () { let _ = self.read_bytes_to_end(); }
+}
+
+/// The error returned by `Reader` methods such as `read_byte` when the end of
+/// the input was reached before the operation could be completed.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub struct EndOfInput;
+
+mod no_panic {
+    use core::ops::Range;
+
+    /// A byte-slice wrapper that only exposes operations which cannot panic.
+    #[derive(Clone, Copy, Debug, Eq, PartialEq)]
+    pub struct Slice<'a> {
+        bytes: &'a [u8],
+    }
+
+    impl<'a> Slice<'a> {
+        #[inline]
+        pub const fn new(bytes: &'a [u8]) -> Self { Self { bytes } }
+
+        #[inline] // checked lookup: `None` when `i` is out of bounds
+        pub fn get(&self, i: usize) -> Option<&u8> { self.bytes.get(i) }
+
+        #[inline] // checked range lookup: `None` when `r` is not a valid range
+        pub fn subslice(&self, r: Range<usize>) -> Option<Self> {
+            let bytes = self.bytes.get(r)?;
+            Some(Self { bytes })
+        }
+
+        #[inline]
+        pub fn is_empty(&self) -> bool { self.bytes.is_empty() }
+
+        #[inline]
+        pub fn len(&self) -> usize { self.bytes.len() }
+
+        #[inline]
+        pub fn as_slice_less_safe(&self) -> &'a [u8] { self.bytes }
+    }
+} // mod no_panic
author	Jeff Vander Stoep <jeffv@google.com>	2020-12-04 12:38:08 +0100
committer	Jeff Vander Stoep <jeffv@google.com>	2020-12-04 12:38:08 +0100
commit	10c89c58562f8e90c25e7573096e71dc83dc4786 (patch)
tree	30b3efa938d801f0479e5ea2c6c647c74e60afa1 /src/untrusted.rs
parent	8ecf69645e5dc06b2d864b8228d1b5b19cefaaf6 (diff)
download	untrusted-10c89c58562f8e90c25e7573096e71dc83dc4786.tar.gz