diff options
author | Jakub Kotur <qtr@google.com> | 2021-03-10 16:17:03 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2021-03-10 16:17:03 +0000 |
commit | 147ccaf27c923ea460b9de69a84f93d00e39e2f0 (patch) | |
tree | 15bd9599451d07c663f118f93b14a789080dfb26 | |
parent | 029aa05a8f11602b1ce5816d785975c8d0925f88 (diff) | |
parent | 3dca6e6bd135005387d850cc96462761b26bc6dc (diff) | |
download | half-147ccaf27c923ea460b9de69a84f93d00e39e2f0.tar.gz |
Initial import of half-1.6.0 am: e245d8bd43 am: ce1f66bec5 am: 0dc0cadfb9 am: 3dca6e6bd1
Original change: https://android-review.googlesource.com/c/platform/external/rust/crates/half/+/1610013
MUST ONLY BE SUBMITTED BY AUTOMERGER
Change-Id: I7001a098f64c622dec525421acc65f640bec1abb
-rw-r--r-- | .cargo_vcs_info.json | 5 | ||||
-rw-r--r-- | .gitattributes | 2 | ||||
-rw-r--r-- | .gitignore | 11 | ||||
-rw-r--r-- | .travis.yml | 50 | ||||
-rw-r--r-- | CHANGELOG.md | 172 | ||||
-rw-r--r-- | Cargo.toml | 62 | ||||
-rw-r--r-- | Cargo.toml.orig | 42 | ||||
-rw-r--r-- | README.md | 59 | ||||
-rw-r--r-- | appveyor.yml | 156 | ||||
-rw-r--r-- | benches/convert.rs | 319 | ||||
-rw-r--r-- | src/bfloat.rs | 1103 | ||||
-rw-r--r-- | src/bfloat/convert.rs | 135 | ||||
-rw-r--r-- | src/binary16.rs | 1398 | ||||
-rw-r--r-- | src/binary16/convert.rs | 491 | ||||
-rw-r--r-- | src/lib.rs | 113 | ||||
-rw-r--r-- | src/slice.rs | 976 | ||||
-rw-r--r-- | src/vec.rs | 301 | ||||
-rw-r--r-- | tests/version-numbers.rs | 14 |
18 files changed, 5409 insertions, 0 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json new file mode 100644 index 0000000..a35bc0b --- /dev/null +++ b/.cargo_vcs_info.json @@ -0,0 +1,5 @@ +{ + "git": { + "sha1": "04b83b7c1954e3c0659a027179ba144b90559e19" + } +} diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..124e454 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +* text=auto +*.rs whitespace=tab-in-indent,trailing-space,tabwidth=4
\ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..56f75d2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +# Rust +target/ +Cargo.lock +**/*.rs.bak + +# IntelliJ +.idea/ +*.iml + +# VS Code +.vscode/
\ No newline at end of file diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..cbf87db --- /dev/null +++ b/.travis.yml @@ -0,0 +1,50 @@ +os: + - linux + - windows +sudo: false +language: rust +rust: + - stable + - beta + - nightly +matrix: + include: + - rust: stable + env: CARGOFLAGS="--features serde" + - rust: stable + env: CARGOFLAGS="--features std" + - rust: stable + env: CARGOFLAGS="--features std,serde" + - rust: stable + env: CARGOFLAGS="--features alloc" + - rust: nightly + env: CARGOFLAGS=--all-features + + - rust: stable + os: windows + env: CARGOFLAGS="--features serde" + - rust: stable + os: windows + env: CARGOFLAGS="--features std" + - rust: stable + os: windows + env: CARGOFLAGS="--features alloc" + - rust: stable + os: windows + env: CARGOFLAGS="--features std,serde" + - rust: nightly + os: windows + env: CARGOFLAGS=--all-features + + - rust: 1.32.0 + script: + - cargo update + - cargo update -p unicode-normalization --precise 0.1.9 + - cargo update -p criterion --precise 0.3.0 + - cargo test --features std +branches: + except: + - gh-pages +script: + - cargo build --verbose --all-targets $CARGOFLAGS + - cargo test --verbose $CARGOFLAGS diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..3488747 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,172 @@ +# Changelog + +The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). + +## [Unreleased] + + +## [1.6.0] - 2020-05-09 <a name="1.6.0"></a> +### Added +- Added `LOG2_10` and `LOG10_2` constants to both `f16` and `bf16`, which were added to `f32` and + `f64` in the standard library in 1.43.0. By [@tspiteri]. +- Added `to_le/be/ne_bytes` and `from_le/be/ne_bytes` to both `f16` and `bf16`, which were added to + the standard library in 1.40.0. By [@bzm3r]. + +## [1.5.0] - 2020-03-03 <a name="1.5.0"></a> +### Added +- Added the `alloc` feature to support the `alloc` crate in `no_std` environments. By [@zserik]. The + `vec` module is now available with either `alloc` or `std` feature. + +## [1.4.1] - 2020-02-10 <a name="1.4.1"></a> +### Fixed +- Added `#[repr(transparent)]` to `f16`/`bf16` to remove undefined behavior. By [@jfrimmel]. + +## [1.4.0] - 2019-10-13 <a name="1.4.0"></a> +### Added +- Added a `bf16` type implementing the alternative + [`bfloat16`](https://en.wikipedia.org/wiki/Bfloat16_floating-point_format) 16-bit floating point + format. By [@tspiteri]. +- `f16::from_bits`, `f16::to_bits`, `f16::is_nan`, `f16::is_infinite`, `f16::is_finite`, + `f16::is_sign_positive`, and `f16::is_sign_negative` are now `const` fns. +- `slice::HalfBitsSliceExt` and `slice::HalfBitsSliceExt` extension traits have been added for + performing efficient reinterpret casts and conversions of slices to and from `[f16]` and + `[bf16]`. These traits will use hardware SIMD conversion instructions when available and the + `use-intrinsics` cargo feature is enabled. +- `vec::HalfBitsVecExt` and `vec::HalfFloatVecExt` extension traits have been added for + performing efficient reinterpret casts to and from `Vec<f16>` and `Vec<bf16>`. These traits + are only available with the `std` cargo feature. +- `prelude` has been added, for easy importing of most common functionality. Currently the + prelude imports `f16`, `bf16`, and the new slice and vec extension traits. +- New associated constants on `f16` type to replace deprecated `consts` module. + +### Fixed +- Software conversion (when not using `use-intrinsics` feature) now matches hardware rounding + by rounding to nearest, ties to even. Fixes [#24], by [@tspiteri]. +- NaN value conversions now behave like `f32` to `f64` conversions, retaining sign. Fixes [#23], + by [@tspiteri]. + +### Changed +- Minimum rustc version bumped to 1.32. +- Runtime target host feature detection is now used if both `std` and `use-intrinsics` features are + enabled and the compile target host does not support required features. +- When `use-intrinsics` feature is enabled, will now always compile and run without error correctly + regardless of compile target options. + +### Deprecated +- `consts` module and all its constants have been deprecated; use the associated constants on `f16` + instead. +- `slice::from_bits` has been deprecated; use `slice::HalfBitsSliceExt::reinterpret_cast` instead. +- `slice::from_bits_mut` has been deprecated; use `slice::HalfBitsSliceExt::reinterpret_cast_mut` + instead. +- `slice::to_bits` has been deprecated; use `slice::HalfFloatSliceExt::reinterpret_cast` instead. +- `slice::to_bits_mut` has been deprecated; use `slice::HalfFloatSliceExt::reinterpret_cast_mut` + instead. +- `vec::from_bits` has been deprecated; use `vec::HalfBitsVecExt::reinterpret_into` instead. +- `vec::to_bits` has been deprecated; use `vec::HalfFloatVecExt::reinterpret_into` instead. + +## [1.3.1] - 2019-10-04 <a name="1.3.1"></a> +### Fixed +- Corrected values of constants `EPSILON`, `MAX_10_EXP`, `MAX_EXP`, `MIN_10_EXP`, and `MIN_EXP` + in `consts` module, as well as setting `consts::NAN` to match value of `f32::NAN` converted to + `f16`. By [@tspiteri]. + +## [1.3.0] - 2018-10-02 <a name="1.3.0"></a> +### Added +- `slice::from_bits_mut` and `slice::to_bits_mut` for conversion between mutable `u16` and `f16` + slices. Fixes [#16], by [@johannesvollmer]. + +## [1.2.0] - 2018-09-03 <a name="1.2.0"></a> +### Added +- `slice` and optional `vec` (only included with `std` feature) modules for conversions between + `u16` and `f16` buffers. Fixes [#14], by [@johannesvollmer]. +- `to_bits` added to replace `as_bits`. Fixes [#12], by [@tspiteri]. +### Fixed +- `serde` optional dependency no longer uses its default `std` feature. +### Deprecated +- `as_bits` has been deprecated; use `to_bits` instead. +- `serialize` cargo feature is deprecated; use `serde` instead. + +## [1.1.2] - 2018-07-12 <a name="1.1.2"></a> +### Fixed +- Fixed compilation error in 1.1.1 on rustc < 1.27, now compiles again on rustc >= 1.10. Fixes + [#11]. + +## [1.1.1] - 2018-06-24 - **Yanked** <a name="1.1.1"></a> +### ***Yanked*** +*Not recommended due to introducing compilation error on rustc versions prior to 1.27.* +### Fixed +- Fix subnormal float conversions when `use-intrinsics` is not enabled. By [@Moongoodboy-K]. + +## [1.1.0] - 2018-03-17 <a name="1.1.0"></a> +### Added +- Made `to_f32` and `to_f64` public. Fixes [#7], by [@PSeitz]. + +## [1.0.2] - 2018-01-12 <a name="1.0.2"></a> +### Changed +- Update behavior of `is_sign_positive` and `is_sign_negative` to match the IEEE754 conforming + behavior of the standard library since Rust 1.20.0. Fixes [#3], by [@tspiteri]. +- Small optimization on `is_nan` and `is_infinite` from [@tspiteri]. +### Fixed +- Fix comparisons of +0 to -0 and comparisons involving negative numbers. Fixes [#2], by + [@tspiteri]. +- Fix loss of sign when converting `f16` and `f32` to `f16`, and case where `f64` NaN could be + converted to `f16` infinity instead of NaN. Fixes [#5], by [@tspiteri]. + +## [1.0.1] - 2017-08-30 <a name="1.0.1"></a> +### Added +- More README documentation. +- Badges and categories in crate metadata. +### Changed +- `serde` dependency updated to 1.0 stable. +- Writing changelog manually. + +## [1.0.0] - 2017-02-03 <a name="1.0.0"></a> +### Added +- Update to `serde` 0.9 and stable Rust 1.15 for `serialize` feature. + +## [0.1.1] - 2017-01-08 <a name="0.1.1"></a> +### Added +- Add `serde` support under new `serialize` feature. +### Changed +- Use `no_std` for crate by default. + +## 0.1.0 - 2016-03-17 <a name="0.1.0"></a> +### Added +- Initial release of `f16` type. + +[#2]: https://github.com/starkat99/half-rs/issues/2 +[#3]: https://github.com/starkat99/half-rs/issues/3 +[#5]: https://github.com/starkat99/half-rs/issues/5 +[#7]: https://github.com/starkat99/half-rs/issues/7 +[#11]: https://github.com/starkat99/half-rs/issues/11 +[#12]: https://github.com/starkat99/half-rs/issues/12 +[#14]: https://github.com/starkat99/half-rs/issues/14 +[#16]: https://github.com/starkat99/half-rs/issues/16 +[#23]: https://github.com/starkat99/half-rs/issues/23 +[#24]: https://github.com/starkat99/half-rs/issues/24 + +[@tspiteri]: https://github.com/tspiteri +[@PSeitz]: https://github.com/PSeitz +[@Moongoodboy-K]: https://github.com/Moongoodboy-K +[@johannesvollmer]: https://github.com/johannesvollmer +[@jfrimmel]: https://github.com/jfrimmel +[@zserik]: https://github.com/zserik +[@bzm3r]: https://github.com/bzm3r + + +[Unreleased]: https://github.com/starkat99/half-rs/compare/v1.6.0...HEAD +[1.6.0]: https://github.com/starkat99/half-rs/compare/v1.5.0...v1.6.0 +[1.5.0]: https://github.com/starkat99/half-rs/compare/v1.4.1...v1.5.0 +[1.4.1]: https://github.com/starkat99/half-rs/compare/v1.4.0...v1.4.1 +[1.4.0]: https://github.com/starkat99/half-rs/compare/v1.3.1...v1.4.0 +[1.3.1]: https://github.com/starkat99/half-rs/compare/v1.3.0...v1.3.1 +[1.3.0]: https://github.com/starkat99/half-rs/compare/v1.2.0...v1.3.0 +[1.2.0]: https://github.com/starkat99/half-rs/compare/v1.1.2...v1.2.0 +[1.1.2]: https://github.com/starkat99/half-rs/compare/v1.1.1...v1.1.2 +[1.1.1]: https://github.com/starkat99/half-rs/compare/v1.1.0...v1.1.1 +[1.1.0]: https://github.com/starkat99/half-rs/compare/v1.0.2...v1.1.0 +[1.0.2]: https://github.com/starkat99/half-rs/compare/v1.0.1...v1.0.2 +[1.0.1]: https://github.com/starkat99/half-rs/compare/v1.0.0...v1.0.1 +[1.0.0]: https://github.com/starkat99/half-rs/compare/v0.1.1...v1.0.0 +[0.1.1]: https://github.com/starkat99/half-rs/compare/v0.1.0...v0.1.1 diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..c122803 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,62 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +edition = "2018" +name = "half" +version = "1.6.0" +authors = ["Kathryn Long <squeeself@gmail.com>"] +description = "Half-precision floating point f16 and bf16 types for Rust implementing the IEEE 754-2008 standard binary16 and bfloat16 types." +readme = "README.md" +keywords = ["f16", "bfloat16", "no_std"] +categories = ["no-std", "data-structures", "encoding"] +license = "MIT/Apache-2.0" +repository = "https://github.com/starkat99/half-rs" +[package.metadata.docs.rs] +features = ["std", "serde"] + +[[bench]] +name = "convert" +harness = false +[dependencies.serde] +version = "1.0" +features = ["derive"] +optional = true +default-features = false +[dev-dependencies.criterion] +version = "0.3" + +[dev-dependencies.quickcheck] +version = "0.9" + +[dev-dependencies.quickcheck_macros] +version = "0.9" + +[dev-dependencies.rand] +version = "0.7" + +[dev-dependencies.version-sync] +version = "0.8" + +[features] +alloc = [] +serialize = ["serde"] +std = ["alloc"] +use-intrinsics = [] +[badges.appveyor] +repository = "starkat99/half-rs" + +[badges.maintenance] +status = "passively-maintained" + +[badges.travis-ci] +repository = "starkat99/half-rs" diff --git a/Cargo.toml.orig b/Cargo.toml.orig new file mode 100644 index 0000000..5fa235f --- /dev/null +++ b/Cargo.toml.orig @@ -0,0 +1,42 @@ +[package] +name = "half" +version = "1.6.0" # Remember to keep in sync with html_root_url crate attribute +authors = ["Kathryn Long <squeeself@gmail.com>"] +description = "Half-precision floating point f16 and bf16 types for Rust implementing the IEEE 754-2008 standard binary16 and bfloat16 types." +repository = "https://github.com/starkat99/half-rs" +readme = "README.md" +keywords = ["f16", "bfloat16", "no_std"] +license = "MIT/Apache-2.0" +categories = ["no-std", "data-structures", "encoding"] +edition = "2018" + +[badges] +appveyor = { repository = "starkat99/half-rs" } +travis-ci = { repository = "starkat99/half-rs" } +maintenance = { status = "passively-maintained" } + +[features] +std = ["alloc"] +use-intrinsics = [] +serialize = ["serde"] # Deprecated. Use serde directly. +alloc = [] + +[dependencies.serde] +version = "1.0" +optional = true +default-features = false +features = ["derive"] + +[package.metadata.docs.rs] +features = ["std", "serde"] + +[dev-dependencies] +criterion = "0.3" +quickcheck = "0.9" +quickcheck_macros = "0.9" +rand = "0.7" +version-sync = "0.8" + +[[bench]] +name = "convert" +harness = false diff --git a/README.md b/README.md new file mode 100644 index 0000000..8de9417 --- /dev/null +++ b/README.md @@ -0,0 +1,59 @@ +# `f16` and `bf16` floating point types for Rust +[![Crates.io](https://img.shields.io/crates/v/half.svg)](https://crates.io/crates/half/) [![docs.rs](https://docs.rs/half/badge.svg)](https://docs.rs/half/) [![Build Status](https://travis-ci.org/starkat99/half-rs.svg?branch=master)](https://travis-ci.org/starkat99/half-rs) [![Build status](https://ci.appveyor.com/api/projects/status/bi18aypi3h5r88gs?svg=true)](https://ci.appveyor.com/project/starkat99/half-rs) + +This crate implements a half-precision floating point `f16` type for Rust implementing the IEEE 754-2008 standard +[`binary16`](https://en.wikipedia.org/wiki/Half-precision_floating-point_format) a.k.a `half` format, +as well as a `bf16` type implementing the [`bfloat16`](https://en.wikipedia.org/wiki/Bfloat16_floating-point_format) format. + +## Usage + +The `f16` and `bf16` types provides conversion operations as a normal Rust floating point type, but since they are primarily leveraged for +minimal floating point storage and most major hardware does not implement them, all math operations should be done as an `f32` type. + +This crate provides [`no_std`](https://rust-embedded.github.io/book/intro/no-std.html) support by default so can easily be used in embedded +code where a smaller float format is most useful. + +*Requries Rust 1.32 or greater.* If you need support for older versions of Rust, use versions 1.3 and earlier of this crate. + +See the [crate documentation](https://docs.rs/half/) for more details. + +### Optional Features + +- **`serde`** - Implement `Serialize` and `Deserialize` traits for `f16` and `bf16`. This adds a dependency on the +[`serde`](https://crates.io/crates/serde) crate. + +- **`use-intrinsics`** - Use hardware intrinsics for `f16` and `bf16` conversions if available on the compiler host target. By +default, without this feature, conversions are done only in software, which will be the fallback if the host target does +not have hardware support. **Available only on Rust nightly channel.** + +- **`alloc`** - Enable use of the [`alloc`](https://doc.rust-lang.org/alloc/) crate when not using the `std` library. + + This enables the `vec` module, which contains zero-copy conversions for the `Vec` type. This allows fast conversion between + raw `Vec<u16>` bits and `Vec<f16>` or `Vec<bf16>` arrays, and vice versa. *Requires Rust 1.36 or greater.* + +- **`std`** - Enable features that depend on the Rust `std` library, including everything in the `alloc` feature. + + Enabling the `std` feature enables runtime CPU feature detection when the `use-intrsincis` feature is also enabled. + Without this feature detection, intrinsics are only used when compiler host target supports them. + +### More Documentation + +- [Crate API Reference](https://docs.rs/half/) +- [Latest Changes](CHANGELOG.md) + +## License + +This library is distributed under the terms of either of: + +* MIT license ([LICENSE-MIT](LICENSE-MIT) or +[http://opensource.org/licenses/MIT](http://opensource.org/licenses/MIT)) +* Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or +[http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0)) + +at your option. + +### Contributing + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the +work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any +additional terms or conditions. diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 0000000..5147453 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,156 @@ +# Appveyor configuration template for Rust using rustup for Rust installation +# https://github.com/starkat99/appveyor-rust + +branches: + except: + - gh-pages + +## Operating System (VM environment) ## + +# Rust needs at least Visual Studio 2013 Appveyor OS for MSVC targets. +os: Visual Studio 2015 + +## Build Matrix ## + +# This configuration will setup a build for each channel & target combination (12 windows +# combinations in all). +# +# There are 3 channels: stable, beta, and nightly. +# +# Alternatively, the full version may be specified for the channel to build using that specific +# version (e.g. channel: 1.5.0) +# +# The values for target are the set of windows Rust build targets. Each value is of the form +# +# ARCH-pc-windows-TOOLCHAIN +# +# Where ARCH is the target architecture, either x86_64 or i686, and TOOLCHAIN is the linker +# toolchain to use, either msvc or gnu. See https://www.rust-lang.org/downloads.html#win-foot for +# a description of the toolchain differences. +# See https://github.com/rust-lang-nursery/rustup.rs/#toolchain-specification for description of +# toolchains and host triples. +# +# Comment out channel/target combos you do not wish to build in CI. +# +# You may use the `cargoflags` and `RUSTFLAGS` variables to set additional flags for cargo commands +# and rustc, respectively. For instance, you can uncomment the cargoflags lines in the nightly +# channels to enable unstable features when building for nightly. Or you could add additional +# matrix entries to test different combinations of features. +environment: + matrix: + ### MSVC Toolchains ### + + # Stable 64-bit MSVC + - channel: stable + target: x86_64-pc-windows-msvc + # Stable 32-bit MSVC + - channel: stable + target: i686-pc-windows-msvc + # Stable 64-bit MSVC w/ optional features + - channel: stable + target: x86_64-pc-windows-msvc + cargoflags: --features "std serde" + # Stable 64-bit MSVC w/ optional features + - channel: stable + target: x86_64-pc-windows-msvc + cargoflags: --features "alloc" + # Stable 32-bit MSVC w/ optional features + - channel: stable + target: i686-pc-windows-msvc + cargoflags: --features "std serde" + # Stable 32-bit MSVC w/ optional features + - channel: stable + target: i686-pc-windows-msvc + cargoflags: --features "alloc" + # Beta 64-bit MSVC + - channel: beta + target: x86_64-pc-windows-msvc + # Beta 32-bit MSVC + - channel: beta + target: i686-pc-windows-msvc + # Nightly 64-bit MSVC w/ nightly features + - channel: nightly + target: x86_64-pc-windows-msvc + cargoflags: --features "use-intrinsics" + # Nightly 32-bit MSVC w/ nightly features + - channel: nightly + target: i686-pc-windows-msvc + cargoflags: --features "use-intrinsics" + + ### GNU Toolchains ### + + # Stable 64-bit GNU + - channel: stable + target: x86_64-pc-windows-gnu + # Stable 32-bit GNU + - channel: stable + target: i686-pc-windows-gnu + # Stable 64-bit GNU w/ optional features + - channel: stable + target: x86_64-pc-windows-gnu + cargoflags: --features "std serde" + # Stable 64-bit GNU w/ optional features + - channel: stable + target: x86_64-pc-windows-gnu + cargoflags: --features "alloc" + # Stable 32-bit GNU w/ optional features + - channel: stable + target: i686-pc-windows-gnu + cargoflags: --features "std serde" + # Stable 32-bit GNU w/ optional features + - channel: stable + target: i686-pc-windows-gnu + cargoflags: --features "alloc" + # Beta 64-bit GNU + - channel: beta + target: x86_64-pc-windows-gnu + # Beta 32-bit GNU + - channel: beta + target: i686-pc-windows-gnu + # Nightly 64-bit GNU w/ nightly features + - channel: nightly + target: x86_64-pc-windows-gnu + cargoflags: --features "use-intrinsics" + # Nightly 32-bit GNU w/ nightly features + - channel: nightly + target: i686-pc-windows-gnu + cargoflags: --features "use-intrinsics" + +### Allowed failures ### + +# See Appveyor documentation for specific details. In short, place any channel or targets you wish +# to allow build failures on (usually nightly at least is a wise choice). This will prevent a build +# or test failure in the matching channels/targets from failing the entire build. +#matrix: +# allow_failures: +# - channel: nightly + +# If you only care about stable channel build failures, uncomment the following line: +#- channel: beta + +## Install Script ## + +# This is the most important part of the Appveyor configuration. This installs the version of Rust +# specified by the 'channel' and 'target' environment variables from the build matrix. This uses +# rustup to install Rust. +# +# For simple configurations, instead of using the build matrix, you can simply set the +# default-toolchain and default-host manually here. +install: + - appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe + - rustup-init -yv --default-toolchain %channel% --default-host %target% + - set PATH=%PATH%;%USERPROFILE%\.cargo\bin + - rustc -vV + - cargo -vV + +## Build Script ## + +# 'cargo test' takes care of building for us, so disable Appveyor's build stage. This prevents +# the "directory does not contain a project or solution file" error. +build: false + +# Uses 'cargo test' to run tests and build. Alternatively, the project may call compiled programs +#directly or perform other testing commands. Rust will automatically be placed in the PATH +# environment variable. +test_script: + - cargo test --verbose %cargoflags% diff --git a/benches/convert.rs b/benches/convert.rs new file mode 100644 index 0000000..4801c8d --- /dev/null +++ b/benches/convert.rs @@ -0,0 +1,319 @@ +use criterion::{criterion_group, criterion_main, Bencher, Criterion}; +use half::prelude::*; +use std::{f32, f64, iter}; + +const SIMD_LARGE_BENCH_SLICE_LEN: usize = 1024; + +fn bench_f32_to_f16(c: &mut Criterion) { + c.bench_function_over_inputs( + "f16::from_f32", + |b: &mut Bencher<'_>, i: &f32| b.iter(|| f16::from_f32(*i)), + vec![ + 0., + -0., + 1., + f32::MIN, + f32::MAX, + f32::MIN_POSITIVE, + f32::NEG_INFINITY, + f32::INFINITY, + f32::NAN, + f32::consts::E, + f32::consts::PI, + ], + ); +} + +fn bench_f64_to_f16(c: &mut Criterion) { + c.bench_function_over_inputs( + "f16::from_f64", + |b: &mut Bencher<'_>, i: &f64| b.iter(|| f16::from_f64(*i)), + vec![ + 0., + -0., + 1., + f64::MIN, + f64::MAX, + f64::MIN_POSITIVE, + f64::NEG_INFINITY, + f64::INFINITY, + f64::NAN, + f64::consts::E, + f64::consts::PI, + ], + ); +} + +fn bench_f16_to_f32(c: &mut Criterion) { + c.bench_function_over_inputs( + "f16::to_f32", + |b: &mut Bencher<'_>, i: &f16| b.iter(|| i.to_f32()), + vec![ + f16::ZERO, + f16::NEG_ZERO, + f16::ONE, + f16::MIN, + f16::MAX, + f16::MIN_POSITIVE, + f16::NEG_INFINITY, + f16::INFINITY, + f16::NAN, + f16::E, + f16::PI, + ], + ); +} + +fn bench_f16_to_f64(c: &mut Criterion) { + c.bench_function_over_inputs( + "f16::to_f64", + |b: &mut Bencher<'_>, i: &f16| b.iter(|| i.to_f64()), + vec![ + f16::ZERO, + f16::NEG_ZERO, + f16::ONE, + f16::MIN, + f16::MAX, + f16::MIN_POSITIVE, + f16::NEG_INFINITY, + f16::INFINITY, + f16::NAN, + f16::E, + f16::PI, + ], + ); +} + +criterion_group!( + f16_sisd, + bench_f32_to_f16, + bench_f64_to_f16, + bench_f16_to_f32, + bench_f16_to_f64 +); + +fn bench_slice_f32_to_f16(c: &mut Criterion) { + let mut constant_buffer = [f16::ZERO; 11]; + let constants = [ + 0., + -0., + 1., + f32::MIN, + f32::MAX, + f32::MIN_POSITIVE, + f32::NEG_INFINITY, + f32::INFINITY, + f32::NAN, + f32::consts::E, + f32::consts::PI, + ]; + c.bench_function( + "HalfFloatSliceExt::convert_from_f32_slice/constants", + |b: &mut Bencher<'_>| b.iter(|| constant_buffer.convert_from_f32_slice(&constants)), + ); + + let large: Vec<_> = iter::repeat(0) + .enumerate() + .map(|(i, _)| i as f32) + .take(SIMD_LARGE_BENCH_SLICE_LEN) + .collect(); + let mut large_buffer = [f16::ZERO; SIMD_LARGE_BENCH_SLICE_LEN]; + c.bench_function( + "HalfFloatSliceExt::convert_from_f32_slice/large", + |b: &mut Bencher<'_>| b.iter(|| large_buffer.convert_from_f32_slice(&large)), + ); +} + +fn bench_slice_f64_to_f16(c: &mut Criterion) { + let mut constant_buffer = [f16::ZERO; 11]; + let constants = [ + 0., + -0., + 1., + f64::MIN, + f64::MAX, + f64::MIN_POSITIVE, + f64::NEG_INFINITY, + f64::INFINITY, + f64::NAN, + f64::consts::E, + f64::consts::PI, + ]; + c.bench_function( + "HalfFloatSliceExt::convert_from_f64_slice/constants", + |b: &mut Bencher<'_>| b.iter(|| constant_buffer.convert_from_f64_slice(&constants)), + ); + + let large: Vec<_> = iter::repeat(0) + .enumerate() + .map(|(i, _)| i as f64) + .take(SIMD_LARGE_BENCH_SLICE_LEN) + .collect(); + let mut large_buffer = [f16::ZERO; SIMD_LARGE_BENCH_SLICE_LEN]; + c.bench_function( + "HalfFloatSliceExt::convert_from_f64_slice/large", + |b: &mut Bencher<'_>| b.iter(|| large_buffer.convert_from_f64_slice(&large)), + ); +} + +fn bench_slice_f16_to_f32(c: &mut Criterion) { + let mut constant_buffer = [0f32; 11]; + let constants = [ + f16::ZERO, + f16::NEG_ZERO, + f16::ONE, + f16::MIN, + f16::MAX, + f16::MIN_POSITIVE, + f16::NEG_INFINITY, + f16::INFINITY, + f16::NAN, + f16::E, + f16::PI, + ]; + c.bench_function( + "HalfFloatSliceExt::convert_to_f32_slice/constants", + |b: &mut Bencher<'_>| b.iter(|| constants.convert_to_f32_slice(&mut constant_buffer)), + ); + + let large: Vec<_> = iter::repeat(0) + .enumerate() + .map(|(i, _)| f16::from_f32(i as f32)) + .take(SIMD_LARGE_BENCH_SLICE_LEN) + .collect(); + let mut large_buffer = [0f32; SIMD_LARGE_BENCH_SLICE_LEN]; + c.bench_function( + "HalfFloatSliceExt::convert_to_f32_slice/large", + |b: &mut Bencher<'_>| b.iter(|| large.convert_to_f32_slice(&mut large_buffer)), + ); +} + +fn bench_slice_f16_to_f64(c: &mut Criterion) { + let mut constant_buffer = [0f64; 11]; + let constants = [ + f16::ZERO, + f16::NEG_ZERO, + f16::ONE, + f16::MIN, + f16::MAX, + f16::MIN_POSITIVE, + f16::NEG_INFINITY, + f16::INFINITY, + f16::NAN, + f16::E, + f16::PI, + ]; + c.bench_function( + "HalfFloatSliceExt::convert_to_f64_slice/constants", + |b: &mut Bencher<'_>| b.iter(|| constants.convert_to_f64_slice(&mut constant_buffer)), + ); + + let large: Vec<_> = iter::repeat(0) + .enumerate() + .map(|(i, _)| f16::from_f64(i as f64)) + .take(SIMD_LARGE_BENCH_SLICE_LEN) + .collect(); + let mut large_buffer = [0f64; SIMD_LARGE_BENCH_SLICE_LEN]; + c.bench_function( + "HalfFloatSliceExt::convert_to_f64_slice/large", + |b: &mut Bencher<'_>| b.iter(|| large.convert_to_f64_slice(&mut large_buffer)), + ); +} + +criterion_group!( + f16_simd, + bench_slice_f32_to_f16, + bench_slice_f64_to_f16, + bench_slice_f16_to_f32, + bench_slice_f16_to_f64 +); + +fn bench_f32_to_bf16(c: &mut Criterion) { + c.bench_function_over_inputs( + "bf16::from_f32", + |b: &mut Bencher<'_>, i: &f32| b.iter(|| bf16::from_f32(*i)), + vec![ + 0., + -0., + 1., + f32::MIN, + f32::MAX, + f32::MIN_POSITIVE, + f32::NEG_INFINITY, + f32::INFINITY, + f32::NAN, + f32::consts::E, + f32::consts::PI, + ], + ); +} + +fn bench_f64_to_bf16(c: &mut Criterion) { + c.bench_function_over_inputs( + "bf16::from_f64", + |b: &mut Bencher<'_>, i: &f64| b.iter(|| bf16::from_f64(*i)), + vec![ + 0., + -0., + 1., + f64::MIN, + f64::MAX, + f64::MIN_POSITIVE, + f64::NEG_INFINITY, + f64::INFINITY, + f64::NAN, + f64::consts::E, + f64::consts::PI, + ], + ); +} + +fn bench_bf16_to_f32(c: &mut Criterion) { + c.bench_function_over_inputs( + "bf16::to_f32", + |b: &mut Bencher<'_>, i: &bf16| b.iter(|| i.to_f32()), + vec![ + bf16::ZERO, + bf16::NEG_ZERO, + bf16::ONE, + bf16::MIN, + bf16::MAX, + bf16::MIN_POSITIVE, + bf16::NEG_INFINITY, + bf16::INFINITY, + bf16::NAN, + bf16::E, + bf16::PI, + ], + ); +} + +fn bench_bf16_to_f64(c: &mut Criterion) { + c.bench_function_over_inputs( + "bf16::to_f64", + |b: &mut Bencher<'_>, i: &bf16| b.iter(|| i.to_f64()), + vec![ + bf16::ZERO, + bf16::NEG_ZERO, + bf16::ONE, + bf16::MIN, + bf16::MAX, + bf16::MIN_POSITIVE, + bf16::NEG_INFINITY, + bf16::INFINITY, + bf16::NAN, + bf16::E, + bf16::PI, + ], + ); +} + +criterion_group!( + bf16_sisd, + bench_f32_to_bf16, + bench_f64_to_bf16, + bench_bf16_to_f32, + bench_bf16_to_f64 +); + +criterion_main!(f16_sisd, bf16_sisd, f16_simd); diff --git a/src/bfloat.rs b/src/bfloat.rs new file mode 100644 index 0000000..09ad035 --- /dev/null +++ b/src/bfloat.rs @@ -0,0 +1,1103 @@ +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +use core::{ + cmp::Ordering, + fmt::{Debug, Display, Error, Formatter, LowerExp, UpperExp}, + num::{FpCategory, ParseFloatError}, + str::FromStr, +}; + +pub(crate) mod convert; + +/// A 16-bit floating point type implementing the [`bfloat16`] format. +/// +/// The [`bfloat16`] floating point format is a truncated 16-bit version of the IEEE 754 standard +/// `binary32`, a.k.a `f32`. [`bf16`] has approximately the same dynamic range as `f32` by having +/// a lower precision than [`f16`]. While [`f16`] has a precision of 11 bits, [`bf16`] has a +/// precision of only 8 bits. +/// +/// Like [`f16`], [`bf16`] does not offer arithmetic operations as it is intended for compact +/// storage rather than calculations. Operations should be performed with `f32` or higher-precision +/// types and converted to/from [`bf16`] as necessary. +/// +/// [`bfloat16`]: https://en.wikipedia.org/wiki/Bfloat16_floating-point_format +/// [`bf16`]: struct.bf16.html +/// [`f16`]: struct.f16.html +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Default)] +#[repr(transparent)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct bf16(u16); + +impl bf16 { + /// Constructs a [`bf16`](struct.bf16.html) value from the raw bits. + #[inline] + pub const fn from_bits(bits: u16) -> bf16 { + bf16(bits) + } + + /// Constructs a [`bf16`](struct.bf16.html) value from a 32-bit floating point value. + /// + /// If the 32-bit value is too large to fit, ±∞ will result. NaN values are preserved. + /// Subnormal values that are too tiny to be represented will result in ±0. All other values + /// are truncated and rounded to the nearest representable value. + #[inline] + pub fn from_f32(value: f32) -> bf16 { + bf16(convert::f32_to_bf16(value)) + } + + /// Constructs a [`bf16`](struct.bf16.html) value from a 64-bit floating point value. + /// + /// If the 64-bit value is to large to fit, ±∞ will result. NaN values are preserved. + /// 64-bit subnormal values are too tiny to be represented and result in ±0. Exponents that + /// underflow the minimum exponent will result in subnormals or ±0. All other values are + /// truncated and rounded to the nearest representable value. + #[inline] + pub fn from_f64(value: f64) -> bf16 { + bf16(convert::f64_to_bf16(value)) + } + + /// Converts a [`bf16`](struct.bf16.html) into the underlying bit representation. + #[inline] + pub const fn to_bits(self) -> u16 { + self.0 + } + + /// Return the memory representation of the underlying bit representation as a byte array in + /// little-endian byte order. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let bytes = bf16::from_f32(12.5).to_le_bytes(); + /// assert_eq!(bytes, [0x48, 0x41]); + /// ``` + #[inline] + pub fn to_le_bytes(self) -> [u8; 2] { + self.0.to_le_bytes() + } + + /// Return the memory representation of the underlying bit representation as a byte array in + /// big-endian (network) byte order. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let bytes = bf16::from_f32(12.5).to_be_bytes(); + /// assert_eq!(bytes, [0x41, 0x48]); + /// ``` + #[inline] + pub fn to_be_bytes(self) -> [u8; 2] { + self.0.to_be_bytes() + } + + /// Return the memory representation of the underlying bit representation as a byte array in + /// native byte order. + /// + /// As the target platform's native endianness is used, portable code should use `to_be_bytes` + /// or `to_le_bytes`, as appropriate, instead. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let bytes = bf16::from_f32(12.5).to_ne_bytes(); + /// assert_eq!(bytes, if cfg!(target_endian = "big") { + /// [0x41, 0x48] + /// } else { + /// [0x48, 0x41] + /// }); + /// ``` + #[inline] + pub fn to_ne_bytes(self) -> [u8; 2] { + self.0.to_ne_bytes() + } + + /// Create a floating point value from its representation as a byte array in little endian. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let value = bf16::from_le_bytes([0x48, 0x41]); + /// assert_eq!(value, bf16::from_f32(12.5)); + /// ``` + #[inline] + pub fn from_le_bytes(bytes: [u8; 2]) -> bf16 { + bf16::from_bits(u16::from_le_bytes(bytes)) + } + + /// Create a floating point value from its representation as a byte array in big endian. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let value = bf16::from_be_bytes([0x41, 0x48]); + /// assert_eq!(value, bf16::from_f32(12.5)); + /// ``` + #[inline] + pub fn from_be_bytes(bytes: [u8; 2]) -> bf16 { + bf16::from_bits(u16::from_be_bytes(bytes)) + } + + /// Create a floating point value from its representation as a byte array in native endian. + /// + /// As the target platform's native endianness is used, portable code likely wants to use + /// `from_be_bytes` or `from_le_bytes`, as appropriate instead. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let value = bf16::from_ne_bytes(if cfg!(target_endian = "big") { + /// [0x41, 0x48] + /// } else { + /// [0x48, 0x41] + /// }); + /// assert_eq!(value, bf16::from_f32(12.5)); + /// ``` + #[inline] + pub fn from_ne_bytes(bytes: [u8; 2]) -> bf16 { + bf16::from_bits(u16::from_ne_bytes(bytes)) + } + + /// Converts a [`bf16`](struct.bf16.html) value into an `f32` value. + /// + /// This conversion is lossless as all values can be represented exactly in `f32`. + #[inline] + pub fn to_f32(self) -> f32 { + convert::bf16_to_f32(self.0) + } + + /// Converts a [`bf16`](struct.bf16.html) value into an `f64` value. + /// + /// This conversion is lossless as all values can be represented exactly in `f64`. + #[inline] + pub fn to_f64(self) -> f64 { + convert::bf16_to_f64(self.0) + } + + /// Returns `true` if this value is NaN and `false` otherwise. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let nan = bf16::NAN; + /// let f = bf16::from_f32(7.0_f32); + /// + /// assert!(nan.is_nan()); + /// assert!(!f.is_nan()); + /// ``` + #[inline] + pub const fn is_nan(self) -> bool { + self.0 & 0x7FFFu16 > 0x7F80u16 + } + + /// Returns `true` if this value is ±∞ and `false` otherwise. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let f = bf16::from_f32(7.0f32); + /// let inf = bf16::INFINITY; + /// let neg_inf = bf16::NEG_INFINITY; + /// let nan = bf16::NAN; + /// + /// assert!(!f.is_infinite()); + /// assert!(!nan.is_infinite()); + /// + /// assert!(inf.is_infinite()); + /// assert!(neg_inf.is_infinite()); + /// ``` + #[inline] + pub const fn is_infinite(self) -> bool { + self.0 & 0x7FFFu16 == 0x7F80u16 + } + + /// Returns `true` if this number is neither infinite nor NaN. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let f = bf16::from_f32(7.0f32); + /// let inf = bf16::INFINITY; + /// let neg_inf = bf16::NEG_INFINITY; + /// let nan = bf16::NAN; + /// + /// assert!(f.is_finite()); + /// + /// assert!(!nan.is_finite()); + /// assert!(!inf.is_finite()); + /// assert!(!neg_inf.is_finite()); + /// ``` + #[inline] + pub const fn is_finite(self) -> bool { + self.0 & 0x7F80u16 != 0x7F80u16 + } + + /// Returns `true` if the number is neither zero, infinite, subnormal, or NaN. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let min = bf16::MIN_POSITIVE; + /// let max = bf16::MAX; + /// let lower_than_min = bf16::from_f32(1.0e-39_f32); + /// let zero = bf16::from_f32(0.0_f32); + /// + /// assert!(min.is_normal()); + /// assert!(max.is_normal()); + /// + /// assert!(!zero.is_normal()); + /// assert!(!bf16::NAN.is_normal()); + /// assert!(!bf16::INFINITY.is_normal()); + /// // Values between 0 and `min` are subnormal. + /// assert!(!lower_than_min.is_normal()); + /// ``` + #[inline] + pub fn is_normal(self) -> bool { + let exp = self.0 & 0x7F80u16; + exp != 0x7F80u16 && exp != 0 + } + + /// Returns the floating point category of the number. + /// + /// If only one property is going to be tested, it is generally faster to use the specific + /// predicate instead. + /// + /// # Examples + /// + /// ```rust + /// use std::num::FpCategory; + /// # use half::prelude::*; + /// + /// let num = bf16::from_f32(12.4_f32); + /// let inf = bf16::INFINITY; + /// + /// assert_eq!(num.classify(), FpCategory::Normal); + /// assert_eq!(inf.classify(), FpCategory::Infinite); + /// ``` + pub fn classify(self) -> FpCategory { + let exp = self.0 & 0x7F80u16; + let man = self.0 & 0x007Fu16; + match (exp, man) { + (0, 0) => FpCategory::Zero, + (0, _) => FpCategory::Subnormal, + (0x7F80u16, 0) => FpCategory::Infinite, + (0x7F80u16, _) => FpCategory::Nan, + _ => FpCategory::Normal, + } + } + + /// Returns a number that represents the sign of `self`. + /// + /// * 1.0 if the number is positive, +0.0 or `INFINITY` + /// * −1.0 if the number is negative, −0.0` or `NEG_INFINITY` + /// * NaN if the number is NaN + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let f = bf16::from_f32(3.5_f32); + /// + /// assert_eq!(f.signum(), bf16::from_f32(1.0)); + /// assert_eq!(bf16::NEG_INFINITY.signum(), bf16::from_f32(-1.0)); + /// + /// assert!(bf16::NAN.signum().is_nan()); + /// ``` + pub fn signum(self) -> bf16 { + if self.is_nan() { + self + } else if self.0 & 0x8000u16 != 0 { + bf16::from_f32(-1.0) + } else { + bf16::from_f32(1.0) + } + } + + /// Returns `true` if and only if `self` has a positive sign, including +0.0, NaNs with a + /// positive sign bit and +∞. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let nan = bf16::NAN; + /// let f = bf16::from_f32(7.0_f32); + /// let g = bf16::from_f32(-7.0_f32); + /// + /// assert!(f.is_sign_positive()); + /// assert!(!g.is_sign_positive()); + /// // NaN can be either positive or negative + /// assert!(nan.is_sign_positive() != nan.is_sign_negative()); + /// ``` + #[inline] + pub const fn is_sign_positive(self) -> bool { + self.0 & 0x8000u16 == 0 + } + + /// Returns `true` if and only if `self` has a negative sign, including −0.0, NaNs with a + /// negative sign bit and −∞. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let nan = bf16::NAN; + /// let f = bf16::from_f32(7.0f32); + /// let g = bf16::from_f32(-7.0f32); + /// + /// assert!(!f.is_sign_negative()); + /// assert!(g.is_sign_negative()); + /// // NaN can be either positive or negative + /// assert!(nan.is_sign_positive() != nan.is_sign_negative()); + /// ``` + #[inline] + pub const fn is_sign_negative(self) -> bool { + self.0 & 0x8000u16 != 0 + } + + /// Approximate number of [`bf16`](struct.bf16.html) significant digits in base 10. + pub const DIGITS: u32 = 2; + /// [`bf16`](struct.bf16.html) + /// [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon) value. + /// + /// This is the difference between 1.0 and the next largest representable number. + pub const EPSILON: bf16 = bf16(0x3C00u16); + /// [`bf16`](struct.bf16.html) positive Infinity (+∞). + pub const INFINITY: bf16 = bf16(0x7F80u16); + /// Number of [`bf16`](struct.bf16.html) significant digits in base 2. + pub const MANTISSA_DIGITS: u32 = 8; + /// Largest finite [`bf16`](struct.bf16.html) value. + pub const MAX: bf16 = bf16(0x7F7F); + /// Maximum possible [`bf16`](struct.bf16.html) power of 10 exponent. + pub const MAX_10_EXP: i32 = 38; + /// Maximum possible [`bf16`](struct.bf16.html) power of 2 exponent. + pub const MAX_EXP: i32 = 128; + /// Smallest finite [`bf16`](struct.bf16.html) value. + pub const MIN: bf16 = bf16(0xFF7F); + /// Minimum possible normal [`bf16`](struct.bf16.html) power of 10 exponent. + pub const MIN_10_EXP: i32 = -37; + /// One greater than the minimum possible normal [`bf16`](struct.bf16.html) power of 2 exponent. + pub const MIN_EXP: i32 = -125; + /// Smallest positive normal [`bf16`](struct.bf16.html) value. + pub const MIN_POSITIVE: bf16 = bf16(0x0080u16); + /// [`bf16`](struct.bf16.html) Not a Number (NaN). + pub const NAN: bf16 = bf16(0x7FC0u16); + /// [`bf16`](struct.bf16.html) negative infinity (-∞). + pub const NEG_INFINITY: bf16 = bf16(0xFF80u16); + /// The radix or base of the internal representation of [`bf16`](struct.bf16.html). + pub const RADIX: u32 = 2; + + /// Minimum positive subnormal [`bf16`](struct.bf16.html) value. + pub const MIN_POSITIVE_SUBNORMAL: bf16 = bf16(0x0001u16); + /// Maximum subnormal [`bf16`](struct.bf16.html) value. + pub const MAX_SUBNORMAL: bf16 = bf16(0x007Fu16); + + /// [`bf16`](struct.bf16.html) 1 + pub const ONE: bf16 = bf16(0x3F80u16); + /// [`bf16`](struct.bf16.html) 0 + pub const ZERO: bf16 = bf16(0x0000u16); + /// [`bf16`](struct.bf16.html) -0 + pub const NEG_ZERO: bf16 = bf16(0x8000u16); + + /// [`bf16`](struct.bf16.html) Euler's number (ℯ). + pub const E: bf16 = bf16(0x402Eu16); + /// [`bf16`](struct.bf16.html) Archimedes' constant (π). + pub const PI: bf16 = bf16(0x4049u16); + /// [`bf16`](struct.bf16.html) 1/π + pub const FRAC_1_PI: bf16 = bf16(0x3EA3u16); + /// [`bf16`](struct.bf16.html) 1/√2 + pub const FRAC_1_SQRT_2: bf16 = bf16(0x3F35u16); + /// [`bf16`](struct.bf16.html) 2/π + pub const FRAC_2_PI: bf16 = bf16(0x3F23u16); + /// [`bf16`](struct.bf16.html) 2/√π + pub const FRAC_2_SQRT_PI: bf16 = bf16(0x3F90u16); + /// [`bf16`](struct.bf16.html) π/2 + pub const FRAC_PI_2: bf16 = bf16(0x3FC9u16); + /// [`bf16`](struct.bf16.html) π/3 + pub const FRAC_PI_3: bf16 = bf16(0x3F86u16); + /// [`bf16`](struct.bf16.html) π/4 + pub const FRAC_PI_4: bf16 = bf16(0x3F49u16); + /// [`bf16`](struct.bf16.html) π/6 + pub const FRAC_PI_6: bf16 = bf16(0x3F06u16); + /// [`bf16`](struct.bf16.html) π/8 + pub const FRAC_PI_8: bf16 = bf16(0x3EC9u16); + /// [`bf16`](struct.bf16.html) 𝗅𝗇 10 + pub const LN_10: bf16 = bf16(0x4013u16); + /// [`bf16`](struct.bf16.html) 𝗅𝗇 2 + pub const LN_2: bf16 = bf16(0x3F31u16); + /// [`bf16`](struct.bf16.html) 𝗅𝗈𝗀₁₀ℯ + pub const LOG10_E: bf16 = bf16(0x3EDEu16); + /// [`bf16`](struct.bf16.html) 𝗅𝗈𝗀₁₀2 + pub const LOG10_2: bf16 = bf16(0x3E9Au16); + /// [`bf16`](struct.bf16.html) 𝗅𝗈𝗀₂ℯ + pub const LOG2_E: bf16 = bf16(0x3FB9u16); + /// [`bf16`](struct.bf16.html) 𝗅𝗈𝗀₂10 + pub const LOG2_10: bf16 = bf16(0x4055u16); + /// [`bf16`](struct.bf16.html) √2 + pub const SQRT_2: bf16 = bf16(0x3FB5u16); +} + +impl From<bf16> for f32 { + #[inline] + fn from(x: bf16) -> f32 { + x.to_f32() + } +} + +impl From<bf16> for f64 { + #[inline] + fn from(x: bf16) -> f64 { + x.to_f64() + } +} + +impl From<i8> for bf16 { + #[inline] + fn from(x: i8) -> bf16 { + // Convert to f32, then to bf16 + bf16::from_f32(f32::from(x)) + } +} + +impl From<u8> for bf16 { + #[inline] + fn from(x: u8) -> bf16 { + // Convert to f32, then to f16 + bf16::from_f32(f32::from(x)) + } +} + +impl PartialEq for bf16 { + fn eq(&self, other: &bf16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + (self.0 == other.0) || ((self.0 | other.0) & 0x7FFFu16 == 0) + } + } +} + +impl PartialOrd for bf16 { + fn partial_cmp(&self, other: &bf16) -> Option<Ordering> { + if self.is_nan() || other.is_nan() { + None + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => Some(self.0.cmp(&other.0)), + (false, true) => { + if (self.0 | other.0) & 0x7FFFu16 == 0 { + Some(Ordering::Equal) + } else { + Some(Ordering::Greater) + } + } + (true, false) => { + if (self.0 | other.0) & 0x7FFFu16 == 0 { + Some(Ordering::Equal) + } else { + Some(Ordering::Less) + } + } + (true, true) => Some(other.0.cmp(&self.0)), + } + } + } + + fn lt(&self, other: &bf16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => self.0 < other.0, + (false, true) => false, + (true, false) => (self.0 | other.0) & 0x7FFFu16 != 0, + (true, true) => self.0 > other.0, + } + } + } + + fn le(&self, other: &bf16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => self.0 <= other.0, + (false, true) => (self.0 | other.0) & 0x7FFFu16 == 0, + (true, false) => true, + (true, true) => self.0 >= other.0, + } + } + } + + fn gt(&self, other: &bf16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => self.0 > other.0, + (false, true) => (self.0 | other.0) & 0x7FFFu16 != 0, + (true, false) => false, + (true, true) => self.0 < other.0, + } + } + } + + fn ge(&self, other: &bf16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => self.0 >= other.0, + (false, true) => true, + (true, false) => (self.0 | other.0) & 0x7FFFu16 == 0, + (true, true) => self.0 <= other.0, + } + } + } +} + +impl FromStr for bf16 { + type Err = ParseFloatError; + fn from_str(src: &str) -> Result<bf16, ParseFloatError> { + f32::from_str(src).map(bf16::from_f32) + } +} + +impl Debug for bf16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "0x{:X}", self.0) + } +} + +impl Display for bf16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{}", self.to_f32()) + } +} + +impl LowerExp for bf16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:e}", self.to_f32()) + } +} + +impl UpperExp for bf16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:E}", self.to_f32()) + } +} + +#[allow( + clippy::cognitive_complexity, + clippy::float_cmp, + clippy::neg_cmp_op_on_partial_ord +)] +#[cfg(test)] +mod test { + use super::*; + use core; + use core::cmp::Ordering; + use quickcheck_macros::quickcheck; + + #[test] + fn test_bf16_consts_from_f32() { + let one = bf16::from_f32(1.0); + let zero = bf16::from_f32(0.0); + let neg_zero = bf16::from_f32(-0.0); + let inf = bf16::from_f32(core::f32::INFINITY); + let neg_inf = bf16::from_f32(core::f32::NEG_INFINITY); + let nan = bf16::from_f32(core::f32::NAN); + + assert_eq!(bf16::ONE, one); + assert_eq!(bf16::ZERO, zero); + assert_eq!(bf16::NEG_ZERO, neg_zero); + assert_eq!(bf16::INFINITY, inf); + assert_eq!(bf16::NEG_INFINITY, neg_inf); + assert!(nan.is_nan()); + assert!(bf16::NAN.is_nan()); + + let e = bf16::from_f32(core::f32::consts::E); + let pi = bf16::from_f32(core::f32::consts::PI); + let frac_1_pi = bf16::from_f32(core::f32::consts::FRAC_1_PI); + let frac_1_sqrt_2 = bf16::from_f32(core::f32::consts::FRAC_1_SQRT_2); + let frac_2_pi = bf16::from_f32(core::f32::consts::FRAC_2_PI); + let frac_2_sqrt_pi = bf16::from_f32(core::f32::consts::FRAC_2_SQRT_PI); + let frac_pi_2 = bf16::from_f32(core::f32::consts::FRAC_PI_2); + let frac_pi_3 = bf16::from_f32(core::f32::consts::FRAC_PI_3); + let frac_pi_4 = bf16::from_f32(core::f32::consts::FRAC_PI_4); + let frac_pi_6 = bf16::from_f32(core::f32::consts::FRAC_PI_6); + let frac_pi_8 = bf16::from_f32(core::f32::consts::FRAC_PI_8); + let ln_10 = bf16::from_f32(core::f32::consts::LN_10); + let ln_2 = bf16::from_f32(core::f32::consts::LN_2); + let log10_e = bf16::from_f32(core::f32::consts::LOG10_E); + // core::f32::consts::LOG10_2 requires rustc 1.43.0 + let log10_2 = bf16::from_f32(2f32.log10()); + let log2_e = bf16::from_f32(core::f32::consts::LOG2_E); + // core::f32::consts::LOG2_10 requires rustc 1.43.0 + let log2_10 = bf16::from_f32(10f32.log2()); + let sqrt_2 = bf16::from_f32(core::f32::consts::SQRT_2); + + assert_eq!(bf16::E, e); + assert_eq!(bf16::PI, pi); + assert_eq!(bf16::FRAC_1_PI, frac_1_pi); + assert_eq!(bf16::FRAC_1_SQRT_2, frac_1_sqrt_2); + assert_eq!(bf16::FRAC_2_PI, frac_2_pi); + assert_eq!(bf16::FRAC_2_SQRT_PI, frac_2_sqrt_pi); + assert_eq!(bf16::FRAC_PI_2, frac_pi_2); + assert_eq!(bf16::FRAC_PI_3, frac_pi_3); + assert_eq!(bf16::FRAC_PI_4, frac_pi_4); + assert_eq!(bf16::FRAC_PI_6, frac_pi_6); + assert_eq!(bf16::FRAC_PI_8, frac_pi_8); + assert_eq!(bf16::LN_10, ln_10); + assert_eq!(bf16::LN_2, ln_2); + assert_eq!(bf16::LOG10_E, log10_e); + assert_eq!(bf16::LOG10_2, log10_2); + assert_eq!(bf16::LOG2_E, log2_e); + assert_eq!(bf16::LOG2_10, log2_10); + assert_eq!(bf16::SQRT_2, sqrt_2); + } + + #[test] + fn test_bf16_consts_from_f64() { + let one = bf16::from_f64(1.0); + let zero = bf16::from_f64(0.0); + let neg_zero = bf16::from_f64(-0.0); + let inf = bf16::from_f64(core::f64::INFINITY); + let neg_inf = bf16::from_f64(core::f64::NEG_INFINITY); + let nan = bf16::from_f64(core::f64::NAN); + + assert_eq!(bf16::ONE, one); + assert_eq!(bf16::ZERO, zero); + assert_eq!(bf16::NEG_ZERO, neg_zero); + assert_eq!(bf16::INFINITY, inf); + assert_eq!(bf16::NEG_INFINITY, neg_inf); + assert!(nan.is_nan()); + assert!(bf16::NAN.is_nan()); + + let e = bf16::from_f64(core::f64::consts::E); + let pi = bf16::from_f64(core::f64::consts::PI); + let frac_1_pi = bf16::from_f64(core::f64::consts::FRAC_1_PI); + let frac_1_sqrt_2 = bf16::from_f64(core::f64::consts::FRAC_1_SQRT_2); + let frac_2_pi = bf16::from_f64(core::f64::consts::FRAC_2_PI); + let frac_2_sqrt_pi = bf16::from_f64(core::f64::consts::FRAC_2_SQRT_PI); + let frac_pi_2 = bf16::from_f64(core::f64::consts::FRAC_PI_2); + let frac_pi_3 = bf16::from_f64(core::f64::consts::FRAC_PI_3); + let frac_pi_4 = bf16::from_f64(core::f64::consts::FRAC_PI_4); + let frac_pi_6 = bf16::from_f64(core::f64::consts::FRAC_PI_6); + let frac_pi_8 = bf16::from_f64(core::f64::consts::FRAC_PI_8); + let ln_10 = bf16::from_f64(core::f64::consts::LN_10); + let ln_2 = bf16::from_f64(core::f64::consts::LN_2); + let log10_e = bf16::from_f64(core::f64::consts::LOG10_E); + // core::f64::consts::LOG10_2 requires rustc 1.43.0 + let log10_2 = bf16::from_f64(2f64.log10()); + let log2_e = bf16::from_f64(core::f64::consts::LOG2_E); + // core::f64::consts::LOG2_10 requires rustc 1.43.0 + let log2_10 = bf16::from_f64(10f64.log2()); + let sqrt_2 = bf16::from_f64(core::f64::consts::SQRT_2); + + assert_eq!(bf16::E, e); + assert_eq!(bf16::PI, pi); + assert_eq!(bf16::FRAC_1_PI, frac_1_pi); + assert_eq!(bf16::FRAC_1_SQRT_2, frac_1_sqrt_2); + assert_eq!(bf16::FRAC_2_PI, frac_2_pi); + assert_eq!(bf16::FRAC_2_SQRT_PI, frac_2_sqrt_pi); + assert_eq!(bf16::FRAC_PI_2, frac_pi_2); + assert_eq!(bf16::FRAC_PI_3, frac_pi_3); + assert_eq!(bf16::FRAC_PI_4, frac_pi_4); + assert_eq!(bf16::FRAC_PI_6, frac_pi_6); + assert_eq!(bf16::FRAC_PI_8, frac_pi_8); + assert_eq!(bf16::LN_10, ln_10); + assert_eq!(bf16::LN_2, ln_2); + assert_eq!(bf16::LOG10_E, log10_e); + assert_eq!(bf16::LOG10_2, log10_2); + assert_eq!(bf16::LOG2_E, log2_e); + assert_eq!(bf16::LOG2_10, log2_10); + assert_eq!(bf16::SQRT_2, sqrt_2); + } + + #[test] + fn test_nan_conversion_to_smaller() { + let nan64 = f64::from_bits(0x7FF0_0000_0000_0001u64); + let neg_nan64 = f64::from_bits(0xFFF0_0000_0000_0001u64); + let nan32 = f32::from_bits(0x7F80_0001u32); + let neg_nan32 = f32::from_bits(0xFF80_0001u32); + let nan32_from_64 = nan64 as f32; + let neg_nan32_from_64 = neg_nan64 as f32; + let nan16_from_64 = bf16::from_f64(nan64); + let neg_nan16_from_64 = bf16::from_f64(neg_nan64); + let nan16_from_32 = bf16::from_f32(nan32); + let neg_nan16_from_32 = bf16::from_f32(neg_nan32); + + assert!(nan64.is_nan() && nan64.is_sign_positive()); + assert!(neg_nan64.is_nan() && neg_nan64.is_sign_negative()); + assert!(nan32.is_nan() && nan32.is_sign_positive()); + assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative()); + assert!(nan32_from_64.is_nan() && nan32_from_64.is_sign_positive()); + assert!(neg_nan32_from_64.is_nan() && neg_nan32_from_64.is_sign_negative()); + assert!(nan16_from_64.is_nan() && nan16_from_64.is_sign_positive()); + assert!(neg_nan16_from_64.is_nan() && neg_nan16_from_64.is_sign_negative()); + assert!(nan16_from_32.is_nan() && nan16_from_32.is_sign_positive()); + assert!(neg_nan16_from_32.is_nan() && neg_nan16_from_32.is_sign_negative()); + } + + #[test] + fn test_nan_conversion_to_larger() { + let nan16 = bf16::from_bits(0x7F81u16); + let neg_nan16 = bf16::from_bits(0xFF81u16); + let nan32 = f32::from_bits(0x7F80_0001u32); + let neg_nan32 = f32::from_bits(0xFF80_0001u32); + let nan32_from_16 = f32::from(nan16); + let neg_nan32_from_16 = f32::from(neg_nan16); + let nan64_from_16 = f64::from(nan16); + let neg_nan64_from_16 = f64::from(neg_nan16); + let nan64_from_32 = f64::from(nan32); + let neg_nan64_from_32 = f64::from(neg_nan32); + + assert!(nan16.is_nan() && nan16.is_sign_positive()); + assert!(neg_nan16.is_nan() && neg_nan16.is_sign_negative()); + assert!(nan32.is_nan() && nan32.is_sign_positive()); + assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative()); + assert!(nan32_from_16.is_nan() && nan32_from_16.is_sign_positive()); + assert!(neg_nan32_from_16.is_nan() && neg_nan32_from_16.is_sign_negative()); + assert!(nan64_from_16.is_nan() && nan64_from_16.is_sign_positive()); + assert!(neg_nan64_from_16.is_nan() && neg_nan64_from_16.is_sign_negative()); + assert!(nan64_from_32.is_nan() && nan64_from_32.is_sign_positive()); + assert!(neg_nan64_from_32.is_nan() && neg_nan64_from_32.is_sign_negative()); + } + + #[test] + fn test_bf16_to_f32() { + let f = bf16::from_f32(7.0); + assert_eq!(f.to_f32(), 7.0f32); + + // 7.1 is NOT exactly representable in 16-bit, it's rounded + let f = bf16::from_f32(7.1); + let diff = (f.to_f32() - 7.1f32).abs(); + // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1 + assert!(diff <= 4.0 * bf16::EPSILON.to_f32()); + + let tiny32 = f32::from_bits(0x0001_0000u32); + assert_eq!(bf16::from_bits(0x0001).to_f32(), tiny32); + assert_eq!(bf16::from_bits(0x0005).to_f32(), 5.0 * tiny32); + + assert_eq!(bf16::from_bits(0x0001), bf16::from_f32(tiny32)); + assert_eq!(bf16::from_bits(0x0005), bf16::from_f32(5.0 * tiny32)); + } + + #[test] + fn test_bf16_to_f64() { + let f = bf16::from_f64(7.0); + assert_eq!(f.to_f64(), 7.0f64); + + // 7.1 is NOT exactly representable in 16-bit, it's rounded + let f = bf16::from_f64(7.1); + let diff = (f.to_f64() - 7.1f64).abs(); + // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1 + assert!(diff <= 4.0 * bf16::EPSILON.to_f64()); + + let tiny64 = 2.0f64.powi(-133); + assert_eq!(bf16::from_bits(0x0001).to_f64(), tiny64); + assert_eq!(bf16::from_bits(0x0005).to_f64(), 5.0 * tiny64); + + assert_eq!(bf16::from_bits(0x0001), bf16::from_f64(tiny64)); + assert_eq!(bf16::from_bits(0x0005), bf16::from_f64(5.0 * tiny64)); + } + + #[test] + fn test_comparisons() { + let zero = bf16::from_f64(0.0); + let one = bf16::from_f64(1.0); + let neg_zero = bf16::from_f64(-0.0); + let neg_one = bf16::from_f64(-1.0); + + assert_eq!(zero.partial_cmp(&neg_zero), Some(Ordering::Equal)); + assert_eq!(neg_zero.partial_cmp(&zero), Some(Ordering::Equal)); + assert!(zero == neg_zero); + assert!(neg_zero == zero); + assert!(!(zero != neg_zero)); + assert!(!(neg_zero != zero)); + assert!(!(zero < neg_zero)); + assert!(!(neg_zero < zero)); + assert!(zero <= neg_zero); + assert!(neg_zero <= zero); + assert!(!(zero > neg_zero)); + assert!(!(neg_zero > zero)); + assert!(zero >= neg_zero); + assert!(neg_zero >= zero); + + assert_eq!(one.partial_cmp(&neg_zero), Some(Ordering::Greater)); + assert_eq!(neg_zero.partial_cmp(&one), Some(Ordering::Less)); + assert!(!(one == neg_zero)); + assert!(!(neg_zero == one)); + assert!(one != neg_zero); + assert!(neg_zero != one); + assert!(!(one < neg_zero)); + assert!(neg_zero < one); + assert!(!(one <= neg_zero)); + assert!(neg_zero <= one); + assert!(one > neg_zero); + assert!(!(neg_zero > one)); + assert!(one >= neg_zero); + assert!(!(neg_zero >= one)); + + assert_eq!(one.partial_cmp(&neg_one), Some(Ordering::Greater)); + assert_eq!(neg_one.partial_cmp(&one), Some(Ordering::Less)); + assert!(!(one == neg_one)); + assert!(!(neg_one == one)); + assert!(one != neg_one); + assert!(neg_one != one); + assert!(!(one < neg_one)); + assert!(neg_one < one); + assert!(!(one <= neg_one)); + assert!(neg_one <= one); + assert!(one > neg_one); + assert!(!(neg_one > one)); + assert!(one >= neg_one); + assert!(!(neg_one >= one)); + } + + #[test] + #[allow(clippy::erasing_op, clippy::identity_op)] + fn round_to_even_f32() { + // smallest positive subnormal = 0b0.0000_001 * 2^-126 = 2^-133 + let min_sub = bf16::from_bits(1); + let min_sub_f = (-133f32).exp2(); + assert_eq!(bf16::from_f32(min_sub_f).to_bits(), min_sub.to_bits()); + assert_eq!(f32::from(min_sub).to_bits(), min_sub_f.to_bits()); + + // 0.0000000_011111 rounded to 0.0000000 (< tie, no rounding) + // 0.0000000_100000 rounded to 0.0000000 (tie and even, remains at even) + // 0.0000000_100001 rounded to 0.0000001 (> tie, rounds up) + assert_eq!( + bf16::from_f32(min_sub_f * 0.49).to_bits(), + min_sub.to_bits() * 0 + ); + assert_eq!( + bf16::from_f32(min_sub_f * 0.50).to_bits(), + min_sub.to_bits() * 0 + ); + assert_eq!( + bf16::from_f32(min_sub_f * 0.51).to_bits(), + min_sub.to_bits() * 1 + ); + + // 0.0000001_011111 rounded to 0.0000001 (< tie, no rounding) + // 0.0000001_100000 rounded to 0.0000010 (tie and odd, rounds up to even) + // 0.0000001_100001 rounded to 0.0000010 (> tie, rounds up) + assert_eq!( + bf16::from_f32(min_sub_f * 1.49).to_bits(), + min_sub.to_bits() * 1 + ); + assert_eq!( + bf16::from_f32(min_sub_f * 1.50).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + bf16::from_f32(min_sub_f * 1.51).to_bits(), + min_sub.to_bits() * 2 + ); + + // 0.0000010_011111 rounded to 0.0000010 (< tie, no rounding) + // 0.0000010_100000 rounded to 0.0000010 (tie and even, remains at even) + // 0.0000010_100001 rounded to 0.0000011 (> tie, rounds up) + assert_eq!( + bf16::from_f32(min_sub_f * 2.49).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + bf16::from_f32(min_sub_f * 2.50).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + bf16::from_f32(min_sub_f * 2.51).to_bits(), + min_sub.to_bits() * 3 + ); + + assert_eq!( + bf16::from_f32(250.49f32).to_bits(), + bf16::from_f32(250.0).to_bits() + ); + assert_eq!( + bf16::from_f32(250.50f32).to_bits(), + bf16::from_f32(250.0).to_bits() + ); + assert_eq!( + bf16::from_f32(250.51f32).to_bits(), + bf16::from_f32(251.0).to_bits() + ); + assert_eq!( + bf16::from_f32(251.49f32).to_bits(), + bf16::from_f32(251.0).to_bits() + ); + assert_eq!( + bf16::from_f32(251.50f32).to_bits(), + bf16::from_f32(252.0).to_bits() + ); + assert_eq!( + bf16::from_f32(251.51f32).to_bits(), + bf16::from_f32(252.0).to_bits() + ); + assert_eq!( + bf16::from_f32(252.49f32).to_bits(), + bf16::from_f32(252.0).to_bits() + ); + assert_eq!( + bf16::from_f32(252.50f32).to_bits(), + bf16::from_f32(252.0).to_bits() + ); + assert_eq!( + bf16::from_f32(252.51f32).to_bits(), + bf16::from_f32(253.0).to_bits() + ); + } + + #[test] + #[allow(clippy::erasing_op, clippy::identity_op)] + fn round_to_even_f64() { + // smallest positive subnormal = 0b0.0000_001 * 2^-126 = 2^-133 + let min_sub = bf16::from_bits(1); + let min_sub_f = (-133f64).exp2(); + assert_eq!(bf16::from_f64(min_sub_f).to_bits(), min_sub.to_bits()); + assert_eq!(f64::from(min_sub).to_bits(), min_sub_f.to_bits()); + + // 0.0000000_011111 rounded to 0.0000000 (< tie, no rounding) + // 0.0000000_100000 rounded to 0.0000000 (tie and even, remains at even) + // 0.0000000_100001 rounded to 0.0000001 (> tie, rounds up) + assert_eq!( + bf16::from_f64(min_sub_f * 0.49).to_bits(), + min_sub.to_bits() * 0 + ); + assert_eq!( + bf16::from_f64(min_sub_f * 0.50).to_bits(), + min_sub.to_bits() * 0 + ); + assert_eq!( + bf16::from_f64(min_sub_f * 0.51).to_bits(), + min_sub.to_bits() * 1 + ); + + // 0.0000001_011111 rounded to 0.0000001 (< tie, no rounding) + // 0.0000001_100000 rounded to 0.0000010 (tie and odd, rounds up to even) + // 0.0000001_100001 rounded to 0.0000010 (> tie, rounds up) + assert_eq!( + bf16::from_f64(min_sub_f * 1.49).to_bits(), + min_sub.to_bits() * 1 + ); + assert_eq!( + bf16::from_f64(min_sub_f * 1.50).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + bf16::from_f64(min_sub_f * 1.51).to_bits(), + min_sub.to_bits() * 2 + ); + + // 0.0000010_011111 rounded to 0.0000010 (< tie, no rounding) + // 0.0000010_100000 rounded to 0.0000010 (tie and even, remains at even) + // 0.0000010_100001 rounded to 0.0000011 (> tie, rounds up) + assert_eq!( + bf16::from_f64(min_sub_f * 2.49).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + bf16::from_f64(min_sub_f * 2.50).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + bf16::from_f64(min_sub_f * 2.51).to_bits(), + min_sub.to_bits() * 3 + ); + + assert_eq!( + bf16::from_f64(250.49f64).to_bits(), + bf16::from_f64(250.0).to_bits() + ); + assert_eq!( + bf16::from_f64(250.50f64).to_bits(), + bf16::from_f64(250.0).to_bits() + ); + assert_eq!( + bf16::from_f64(250.51f64).to_bits(), + bf16::from_f64(251.0).to_bits() + ); + assert_eq!( + bf16::from_f64(251.49f64).to_bits(), + bf16::from_f64(251.0).to_bits() + ); + assert_eq!( + bf16::from_f64(251.50f64).to_bits(), + bf16::from_f64(252.0).to_bits() + ); + assert_eq!( + bf16::from_f64(251.51f64).to_bits(), + bf16::from_f64(252.0).to_bits() + ); + assert_eq!( + bf16::from_f64(252.49f64).to_bits(), + bf16::from_f64(252.0).to_bits() + ); + assert_eq!( + bf16::from_f64(252.50f64).to_bits(), + bf16::from_f64(252.0).to_bits() + ); + assert_eq!( + bf16::from_f64(252.51f64).to_bits(), + bf16::from_f64(253.0).to_bits() + ); + } + + impl quickcheck::Arbitrary for bf16 { + fn arbitrary<G: quickcheck::Gen>(g: &mut G) -> Self { + use rand::Rng; + bf16(g.gen()) + } + } + + #[quickcheck] + fn qc_roundtrip_bf16_f32_is_identity(f: bf16) -> bool { + let roundtrip = bf16::from_f32(f.to_f32()); + if f.is_nan() { + roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative() + } else { + f.0 == roundtrip.0 + } + } + + #[quickcheck] + fn qc_roundtrip_bf16_f64_is_identity(f: bf16) -> bool { + let roundtrip = bf16::from_f64(f.to_f64()); + if f.is_nan() { + roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative() + } else { + f.0 == roundtrip.0 + } + } +} diff --git a/src/bfloat/convert.rs b/src/bfloat/convert.rs new file mode 100644 index 0000000..4aa0aec --- /dev/null +++ b/src/bfloat/convert.rs @@ -0,0 +1,135 @@ +pub(crate) fn f32_to_bf16(value: f32) -> u16 { + // Convert to raw bytes + let x = value.to_bits(); + + // check for NaN + if x & 0x7FFF_FFFFu32 > 0x7F80_0000u32 { + // Keep high part of current mantissa but also set most significiant mantissa bit + return ((x >> 16) | 0x0040u32) as u16; + } + + // round and shift + let round_bit = 0x0000_8000u32; + if (x & round_bit) != 0 && (x & (3 * round_bit - 1)) != 0 { + (x >> 16) as u16 + 1 + } else { + (x >> 16) as u16 + } +} + +pub(crate) fn f64_to_bf16(value: f64) -> u16 { + // Convert to raw bytes, truncating the last 32-bits of mantissa; that precision will always + // be lost on half-precision. + let val = value.to_bits(); + let x = (val >> 32) as u32; + + // Extract IEEE754 components + let sign = x & 0x8000_0000u32; + let exp = x & 0x7FF0_0000u32; + let man = x & 0x000F_FFFFu32; + + // Check for all exponent bits being set, which is Infinity or NaN + if exp == 0x7FF0_0000u32 { + // Set mantissa MSB for NaN (and also keep shifted mantissa bits). + // We also have to check the last 32 bits. + let nan_bit = if man == 0 && (val as u32 == 0) { + 0 + } else { + 0x0040u32 + }; + return ((sign >> 16) | 0x7F80u32 | nan_bit | (man >> 13)) as u16; + } + + // The number is normalized, start assembling half precision version + let half_sign = sign >> 16; + // Unbias the exponent, then bias for bfloat16 precision + let unbiased_exp = ((exp >> 20) as i64) - 1023; + let half_exp = unbiased_exp + 127; + + // Check for exponent overflow, return +infinity + if half_exp >= 0xFF { + return (half_sign | 0x7F80u32) as u16; + } + + // Check for underflow + if half_exp <= 0 { + // Check mantissa for what we can do + if 7 - half_exp > 21 { + // No rounding possibility, so this is a full underflow, return signed zero + return half_sign as u16; + } + // Don't forget about hidden leading mantissa bit when assembling mantissa + let man = man | 0x0010_0000u32; + let mut half_man = man >> (14 - half_exp); + // Check for rounding + let round_bit = 1 << (13 - half_exp); + if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { + half_man += 1; + } + // No exponent for subnormals + return (half_sign | half_man) as u16; + } + + // Rebias the exponent + let half_exp = (half_exp as u32) << 7; + let half_man = man >> 13; + // Check for rounding + let round_bit = 0x0000_1000u32; + if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { + // Round it + ((half_sign | half_exp | half_man) + 1) as u16 + } else { + (half_sign | half_exp | half_man) as u16 + } +} + +pub(crate) fn bf16_to_f32(i: u16) -> f32 { + // If NaN, keep current mantissa but also set most significiant mantissa bit + if i & 0x7FFFu16 > 0x7F80u16 { + f32::from_bits((i as u32 | 0x0040u32) << 16) + } else { + f32::from_bits((i as u32) << 16) + } +} + +pub(crate) fn bf16_to_f64(i: u16) -> f64 { + // Check for signed zero + if i & 0x7FFFu16 == 0 { + return f64::from_bits((i as u64) << 48); + } + + let half_sign = (i & 0x8000u16) as u64; + let half_exp = (i & 0x7F80u16) as u64; + let half_man = (i & 0x007Fu16) as u64; + + // Check for an infinity or NaN when all exponent bits set + if half_exp == 0x7F80u64 { + // Check for signed infinity if mantissa is zero + if half_man == 0 { + return f64::from_bits((half_sign << 48) | 0x7FF0_0000_0000_0000u64); + } else { + // NaN, keep current mantissa but also set most significiant mantissa bit + return f64::from_bits((half_sign << 48) | 0x7FF8_0000_0000_0000u64 | (half_man << 45)); + } + } + + // Calculate double-precision components with adjusted exponent + let sign = half_sign << 48; + // Unbias exponent + let unbiased_exp = ((half_exp as i64) >> 7) - 127; + + // Check for subnormals, which will be normalized by adjusting exponent + if half_exp == 0 { + // Calculate how much to adjust the exponent by + let e = (half_man as u16).leading_zeros() - 9; + + // Rebias and adjust exponent + let exp = ((1023 - 127 - e) as u64) << 52; + let man = (half_man << (46 + e)) & 0xF_FFFF_FFFF_FFFFu64; + return f64::from_bits(sign | exp | man); + } + // Rebias exponent for a normalized normal + let exp = ((unbiased_exp + 1023) as u64) << 52; + let man = (half_man & 0x007Fu64) << 45; + f64::from_bits(sign | exp | man) +} diff --git a/src/binary16.rs b/src/binary16.rs new file mode 100644 index 0000000..d5164f0 --- /dev/null +++ b/src/binary16.rs @@ -0,0 +1,1398 @@ +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +use core::{ + cmp::Ordering, + fmt::{Debug, Display, Error, Formatter, LowerExp, UpperExp}, + num::{FpCategory, ParseFloatError}, + str::FromStr, +}; + +pub(crate) mod convert; + +/// A 16-bit floating point type implementing the IEEE 754-2008 standard [`binary16`] a.k.a `half` +/// format. +/// +/// This 16-bit floating point type is intended for efficient storage where the full range and +/// precision of a larger floating point value is not required. Because [`f16`] is primarily for +/// efficient storage, floating point operations such as addition, multiplication, etc. are not +/// implemented. Operations should be performed with `f32` or higher-precision types and converted +/// to/from [`f16`] as necessary. +/// +/// [`f16`]: struct.f16.html +/// [`binary16`]: https://en.wikipedia.org/wiki/Half-precision_floating-point_format +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Default)] +#[repr(transparent)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct f16(u16); + +#[deprecated( + since = "1.4.0", + note = "all constants moved to associated constants of [`f16`](../struct.f16.html)" +)] +pub mod consts { + //! Useful `f16` constants. + + use super::f16; + + /// Approximate number of [`f16`](../struct.f16.html) significant digits in base 10. + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::DIGITS`](../struct.f16.html#associatedconstant.DIGITS)" + )] + pub const DIGITS: u32 = f16::DIGITS; + /// [`f16`](../struct.f16.html) + /// [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon) value. + /// + /// This is the difference between 1.0 and the next largest representable number. + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::EPSILON`](../struct.f16.html#associatedconstant.EPSILON)" + )] + pub const EPSILON: f16 = f16::EPSILON; + /// [`f16`](../struct.f16.html) positive Infinity (+∞). + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::INFINITY`](../struct.f16.html#associatedconstant.INFINITY)" + )] + pub const INFINITY: f16 = f16::INFINITY; + /// Number of [`f16`](../struct.f16.html) significant digits in base 2. + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::MANTISSA_DIGITS`](../struct.f16.html#associatedconstant.MANTISSA_DIGITS)" + )] + pub const MANTISSA_DIGITS: u32 = f16::MANTISSA_DIGITS; + /// Largest finite [`f16`](../struct.f16.html) value. + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::MAX`](../struct.f16.html#associatedconstant.MAX)" + )] + pub const MAX: f16 = f16::MAX; + /// Maximum possible [`f16`](../struct.f16.html) power of 10 exponent. + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::MAX_10_EXP`](../struct.f16.html#associatedconstant.MAX_10_EXP)" + )] + pub const MAX_10_EXP: i32 = f16::MAX_10_EXP; + /// Maximum possible [`f16`](../struct.f16.html) power of 2 exponent. + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::MAX_EXP`](../struct.f16.html#associatedconstant.MAX_EXP)" + )] + pub const MAX_EXP: i32 = f16::MAX_EXP; + /// Smallest finite [`f16`](../struct.f16.html) value. + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::MIN`](../struct.f16.html#associatedconstant.MIN)" + )] + pub const MIN: f16 = f16::MIN; + /// Minimum possible normal [`f16`](../struct.f16.html) power of 10 exponent. + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::MIN_10_EXP`](../struct.f16.html#associatedconstant.MIN_10_EXP)" + )] + pub const MIN_10_EXP: i32 = f16::MIN_10_EXP; + /// One greater than the minimum possible normal [`f16`](../struct.f16.html) power of 2 exponent. + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::MIN_EXP`](../struct.f16.html#associatedconstant.MIN_EXP)" + )] + pub const MIN_EXP: i32 = f16::MIN_EXP; + /// Smallest positive normal [`f16`](../struct.f16.html) value. + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::MIN_POSITIVE`](../struct.f16.html#associatedconstant.MIN_POSITIVE)" + )] + pub const MIN_POSITIVE: f16 = f16::MIN_POSITIVE; + /// [`f16`](../struct.f16.html) Not a Number (NaN). + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::NAN`](../struct.f16.html#associatedconstant.NAN)" + )] + pub const NAN: f16 = f16::NAN; + /// [`f16`](../struct.f16.html) negative infinity (-∞). + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::NEG_INFINITY`](../struct.f16.html#associatedconstant.NEG_INFINITY)" + )] + pub const NEG_INFINITY: f16 = f16::NEG_INFINITY; + /// The radix or base of the internal representation of [`f16`](../struct.f16.html). + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::RADIX`](../struct.f16.html#associatedconstant.RADIX)" + )] + pub const RADIX: u32 = f16::RADIX; + + /// Minimum positive subnormal [`f16`](../struct.f16.html) value. + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::MIN_POSITIVE_SUBNORMAL`](../struct.f16.html#associatedconstant.MIN_POSITIVE_SUBNORMAL)" + )] + pub const MIN_POSITIVE_SUBNORMAL: f16 = f16::MIN_POSITIVE_SUBNORMAL; + /// Maximum subnormal [`f16`](../struct.f16.html) value. + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::MAX_SUBNORMAL`](../struct.f16.html#associatedconstant.MAX_SUBNORMAL)" + )] + pub const MAX_SUBNORMAL: f16 = f16::MAX_SUBNORMAL; + + /// [`f16`](../struct.f16.html) 1 + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::ONE`](../struct.f16.html#associatedconstant.ONE)" + )] + pub const ONE: f16 = f16::ONE; + /// [`f16`](../struct.f16.html) 0 + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::ZERO`](../struct.f16.html#associatedconstant.ZERO)" + )] + pub const ZERO: f16 = f16::ZERO; + /// [`f16`](../struct.f16.html) -0 + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::NEG_ZERO`](../struct.f16.html#associatedconstant.NEG_ZERO)" + )] + pub const NEG_ZERO: f16 = f16::NEG_ZERO; + + /// [`f16`](../struct.f16.html) Euler's number (ℯ). + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::E`](../struct.f16.html#associatedconstant.E)" + )] + pub const E: f16 = f16::E; + /// [`f16`](../struct.f16.html) Archimedes' constant (π). + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::PI`](../struct.f16.html#associatedconstant.PI)" + )] + pub const PI: f16 = f16::PI; + /// [`f16`](../struct.f16.html) 1/π + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::FRAC_1_PI`](../struct.f16.html#associatedconstant.FRAC_1_PI)" + )] + pub const FRAC_1_PI: f16 = f16::FRAC_1_PI; + /// [`f16`](../struct.f16.html) 1/√2 + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::FRAC_1_SQRT_2`](../struct.f16.html#associatedconstant.FRAC_1_SQRT_2)" + )] + pub const FRAC_1_SQRT_2: f16 = f16::FRAC_1_SQRT_2; + /// [`f16`](../struct.f16.html) 2/π + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::FRAC_2_PI`](../struct.f16.html#associatedconstant.FRAC_2_PI)" + )] + pub const FRAC_2_PI: f16 = f16::FRAC_2_PI; + /// [`f16`](../struct.f16.html) 2/√π + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::FRAC_2_SQRT_PI`](../struct.f16.html#associatedconstant.FRAC_2_SQRT_PI)" + )] + pub const FRAC_2_SQRT_PI: f16 = f16::FRAC_2_SQRT_PI; + /// [`f16`](../struct.f16.html) π/2 + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::FRAC_PI_2`](../struct.f16.html#associatedconstant.FRAC_PI_2)" + )] + pub const FRAC_PI_2: f16 = f16::FRAC_PI_2; + /// [`f16`](../struct.f16.html) π/3 + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::FRAC_PI_3`](../struct.f16.html#associatedconstant.FRAC_PI_3)" + )] + pub const FRAC_PI_3: f16 = f16::FRAC_PI_3; + /// [`f16`](../struct.f16.html) π/4 + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::FRAC_PI_4`](../struct.f16.html#associatedconstant.FRAC_PI_4)" + )] + pub const FRAC_PI_4: f16 = f16::FRAC_PI_4; + /// [`f16`](../struct.f16.html) π/6 + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::FRAC_PI_6`](../struct.f16.html#associatedconstant.FRAC_PI_6)" + )] + pub const FRAC_PI_6: f16 = f16::FRAC_PI_6; + /// [`f16`](../struct.f16.html) π/8 + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::FRAC_PI_8`](../struct.f16.html#associatedconstant.FRAC_PI_8)" + )] + pub const FRAC_PI_8: f16 = f16::FRAC_PI_8; + /// [`f16`](../struct.f16.html) 𝗅𝗇 10 + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::LN_10`](../struct.f16.html#associatedconstant.LN_10)" + )] + pub const LN_10: f16 = f16::LN_10; + /// [`f16`](../struct.f16.html) 𝗅𝗇 2 + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::LN_2`](../struct.f16.html#associatedconstant.LN_2)" + )] + pub const LN_2: f16 = f16::LN_2; + /// [`f16`](../struct.f16.html) 𝗅𝗈𝗀₁₀ℯ + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::LOG10_E`](../struct.f16.html#associatedconstant.LOG10_E)" + )] + pub const LOG10_E: f16 = f16::LOG10_E; + /// [`f16`](../struct.f16.html) 𝗅𝗈𝗀₂ℯ + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::LOG2_E`](../struct.f16.html#associatedconstant.LOG2_E)" + )] + pub const LOG2_E: f16 = f16::LOG2_E; + /// [`f16`](../struct.f16.html) √2 + #[deprecated( + since = "1.4.0", + note = "moved to [`f16::SQRT_2`](../struct.f16.html#associatedconstant.SQRT_2)" + )] + pub const SQRT_2: f16 = f16::SQRT_2; +} + +impl f16 { + /// Constructs a 16-bit floating point value from the raw bits. + #[inline] + pub const fn from_bits(bits: u16) -> f16 { + f16(bits) + } + + /// Constructs a 16-bit floating point value from a 32-bit floating point value. + /// + /// If the 32-bit value is to large to fit in 16-bits, ±∞ will result. NaN values are + /// preserved. 32-bit subnormal values are too tiny to be represented in 16-bits and result in + /// ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit subnormals + /// or ±0. All other values are truncated and rounded to the nearest representable 16-bit + /// value. + #[inline] + pub fn from_f32(value: f32) -> f16 { + f16(convert::f32_to_f16(value)) + } + + /// Constructs a 16-bit floating point value from a 64-bit floating point value. + /// + /// If the 64-bit value is to large to fit in 16-bits, ±∞ will result. NaN values are + /// preserved. 64-bit subnormal values are too tiny to be represented in 16-bits and result in + /// ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit subnormals + /// or ±0. All other values are truncated and rounded to the nearest representable 16-bit + /// value. + #[inline] + pub fn from_f64(value: f64) -> f16 { + f16(convert::f64_to_f16(value)) + } + + /// Converts a [`f16`](struct.f16.html) into the underlying bit representation. + #[inline] + pub const fn to_bits(self) -> u16 { + self.0 + } + + /// Return the memory representation of the underlying bit representation as a byte array in + /// little-endian byte order. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let bytes = f16::from_f32(12.5).to_le_bytes(); + /// assert_eq!(bytes, [0x40, 0x4A]); + /// ``` + #[inline] + pub fn to_le_bytes(self) -> [u8; 2] { + self.0.to_le_bytes() + } + + /// Return the memory representation of the underlying bit representation as a byte array in + /// big-endian (network) byte order. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let bytes = f16::from_f32(12.5).to_be_bytes(); + /// assert_eq!(bytes, [0x4A, 0x40]); + /// ``` + #[inline] + pub fn to_be_bytes(self) -> [u8; 2] { + self.0.to_be_bytes() + } + + /// Return the memory representation of the underlying bit representation as a byte array in + /// native byte order. + /// + /// As the target platform's native endianness is used, portable code should use `to_be_bytes` + /// or `to_le_bytes`, as appropriate, instead. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let bytes = f16::from_f32(12.5).to_ne_bytes(); + /// assert_eq!(bytes, if cfg!(target_endian = "big") { + /// [0x4A, 0x40] + /// } else { + /// [0x40, 0x4A] + /// }); + /// ``` + #[inline] + pub fn to_ne_bytes(self) -> [u8; 2] { + self.0.to_ne_bytes() + } + + /// Create a floating point value from its representation as a byte array in little endian. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let value = f16::from_le_bytes([0x40, 0x4A]); + /// assert_eq!(value, f16::from_f32(12.5)); + /// ``` + #[inline] + pub fn from_le_bytes(bytes: [u8; 2]) -> f16 { + f16::from_bits(u16::from_le_bytes(bytes)) + } + + /// Create a floating point value from its representation as a byte array in big endian. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let value = f16::from_be_bytes([0x4A, 0x40]); + /// assert_eq!(value, f16::from_f32(12.5)); + /// ``` + #[inline] + pub fn from_be_bytes(bytes: [u8; 2]) -> f16 { + f16::from_bits(u16::from_be_bytes(bytes)) + } + + /// Create a floating point value from its representation as a byte array in native endian. + /// + /// As the target platform's native endianness is used, portable code likely wants to use + /// `from_be_bytes` or `from_le_bytes`, as appropriate instead. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let value = f16::from_ne_bytes(if cfg!(target_endian = "big") { + /// [0x4A, 0x40] + /// } else { + /// [0x40, 0x4A] + /// }); + /// assert_eq!(value, f16::from_f32(12.5)); + /// ``` + #[inline] + pub fn from_ne_bytes(bytes: [u8; 2]) -> f16 { + f16::from_bits(u16::from_ne_bytes(bytes)) + } + + /// Converts a [`f16`](struct.f16.html) into the underlying bit representation. + #[deprecated(since = "1.2.0", note = "renamed to [`to_bits`](#method.to_bits)")] + #[inline] + pub fn as_bits(self) -> u16 { + self.to_bits() + } + + /// Converts a [`f16`](struct.f16.html) value into a `f32` value. + /// + /// This conversion is lossless as all 16-bit floating point values can be represented exactly + /// in 32-bit floating point. + #[inline] + pub fn to_f32(self) -> f32 { + convert::f16_to_f32(self.0) + } + + /// Converts a [`f16`](struct.f16.html) value into a `f64` value. + /// + /// This conversion is lossless as all 16-bit floating point values can be represented exactly + /// in 64-bit floating point. + #[inline] + pub fn to_f64(self) -> f64 { + convert::f16_to_f64(self.0) + } + + /// Returns `true` if this value is `NaN` and `false` otherwise. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let nan = f16::NAN; + /// let f = f16::from_f32(7.0_f32); + /// + /// assert!(nan.is_nan()); + /// assert!(!f.is_nan()); + /// ``` + #[inline] + pub const fn is_nan(self) -> bool { + self.0 & 0x7FFFu16 > 0x7C00u16 + } + + /// Returns `true` if this value is ±∞ and `false` + /// otherwise. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let f = f16::from_f32(7.0f32); + /// let inf = f16::INFINITY; + /// let neg_inf = f16::NEG_INFINITY; + /// let nan = f16::NAN; + /// + /// assert!(!f.is_infinite()); + /// assert!(!nan.is_infinite()); + /// + /// assert!(inf.is_infinite()); + /// assert!(neg_inf.is_infinite()); + /// ``` + #[inline] + pub const fn is_infinite(self) -> bool { + self.0 & 0x7FFFu16 == 0x7C00u16 + } + + /// Returns `true` if this number is neither infinite nor `NaN`. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let f = f16::from_f32(7.0f32); + /// let inf = f16::INFINITY; + /// let neg_inf = f16::NEG_INFINITY; + /// let nan = f16::NAN; + /// + /// assert!(f.is_finite()); + /// + /// assert!(!nan.is_finite()); + /// assert!(!inf.is_finite()); + /// assert!(!neg_inf.is_finite()); + /// ``` + #[inline] + pub const fn is_finite(self) -> bool { + self.0 & 0x7C00u16 != 0x7C00u16 + } + + /// Returns `true` if the number is neither zero, infinite, subnormal, or `NaN`. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let min = f16::MIN_POSITIVE; + /// let max = f16::MAX; + /// let lower_than_min = f16::from_f32(1.0e-10_f32); + /// let zero = f16::from_f32(0.0_f32); + /// + /// assert!(min.is_normal()); + /// assert!(max.is_normal()); + /// + /// assert!(!zero.is_normal()); + /// assert!(!f16::NAN.is_normal()); + /// assert!(!f16::INFINITY.is_normal()); + /// // Values between `0` and `min` are Subnormal. + /// assert!(!lower_than_min.is_normal()); + /// ``` + #[inline] + pub fn is_normal(self) -> bool { + let exp = self.0 & 0x7C00u16; + exp != 0x7C00u16 && exp != 0 + } + + /// Returns the floating point category of the number. + /// + /// If only one property is going to be tested, it is generally faster to use the specific + /// predicate instead. + /// + /// # Examples + /// + /// ```rust + /// use std::num::FpCategory; + /// # use half::prelude::*; + /// + /// let num = f16::from_f32(12.4_f32); + /// let inf = f16::INFINITY; + /// + /// assert_eq!(num.classify(), FpCategory::Normal); + /// assert_eq!(inf.classify(), FpCategory::Infinite); + /// ``` + pub fn classify(self) -> FpCategory { + let exp = self.0 & 0x7C00u16; + let man = self.0 & 0x03FFu16; + match (exp, man) { + (0, 0) => FpCategory::Zero, + (0, _) => FpCategory::Subnormal, + (0x7C00u16, 0) => FpCategory::Infinite, + (0x7C00u16, _) => FpCategory::Nan, + _ => FpCategory::Normal, + } + } + + /// Returns a number that represents the sign of `self`. + /// + /// * `1.0` if the number is positive, `+0.0` or `INFINITY` + /// * `-1.0` if the number is negative, `-0.0` or `NEG_INFINITY` + /// * `NAN` if the number is `NAN` + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let f = f16::from_f32(3.5_f32); + /// + /// assert_eq!(f.signum(), f16::from_f32(1.0)); + /// assert_eq!(f16::NEG_INFINITY.signum(), f16::from_f32(-1.0)); + /// + /// assert!(f16::NAN.signum().is_nan()); + /// ``` + pub fn signum(self) -> f16 { + if self.is_nan() { + self + } else if self.0 & 0x8000u16 != 0 { + f16::from_f32(-1.0) + } else { + f16::from_f32(1.0) + } + } + + /// Returns `true` if and only if `self` has a positive sign, including `+0.0`, `NaNs` with a + /// positive sign bit and +∞. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let nan = f16::NAN; + /// let f = f16::from_f32(7.0_f32); + /// let g = f16::from_f32(-7.0_f32); + /// + /// assert!(f.is_sign_positive()); + /// assert!(!g.is_sign_positive()); + /// // `NaN` can be either positive or negative + /// assert!(nan.is_sign_positive() != nan.is_sign_negative()); + /// ``` + #[inline] + pub const fn is_sign_positive(self) -> bool { + self.0 & 0x8000u16 == 0 + } + + /// Returns `true` if and only if `self` has a negative sign, including `-0.0`, `NaNs` with a + /// negative sign bit and −∞. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let nan = f16::NAN; + /// let f = f16::from_f32(7.0f32); + /// let g = f16::from_f32(-7.0f32); + /// + /// assert!(!f.is_sign_negative()); + /// assert!(g.is_sign_negative()); + /// // `NaN` can be either positive or negative + /// assert!(nan.is_sign_positive() != nan.is_sign_negative()); + /// ``` + #[inline] + pub const fn is_sign_negative(self) -> bool { + self.0 & 0x8000u16 != 0 + } + + /// Approximate number of [`f16`](struct.f16.html) significant digits in base 10. + pub const DIGITS: u32 = 3; + /// [`f16`](struct.f16.html) + /// [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon) value. + /// + /// This is the difference between 1.0 and the next largest representable number. + pub const EPSILON: f16 = f16(0x1400u16); + /// [`f16`](struct.f16.html) positive Infinity (+∞). + pub const INFINITY: f16 = f16(0x7C00u16); + /// Number of [`f16`](struct.f16.html) significant digits in base 2. + pub const MANTISSA_DIGITS: u32 = 11; + /// Largest finite [`f16`](struct.f16.html) value. + pub const MAX: f16 = f16(0x7BFF); + /// Maximum possible [`f16`](struct.f16.html) power of 10 exponent. + pub const MAX_10_EXP: i32 = 4; + /// Maximum possible [`f16`](struct.f16.html) power of 2 exponent. + pub const MAX_EXP: i32 = 16; + /// Smallest finite [`f16`](struct.f16.html) value. + pub const MIN: f16 = f16(0xFBFF); + /// Minimum possible normal [`f16`](struct.f16.html) power of 10 exponent. + pub const MIN_10_EXP: i32 = -4; + /// One greater than the minimum possible normal [`f16`](struct.f16.html) power of 2 exponent. + pub const MIN_EXP: i32 = -13; + /// Smallest positive normal [`f16`](struct.f16.html) value. + pub const MIN_POSITIVE: f16 = f16(0x0400u16); + /// [`f16`](struct.f16.html) Not a Number (NaN). + pub const NAN: f16 = f16(0x7E00u16); + /// [`f16`](struct.f16.html) negative infinity (-∞). + pub const NEG_INFINITY: f16 = f16(0xFC00u16); + /// The radix or base of the internal representation of [`f16`](struct.f16.html). + pub const RADIX: u32 = 2; + + /// Minimum positive subnormal [`f16`](struct.f16.html) value. + pub const MIN_POSITIVE_SUBNORMAL: f16 = f16(0x0001u16); + /// Maximum subnormal [`f16`](struct.f16.html) value. + pub const MAX_SUBNORMAL: f16 = f16(0x03FFu16); + + /// [`f16`](struct.f16.html) 1 + pub const ONE: f16 = f16(0x3C00u16); + /// [`f16`](struct.f16.html) 0 + pub const ZERO: f16 = f16(0x0000u16); + /// [`f16`](struct.f16.html) -0 + pub const NEG_ZERO: f16 = f16(0x8000u16); + + /// [`f16`](struct.f16.html) Euler's number (ℯ). + pub const E: f16 = f16(0x4170u16); + /// [`f16`](struct.f16.html) Archimedes' constant (π). + pub const PI: f16 = f16(0x4248u16); + /// [`f16`](struct.f16.html) 1/π + pub const FRAC_1_PI: f16 = f16(0x3518u16); + /// [`f16`](struct.f16.html) 1/√2 + pub const FRAC_1_SQRT_2: f16 = f16(0x39A8u16); + /// [`f16`](struct.f16.html) 2/π + pub const FRAC_2_PI: f16 = f16(0x3918u16); + /// [`f16`](struct.f16.html) 2/√π + pub const FRAC_2_SQRT_PI: f16 = f16(0x3C83u16); + /// [`f16`](struct.f16.html) π/2 + pub const FRAC_PI_2: f16 = f16(0x3E48u16); + /// [`f16`](struct.f16.html) π/3 + pub const FRAC_PI_3: f16 = f16(0x3C30u16); + /// [`f16`](struct.f16.html) π/4 + pub const FRAC_PI_4: f16 = f16(0x3A48u16); + /// [`f16`](struct.f16.html) π/6 + pub const FRAC_PI_6: f16 = f16(0x3830u16); + /// [`f16`](struct.f16.html) π/8 + pub const FRAC_PI_8: f16 = f16(0x3648u16); + /// [`f16`](struct.f16.html) 𝗅𝗇 10 + pub const LN_10: f16 = f16(0x409Bu16); + /// [`f16`](struct.f16.html) 𝗅𝗇 2 + pub const LN_2: f16 = f16(0x398Cu16); + /// [`f16`](struct.f16.html) 𝗅𝗈𝗀₁₀ℯ + pub const LOG10_E: f16 = f16(0x36F3u16); + /// [`f16`](struct.f16.html) 𝗅𝗈𝗀₁₀2 + pub const LOG10_2: f16 = f16(0x34D1u16); + /// [`f16`](struct.f16.html) 𝗅𝗈𝗀₂ℯ + pub const LOG2_E: f16 = f16(0x3DC5u16); + /// [`f16`](struct.f16.html) 𝗅𝗈𝗀₂10 + pub const LOG2_10: f16 = f16(0x42A5u16); + /// [`f16`](struct.f16.html) √2 + pub const SQRT_2: f16 = f16(0x3DA8u16); +} + +impl From<f16> for f32 { + #[inline] + fn from(x: f16) -> f32 { + x.to_f32() + } +} + +impl From<f16> for f64 { + #[inline] + fn from(x: f16) -> f64 { + x.to_f64() + } +} + +impl From<i8> for f16 { + #[inline] + fn from(x: i8) -> f16 { + // Convert to f32, then to f16 + f16::from_f32(f32::from(x)) + } +} + +impl From<u8> for f16 { + #[inline] + fn from(x: u8) -> f16 { + // Convert to f32, then to f16 + f16::from_f32(f32::from(x)) + } +} + +impl PartialEq for f16 { + fn eq(&self, other: &f16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + (self.0 == other.0) || ((self.0 | other.0) & 0x7FFFu16 == 0) + } + } +} + +impl PartialOrd for f16 { + fn partial_cmp(&self, other: &f16) -> Option<Ordering> { + if self.is_nan() || other.is_nan() { + None + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => Some(self.0.cmp(&other.0)), + (false, true) => { + if (self.0 | other.0) & 0x7FFFu16 == 0 { + Some(Ordering::Equal) + } else { + Some(Ordering::Greater) + } + } + (true, false) => { + if (self.0 | other.0) & 0x7FFFu16 == 0 { + Some(Ordering::Equal) + } else { + Some(Ordering::Less) + } + } + (true, true) => Some(other.0.cmp(&self.0)), + } + } + } + + fn lt(&self, other: &f16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => self.0 < other.0, + (false, true) => false, + (true, false) => (self.0 | other.0) & 0x7FFFu16 != 0, + (true, true) => self.0 > other.0, + } + } + } + + fn le(&self, other: &f16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => self.0 <= other.0, + (false, true) => (self.0 | other.0) & 0x7FFFu16 == 0, + (true, false) => true, + (true, true) => self.0 >= other.0, + } + } + } + + fn gt(&self, other: &f16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => self.0 > other.0, + (false, true) => (self.0 | other.0) & 0x7FFFu16 != 0, + (true, false) => false, + (true, true) => self.0 < other.0, + } + } + } + + fn ge(&self, other: &f16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => self.0 >= other.0, + (false, true) => true, + (true, false) => (self.0 | other.0) & 0x7FFFu16 == 0, + (true, true) => self.0 <= other.0, + } + } + } +} + +impl FromStr for f16 { + type Err = ParseFloatError; + fn from_str(src: &str) -> Result<f16, ParseFloatError> { + f32::from_str(src).map(f16::from_f32) + } +} + +impl Debug for f16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "0x{:X}", self.0) + } +} + +impl Display for f16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{}", self.to_f32()) + } +} + +impl LowerExp for f16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:e}", self.to_f32()) + } +} + +impl UpperExp for f16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:E}", self.to_f32()) + } +} + +#[allow( + clippy::cognitive_complexity, + clippy::float_cmp, + clippy::neg_cmp_op_on_partial_ord +)] +#[cfg(test)] +mod test { + use super::*; + use core; + use core::cmp::Ordering; + use quickcheck_macros::quickcheck; + + #[test] + fn test_f16_consts() { + // DIGITS + let digits = ((f16::MANTISSA_DIGITS as f32 - 1.0) * 2f32.log10()).floor() as u32; + assert_eq!(f16::DIGITS, digits); + // sanity check to show test is good + let digits32 = ((core::f32::MANTISSA_DIGITS as f32 - 1.0) * 2f32.log10()).floor() as u32; + assert_eq!(core::f32::DIGITS, digits32); + + // EPSILON + let one = f16::from_f32(1.0); + let one_plus_epsilon = f16::from_bits(one.to_bits() + 1); + let epsilon = f16::from_f32(one_plus_epsilon.to_f32() - 1.0); + assert_eq!(f16::EPSILON, epsilon); + // sanity check to show test is good + let one_plus_epsilon32 = f32::from_bits(1.0f32.to_bits() + 1); + let epsilon32 = one_plus_epsilon32 - 1f32; + assert_eq!(core::f32::EPSILON, epsilon32); + + // MAX, MIN and MIN_POSITIVE + let max = f16::from_bits(f16::INFINITY.to_bits() - 1); + let min = f16::from_bits(f16::NEG_INFINITY.to_bits() - 1); + let min_pos = f16::from_f32(2f32.powi(f16::MIN_EXP - 1)); + assert_eq!(f16::MAX, max); + assert_eq!(f16::MIN, min); + assert_eq!(f16::MIN_POSITIVE, min_pos); + // sanity check to show test is good + let max32 = f32::from_bits(core::f32::INFINITY.to_bits() - 1); + let min32 = f32::from_bits(core::f32::NEG_INFINITY.to_bits() - 1); + let min_pos32 = 2f32.powi(core::f32::MIN_EXP - 1); + assert_eq!(core::f32::MAX, max32); + assert_eq!(core::f32::MIN, min32); + assert_eq!(core::f32::MIN_POSITIVE, min_pos32); + + // MIN_10_EXP and MAX_10_EXP + let ten_to_min = 10f32.powi(f16::MIN_10_EXP); + assert!(ten_to_min / 10.0 < f16::MIN_POSITIVE.to_f32()); + assert!(ten_to_min > f16::MIN_POSITIVE.to_f32()); + let ten_to_max = 10f32.powi(f16::MAX_10_EXP); + assert!(ten_to_max < f16::MAX.to_f32()); + assert!(ten_to_max * 10.0 > f16::MAX.to_f32()); + // sanity check to show test is good + let ten_to_min32 = 10f64.powi(core::f32::MIN_10_EXP); + assert!(ten_to_min32 / 10.0 < f64::from(core::f32::MIN_POSITIVE)); + assert!(ten_to_min32 > f64::from(core::f32::MIN_POSITIVE)); + let ten_to_max32 = 10f64.powi(core::f32::MAX_10_EXP); + assert!(ten_to_max32 < f64::from(core::f32::MAX)); + assert!(ten_to_max32 * 10.0 > f64::from(core::f32::MAX)); + } + + #[test] + fn test_f16_consts_from_f32() { + let one = f16::from_f32(1.0); + let zero = f16::from_f32(0.0); + let neg_zero = f16::from_f32(-0.0); + let inf = f16::from_f32(core::f32::INFINITY); + let neg_inf = f16::from_f32(core::f32::NEG_INFINITY); + let nan = f16::from_f32(core::f32::NAN); + + assert_eq!(f16::ONE, one); + assert_eq!(f16::ZERO, zero); + assert!(zero.is_sign_positive()); + assert_eq!(f16::NEG_ZERO, neg_zero); + assert!(neg_zero.is_sign_negative()); + assert_eq!(f16::INFINITY, inf); + assert_eq!(f16::NEG_INFINITY, neg_inf); + assert!(nan.is_nan()); + assert!(f16::NAN.is_nan()); + + let e = f16::from_f32(core::f32::consts::E); + let pi = f16::from_f32(core::f32::consts::PI); + let frac_1_pi = f16::from_f32(core::f32::consts::FRAC_1_PI); + let frac_1_sqrt_2 = f16::from_f32(core::f32::consts::FRAC_1_SQRT_2); + let frac_2_pi = f16::from_f32(core::f32::consts::FRAC_2_PI); + let frac_2_sqrt_pi = f16::from_f32(core::f32::consts::FRAC_2_SQRT_PI); + let frac_pi_2 = f16::from_f32(core::f32::consts::FRAC_PI_2); + let frac_pi_3 = f16::from_f32(core::f32::consts::FRAC_PI_3); + let frac_pi_4 = f16::from_f32(core::f32::consts::FRAC_PI_4); + let frac_pi_6 = f16::from_f32(core::f32::consts::FRAC_PI_6); + let frac_pi_8 = f16::from_f32(core::f32::consts::FRAC_PI_8); + let ln_10 = f16::from_f32(core::f32::consts::LN_10); + let ln_2 = f16::from_f32(core::f32::consts::LN_2); + let log10_e = f16::from_f32(core::f32::consts::LOG10_E); + // core::f32::consts::LOG10_2 requires rustc 1.43.0 + let log10_2 = f16::from_f32(2f32.log10()); + let log2_e = f16::from_f32(core::f32::consts::LOG2_E); + // core::f32::consts::LOG2_10 requires rustc 1.43.0 + let log2_10 = f16::from_f32(10f32.log2()); + let sqrt_2 = f16::from_f32(core::f32::consts::SQRT_2); + + assert_eq!(f16::E, e); + assert_eq!(f16::PI, pi); + assert_eq!(f16::FRAC_1_PI, frac_1_pi); + assert_eq!(f16::FRAC_1_SQRT_2, frac_1_sqrt_2); + assert_eq!(f16::FRAC_2_PI, frac_2_pi); + assert_eq!(f16::FRAC_2_SQRT_PI, frac_2_sqrt_pi); + assert_eq!(f16::FRAC_PI_2, frac_pi_2); + assert_eq!(f16::FRAC_PI_3, frac_pi_3); + assert_eq!(f16::FRAC_PI_4, frac_pi_4); + assert_eq!(f16::FRAC_PI_6, frac_pi_6); + assert_eq!(f16::FRAC_PI_8, frac_pi_8); + assert_eq!(f16::LN_10, ln_10); + assert_eq!(f16::LN_2, ln_2); + assert_eq!(f16::LOG10_E, log10_e); + assert_eq!(f16::LOG10_2, log10_2); + assert_eq!(f16::LOG2_E, log2_e); + assert_eq!(f16::LOG2_10, log2_10); + assert_eq!(f16::SQRT_2, sqrt_2); + } + + #[test] + fn test_f16_consts_from_f64() { + let one = f16::from_f64(1.0); + let zero = f16::from_f64(0.0); + let neg_zero = f16::from_f64(-0.0); + let inf = f16::from_f64(core::f64::INFINITY); + let neg_inf = f16::from_f64(core::f64::NEG_INFINITY); + let nan = f16::from_f64(core::f64::NAN); + + assert_eq!(f16::ONE, one); + assert_eq!(f16::ZERO, zero); + assert!(zero.is_sign_positive()); + assert_eq!(f16::NEG_ZERO, neg_zero); + assert!(neg_zero.is_sign_negative()); + assert_eq!(f16::INFINITY, inf); + assert_eq!(f16::NEG_INFINITY, neg_inf); + assert!(nan.is_nan()); + assert!(f16::NAN.is_nan()); + + let e = f16::from_f64(core::f64::consts::E); + let pi = f16::from_f64(core::f64::consts::PI); + let frac_1_pi = f16::from_f64(core::f64::consts::FRAC_1_PI); + let frac_1_sqrt_2 = f16::from_f64(core::f64::consts::FRAC_1_SQRT_2); + let frac_2_pi = f16::from_f64(core::f64::consts::FRAC_2_PI); + let frac_2_sqrt_pi = f16::from_f64(core::f64::consts::FRAC_2_SQRT_PI); + let frac_pi_2 = f16::from_f64(core::f64::consts::FRAC_PI_2); + let frac_pi_3 = f16::from_f64(core::f64::consts::FRAC_PI_3); + let frac_pi_4 = f16::from_f64(core::f64::consts::FRAC_PI_4); + let frac_pi_6 = f16::from_f64(core::f64::consts::FRAC_PI_6); + let frac_pi_8 = f16::from_f64(core::f64::consts::FRAC_PI_8); + let ln_10 = f16::from_f64(core::f64::consts::LN_10); + let ln_2 = f16::from_f64(core::f64::consts::LN_2); + let log10_e = f16::from_f64(core::f64::consts::LOG10_E); + // core::f64::consts::LOG10_2 requires rustc 1.43.0 + let log10_2 = f16::from_f64(2f64.log10()); + let log2_e = f16::from_f64(core::f64::consts::LOG2_E); + // core::f64::consts::LOG2_10 requires rustc 1.43.0 + let log2_10 = f16::from_f64(10f64.log2()); + let sqrt_2 = f16::from_f64(core::f64::consts::SQRT_2); + + assert_eq!(f16::E, e); + assert_eq!(f16::PI, pi); + assert_eq!(f16::FRAC_1_PI, frac_1_pi); + assert_eq!(f16::FRAC_1_SQRT_2, frac_1_sqrt_2); + assert_eq!(f16::FRAC_2_PI, frac_2_pi); + assert_eq!(f16::FRAC_2_SQRT_PI, frac_2_sqrt_pi); + assert_eq!(f16::FRAC_PI_2, frac_pi_2); + assert_eq!(f16::FRAC_PI_3, frac_pi_3); + assert_eq!(f16::FRAC_PI_4, frac_pi_4); + assert_eq!(f16::FRAC_PI_6, frac_pi_6); + assert_eq!(f16::FRAC_PI_8, frac_pi_8); + assert_eq!(f16::LN_10, ln_10); + assert_eq!(f16::LN_2, ln_2); + assert_eq!(f16::LOG10_E, log10_e); + assert_eq!(f16::LOG10_2, log10_2); + assert_eq!(f16::LOG2_E, log2_e); + assert_eq!(f16::LOG2_10, log2_10); + assert_eq!(f16::SQRT_2, sqrt_2); + } + + #[test] + fn test_nan_conversion_to_smaller() { + let nan64 = f64::from_bits(0x7FF0_0000_0000_0001u64); + let neg_nan64 = f64::from_bits(0xFFF0_0000_0000_0001u64); + let nan32 = f32::from_bits(0x7F80_0001u32); + let neg_nan32 = f32::from_bits(0xFF80_0001u32); + let nan32_from_64 = nan64 as f32; + let neg_nan32_from_64 = neg_nan64 as f32; + let nan16_from_64 = f16::from_f64(nan64); + let neg_nan16_from_64 = f16::from_f64(neg_nan64); + let nan16_from_32 = f16::from_f32(nan32); + let neg_nan16_from_32 = f16::from_f32(neg_nan32); + + assert!(nan64.is_nan() && nan64.is_sign_positive()); + assert!(neg_nan64.is_nan() && neg_nan64.is_sign_negative()); + assert!(nan32.is_nan() && nan32.is_sign_positive()); + assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative()); + assert!(nan32_from_64.is_nan() && nan32_from_64.is_sign_positive()); + assert!(neg_nan32_from_64.is_nan() && neg_nan32_from_64.is_sign_negative()); + assert!(nan16_from_64.is_nan() && nan16_from_64.is_sign_positive()); + assert!(neg_nan16_from_64.is_nan() && neg_nan16_from_64.is_sign_negative()); + assert!(nan16_from_32.is_nan() && nan16_from_32.is_sign_positive()); + assert!(neg_nan16_from_32.is_nan() && neg_nan16_from_32.is_sign_negative()); + } + + #[test] + fn test_nan_conversion_to_larger() { + let nan16 = f16::from_bits(0x7C01u16); + let neg_nan16 = f16::from_bits(0xFC01u16); + let nan32 = f32::from_bits(0x7F80_0001u32); + let neg_nan32 = f32::from_bits(0xFF80_0001u32); + let nan32_from_16 = f32::from(nan16); + let neg_nan32_from_16 = f32::from(neg_nan16); + let nan64_from_16 = f64::from(nan16); + let neg_nan64_from_16 = f64::from(neg_nan16); + let nan64_from_32 = f64::from(nan32); + let neg_nan64_from_32 = f64::from(neg_nan32); + + assert!(nan16.is_nan() && nan16.is_sign_positive()); + assert!(neg_nan16.is_nan() && neg_nan16.is_sign_negative()); + assert!(nan32.is_nan() && nan32.is_sign_positive()); + assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative()); + assert!(nan32_from_16.is_nan() && nan32_from_16.is_sign_positive()); + assert!(neg_nan32_from_16.is_nan() && neg_nan32_from_16.is_sign_negative()); + assert!(nan64_from_16.is_nan() && nan64_from_16.is_sign_positive()); + assert!(neg_nan64_from_16.is_nan() && neg_nan64_from_16.is_sign_negative()); + assert!(nan64_from_32.is_nan() && nan64_from_32.is_sign_positive()); + assert!(neg_nan64_from_32.is_nan() && neg_nan64_from_32.is_sign_negative()); + } + + #[test] + fn test_f16_to_f32() { + let f = f16::from_f32(7.0); + assert_eq!(f.to_f32(), 7.0f32); + + // 7.1 is NOT exactly representable in 16-bit, it's rounded + let f = f16::from_f32(7.1); + let diff = (f.to_f32() - 7.1f32).abs(); + // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1 + assert!(diff <= 4.0 * f16::EPSILON.to_f32()); + + assert_eq!(f16::from_bits(0x0000_0001).to_f32(), 2.0f32.powi(-24)); + assert_eq!(f16::from_bits(0x0000_0005).to_f32(), 5.0 * 2.0f32.powi(-24)); + + assert_eq!(f16::from_bits(0x0000_0001), f16::from_f32(2.0f32.powi(-24))); + assert_eq!( + f16::from_bits(0x0000_0005), + f16::from_f32(5.0 * 2.0f32.powi(-24)) + ); + } + + #[test] + fn test_f16_to_f64() { + let f = f16::from_f64(7.0); + assert_eq!(f.to_f64(), 7.0f64); + + // 7.1 is NOT exactly representable in 16-bit, it's rounded + let f = f16::from_f64(7.1); + let diff = (f.to_f64() - 7.1f64).abs(); + // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1 + assert!(diff <= 4.0 * f16::EPSILON.to_f64()); + + assert_eq!(f16::from_bits(0x0000_0001).to_f64(), 2.0f64.powi(-24)); + assert_eq!(f16::from_bits(0x0000_0005).to_f64(), 5.0 * 2.0f64.powi(-24)); + + assert_eq!(f16::from_bits(0x0000_0001), f16::from_f64(2.0f64.powi(-24))); + assert_eq!( + f16::from_bits(0x0000_0005), + f16::from_f64(5.0 * 2.0f64.powi(-24)) + ); + } + + #[test] + fn test_comparisons() { + let zero = f16::from_f64(0.0); + let one = f16::from_f64(1.0); + let neg_zero = f16::from_f64(-0.0); + let neg_one = f16::from_f64(-1.0); + + assert_eq!(zero.partial_cmp(&neg_zero), Some(Ordering::Equal)); + assert_eq!(neg_zero.partial_cmp(&zero), Some(Ordering::Equal)); + assert!(zero == neg_zero); + assert!(neg_zero == zero); + assert!(!(zero != neg_zero)); + assert!(!(neg_zero != zero)); + assert!(!(zero < neg_zero)); + assert!(!(neg_zero < zero)); + assert!(zero <= neg_zero); + assert!(neg_zero <= zero); + assert!(!(zero > neg_zero)); + assert!(!(neg_zero > zero)); + assert!(zero >= neg_zero); + assert!(neg_zero >= zero); + + assert_eq!(one.partial_cmp(&neg_zero), Some(Ordering::Greater)); + assert_eq!(neg_zero.partial_cmp(&one), Some(Ordering::Less)); + assert!(!(one == neg_zero)); + assert!(!(neg_zero == one)); + assert!(one != neg_zero); + assert!(neg_zero != one); + assert!(!(one < neg_zero)); + assert!(neg_zero < one); + assert!(!(one <= neg_zero)); + assert!(neg_zero <= one); + assert!(one > neg_zero); + assert!(!(neg_zero > one)); + assert!(one >= neg_zero); + assert!(!(neg_zero >= one)); + + assert_eq!(one.partial_cmp(&neg_one), Some(Ordering::Greater)); + assert_eq!(neg_one.partial_cmp(&one), Some(Ordering::Less)); + assert!(!(one == neg_one)); + assert!(!(neg_one == one)); + assert!(one != neg_one); + assert!(neg_one != one); + assert!(!(one < neg_one)); + assert!(neg_one < one); + assert!(!(one <= neg_one)); + assert!(neg_one <= one); + assert!(one > neg_one); + assert!(!(neg_one > one)); + assert!(one >= neg_one); + assert!(!(neg_one >= one)); + } + + #[test] + #[allow(clippy::erasing_op, clippy::identity_op)] + fn round_to_even_f32() { + // smallest positive subnormal = 0b0.0000_0000_01 * 2^-14 = 2^-24 + let min_sub = f16::from_bits(1); + let min_sub_f = (-24f32).exp2(); + assert_eq!(f16::from_f32(min_sub_f).to_bits(), min_sub.to_bits()); + assert_eq!(f32::from(min_sub).to_bits(), min_sub_f.to_bits()); + + // 0.0000000000_011111 rounded to 0.0000000000 (< tie, no rounding) + // 0.0000000000_100000 rounded to 0.0000000000 (tie and even, remains at even) + // 0.0000000000_100001 rounded to 0.0000000001 (> tie, rounds up) + assert_eq!( + f16::from_f32(min_sub_f * 0.49).to_bits(), + min_sub.to_bits() * 0 + ); + assert_eq!( + f16::from_f32(min_sub_f * 0.50).to_bits(), + min_sub.to_bits() * 0 + ); + assert_eq!( + f16::from_f32(min_sub_f * 0.51).to_bits(), + min_sub.to_bits() * 1 + ); + + // 0.0000000001_011111 rounded to 0.0000000001 (< tie, no rounding) + // 0.0000000001_100000 rounded to 0.0000000010 (tie and odd, rounds up to even) + // 0.0000000001_100001 rounded to 0.0000000010 (> tie, rounds up) + assert_eq!( + f16::from_f32(min_sub_f * 1.49).to_bits(), + min_sub.to_bits() * 1 + ); + assert_eq!( + f16::from_f32(min_sub_f * 1.50).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + f16::from_f32(min_sub_f * 1.51).to_bits(), + min_sub.to_bits() * 2 + ); + + // 0.0000000010_011111 rounded to 0.0000000010 (< tie, no rounding) + // 0.0000000010_100000 rounded to 0.0000000010 (tie and even, remains at even) + // 0.0000000010_100001 rounded to 0.0000000011 (> tie, rounds up) + assert_eq!( + f16::from_f32(min_sub_f * 2.49).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + f16::from_f32(min_sub_f * 2.50).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + f16::from_f32(min_sub_f * 2.51).to_bits(), + min_sub.to_bits() * 3 + ); + + assert_eq!( + f16::from_f32(2000.49f32).to_bits(), + f16::from_f32(2000.0).to_bits() + ); + assert_eq!( + f16::from_f32(2000.50f32).to_bits(), + f16::from_f32(2000.0).to_bits() + ); + assert_eq!( + f16::from_f32(2000.51f32).to_bits(), + f16::from_f32(2001.0).to_bits() + ); + assert_eq!( + f16::from_f32(2001.49f32).to_bits(), + f16::from_f32(2001.0).to_bits() + ); + assert_eq!( + f16::from_f32(2001.50f32).to_bits(), + f16::from_f32(2002.0).to_bits() + ); + assert_eq!( + f16::from_f32(2001.51f32).to_bits(), + f16::from_f32(2002.0).to_bits() + ); + assert_eq!( + f16::from_f32(2002.49f32).to_bits(), + f16::from_f32(2002.0).to_bits() + ); + assert_eq!( + f16::from_f32(2002.50f32).to_bits(), + f16::from_f32(2002.0).to_bits() + ); + assert_eq!( + f16::from_f32(2002.51f32).to_bits(), + f16::from_f32(2003.0).to_bits() + ); + } + + #[test] + #[allow(clippy::erasing_op, clippy::identity_op)] + fn round_to_even_f64() { + // smallest positive subnormal = 0b0.0000_0000_01 * 2^-14 = 2^-24 + let min_sub = f16::from_bits(1); + let min_sub_f = (-24f64).exp2(); + assert_eq!(f16::from_f64(min_sub_f).to_bits(), min_sub.to_bits()); + assert_eq!(f64::from(min_sub).to_bits(), min_sub_f.to_bits()); + + // 0.0000000000_011111 rounded to 0.0000000000 (< tie, no rounding) + // 0.0000000000_100000 rounded to 0.0000000000 (tie and even, remains at even) + // 0.0000000000_100001 rounded to 0.0000000001 (> tie, rounds up) + assert_eq!( + f16::from_f64(min_sub_f * 0.49).to_bits(), + min_sub.to_bits() * 0 + ); + assert_eq!( + f16::from_f64(min_sub_f * 0.50).to_bits(), + min_sub.to_bits() * 0 + ); + assert_eq!( + f16::from_f64(min_sub_f * 0.51).to_bits(), + min_sub.to_bits() * 1 + ); + + // 0.0000000001_011111 rounded to 0.0000000001 (< tie, no rounding) + // 0.0000000001_100000 rounded to 0.0000000010 (tie and odd, rounds up to even) + // 0.0000000001_100001 rounded to 0.0000000010 (> tie, rounds up) + assert_eq!( + f16::from_f64(min_sub_f * 1.49).to_bits(), + min_sub.to_bits() * 1 + ); + assert_eq!( + f16::from_f64(min_sub_f * 1.50).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + f16::from_f64(min_sub_f * 1.51).to_bits(), + min_sub.to_bits() * 2 + ); + + // 0.0000000010_011111 rounded to 0.0000000010 (< tie, no rounding) + // 0.0000000010_100000 rounded to 0.0000000010 (tie and even, remains at even) + // 0.0000000010_100001 rounded to 0.0000000011 (> tie, rounds up) + assert_eq!( + f16::from_f64(min_sub_f * 2.49).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + f16::from_f64(min_sub_f * 2.50).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + f16::from_f64(min_sub_f * 2.51).to_bits(), + min_sub.to_bits() * 3 + ); + + assert_eq!( + f16::from_f64(2000.49f64).to_bits(), + f16::from_f64(2000.0).to_bits() + ); + assert_eq!( + f16::from_f64(2000.50f64).to_bits(), + f16::from_f64(2000.0).to_bits() + ); + assert_eq!( + f16::from_f64(2000.51f64).to_bits(), + f16::from_f64(2001.0).to_bits() + ); + assert_eq!( + f16::from_f64(2001.49f64).to_bits(), + f16::from_f64(2001.0).to_bits() + ); + assert_eq!( + f16::from_f64(2001.50f64).to_bits(), + f16::from_f64(2002.0).to_bits() + ); + assert_eq!( + f16::from_f64(2001.51f64).to_bits(), + f16::from_f64(2002.0).to_bits() + ); + assert_eq!( + f16::from_f64(2002.49f64).to_bits(), + f16::from_f64(2002.0).to_bits() + ); + assert_eq!( + f16::from_f64(2002.50f64).to_bits(), + f16::from_f64(2002.0).to_bits() + ); + assert_eq!( + f16::from_f64(2002.51f64).to_bits(), + f16::from_f64(2003.0).to_bits() + ); + } + + impl quickcheck::Arbitrary for f16 { + fn arbitrary<G: quickcheck::Gen>(g: &mut G) -> Self { + use rand::Rng; + f16(g.gen()) + } + } + + #[quickcheck] + fn qc_roundtrip_f16_f32_is_identity(f: f16) -> bool { + let roundtrip = f16::from_f32(f.to_f32()); + if f.is_nan() { + roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative() + } else { + f.0 == roundtrip.0 + } + } + + #[quickcheck] + fn qc_roundtrip_f16_f64_is_identity(f: f16) -> bool { + let roundtrip = f16::from_f64(f.to_f64()); + if f.is_nan() { + roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative() + } else { + f.0 == roundtrip.0 + } + } +} diff --git a/src/binary16/convert.rs b/src/binary16/convert.rs new file mode 100644 index 0000000..c2521d8 --- /dev/null +++ b/src/binary16/convert.rs @@ -0,0 +1,491 @@ +#![allow(dead_code, unused_imports)] + +macro_rules! convert_fn { + (fn $name:ident($var:ident : $vartype:ty) -> $restype:ty { + if feature("f16c") { $f16c:expr } + else { $fallback:expr }}) => { + #[inline] + pub(crate) fn $name($var: $vartype) -> $restype { + // Use CPU feature detection if using std + #[cfg(all( + feature = "use-intrinsics", + feature = "std", + any(target_arch = "x86", target_arch = "x86_64"), + not(target_feature = "f16c") + ))] + { + if is_x86_feature_detected!("f16c") { + $f16c + } else { + $fallback + } + } + // Use intrinsics directly when a compile target or using no_std + #[cfg(all( + feature = "use-intrinsics", + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "f16c" + ))] + { + $f16c + } + // Fallback to software + #[cfg(any( + not(feature = "use-intrinsics"), + not(any(target_arch = "x86", target_arch = "x86_64")), + all(not(feature = "std"), not(target_feature = "f16c")) + ))] + { + $fallback + } + } + }; +} + +convert_fn! { + fn f32_to_f16(f: f32) -> u16 { + if feature("f16c") { + unsafe { x86::f32_to_f16_x86_f16c(f) } + } else { + f32_to_f16_fallback(f) + } + } +} + +convert_fn! { + fn f64_to_f16(f: f64) -> u16 { + if feature("f16c") { + unsafe { x86::f32_to_f16_x86_f16c(f as f32) } + } else { + f64_to_f16_fallback(f) + } + } +} + +convert_fn! { + fn f16_to_f32(i: u16) -> f32 { + if feature("f16c") { + unsafe { x86::f16_to_f32_x86_f16c(i) } + } else { + f16_to_f32_fallback(i) + } + } +} + +convert_fn! { + fn f16_to_f64(i: u16) -> f64 { + if feature("f16c") { + unsafe { x86::f16_to_f32_x86_f16c(i) as f64 } + } else { + f16_to_f64_fallback(i) + } + } +} + +// TODO: While SIMD versions are faster, further improvements can be made by doing runtime feature +// detection once at beginning of convert slice method, rather than per chunk + +convert_fn! { + fn f32x4_to_f16x4(f: &[f32]) -> [u16; 4] { + if feature("f16c") { + unsafe { x86::f32x4_to_f16x4_x86_f16c(f) } + } else { + f32x4_to_f16x4_fallback(f) + } + } +} + +convert_fn! { + fn f16x4_to_f32x4(i: &[u16]) -> [f32; 4] { + if feature("f16c") { + unsafe { x86::f16x4_to_f32x4_x86_f16c(i) } + } else { + f16x4_to_f32x4_fallback(i) + } + } +} + +convert_fn! { + fn f64x4_to_f16x4(f: &[f64]) -> [u16; 4] { + if feature("f16c") { + unsafe { x86::f64x4_to_f16x4_x86_f16c(f) } + } else { + f64x4_to_f16x4_fallback(f) + } + } +} + +convert_fn! { + fn f16x4_to_f64x4(i: &[u16]) -> [f64; 4] { + if feature("f16c") { + unsafe { x86::f16x4_to_f64x4_x86_f16c(i) } + } else { + f16x4_to_f64x4_fallback(i) + } + } +} + +/////////////// Fallbacks //////////////// + +// In the below functions, round to nearest, with ties to even. +// Let us call the most significant bit that will be shifted out the round_bit. +// +// Round up if either +// a) Removed part > tie. +// (mantissa & round_bit) != 0 && (mantissa & (round_bit - 1)) != 0 +// b) Removed part == tie, and retained part is odd. +// (mantissa & round_bit) != 0 && (mantissa & (2 * round_bit)) != 0 +// (If removed part == tie and retained part is even, do not round up.) +// These two conditions can be combined into one: +// (mantissa & round_bit) != 0 && (mantissa & ((round_bit - 1) | (2 * round_bit))) != 0 +// which can be simplified into +// (mantissa & round_bit) != 0 && (mantissa & (3 * round_bit - 1)) != 0 + +fn f32_to_f16_fallback(value: f32) -> u16 { + // Convert to raw bytes + let x = value.to_bits(); + + // Extract IEEE754 components + let sign = x & 0x8000_0000u32; + let exp = x & 0x7F80_0000u32; + let man = x & 0x007F_FFFFu32; + + // Check for all exponent bits being set, which is Infinity or NaN + if exp == 0x7F80_0000u32 { + // Set mantissa MSB for NaN (and also keep shifted mantissa bits) + let nan_bit = if man == 0 { 0 } else { 0x0200u32 }; + return ((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 13)) as u16; + } + + // The number is normalized, start assembling half precision version + let half_sign = sign >> 16; + // Unbias the exponent, then bias for half precision + let unbiased_exp = ((exp >> 23) as i32) - 127; + let half_exp = unbiased_exp + 15; + + // Check for exponent overflow, return +infinity + if half_exp >= 0x1F { + return (half_sign | 0x7C00u32) as u16; + } + + // Check for underflow + if half_exp <= 0 { + // Check mantissa for what we can do + if 14 - half_exp > 24 { + // No rounding possibility, so this is a full underflow, return signed zero + return half_sign as u16; + } + // Don't forget about hidden leading mantissa bit when assembling mantissa + let man = man | 0x0080_0000u32; + let mut half_man = man >> (14 - half_exp); + // Check for rounding (see comment above functions) + let round_bit = 1 << (13 - half_exp); + if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { + half_man += 1; + } + // No exponent for subnormals + return (half_sign | half_man) as u16; + } + + // Rebias the exponent + let half_exp = (half_exp as u32) << 10; + let half_man = man >> 13; + // Check for rounding (see comment above functions) + let round_bit = 0x0000_1000u32; + if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { + // Round it + ((half_sign | half_exp | half_man) + 1) as u16 + } else { + (half_sign | half_exp | half_man) as u16 + } +} + +fn f64_to_f16_fallback(value: f64) -> u16 { + // Convert to raw bytes, truncating the last 32-bits of mantissa; that precision will always + // be lost on half-precision. + let val = value.to_bits(); + let x = (val >> 32) as u32; + + // Extract IEEE754 components + let sign = x & 0x8000_0000u32; + let exp = x & 0x7FF0_0000u32; + let man = x & 0x000F_FFFFu32; + + // Check for all exponent bits being set, which is Infinity or NaN + if exp == 0x7FF0_0000u32 { + // Set mantissa MSB for NaN (and also keep shifted mantissa bits). + // We also have to check the last 32 bits. + let nan_bit = if man == 0 && (val as u32 == 0) { + 0 + } else { + 0x0200u32 + }; + return ((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 10)) as u16; + } + + // The number is normalized, start assembling half precision version + let half_sign = sign >> 16; + // Unbias the exponent, then bias for half precision + let unbiased_exp = ((exp >> 20) as i64) - 1023; + let half_exp = unbiased_exp + 15; + + // Check for exponent overflow, return +infinity + if half_exp >= 0x1F { + return (half_sign | 0x7C00u32) as u16; + } + + // Check for underflow + if half_exp <= 0 { + // Check mantissa for what we can do + if 10 - half_exp > 21 { + // No rounding possibility, so this is a full underflow, return signed zero + return half_sign as u16; + } + // Don't forget about hidden leading mantissa bit when assembling mantissa + let man = man | 0x0010_0000u32; + let mut half_man = man >> (11 - half_exp); + // Check for rounding (see comment above functions) + let round_bit = 1 << (10 - half_exp); + if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { + half_man += 1; + } + // No exponent for subnormals + return (half_sign | half_man) as u16; + } + + // Rebias the exponent + let half_exp = (half_exp as u32) << 10; + let half_man = man >> 10; + // Check for rounding (see comment above functions) + let round_bit = 0x0000_0200u32; + if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { + // Round it + ((half_sign | half_exp | half_man) + 1) as u16 + } else { + (half_sign | half_exp | half_man) as u16 + } +} + +fn f16_to_f32_fallback(i: u16) -> f32 { + // Check for signed zero + if i & 0x7FFFu16 == 0 { + return f32::from_bits((i as u32) << 16); + } + + let half_sign = (i & 0x8000u16) as u32; + let half_exp = (i & 0x7C00u16) as u32; + let half_man = (i & 0x03FFu16) as u32; + + // Check for an infinity or NaN when all exponent bits set + if half_exp == 0x7C00u32 { + // Check for signed infinity if mantissa is zero + if half_man == 0 { + return f32::from_bits((half_sign << 16) | 0x7F80_0000u32); + } else { + // NaN, keep current mantissa but also set most significiant mantissa bit + return f32::from_bits((half_sign << 16) | 0x7FC0_0000u32 | (half_man << 13)); + } + } + + // Calculate single-precision components with adjusted exponent + let sign = half_sign << 16; + // Unbias exponent + let unbiased_exp = ((half_exp as i32) >> 10) - 15; + + // Check for subnormals, which will be normalized by adjusting exponent + if half_exp == 0 { + // Calculate how much to adjust the exponent by + let e = (half_man as u16).leading_zeros() - 6; + + // Rebias and adjust exponent + let exp = (127 - 15 - e) << 23; + let man = (half_man << (14 + e)) & 0x7F_FF_FFu32; + return f32::from_bits(sign | exp | man); + } + + // Rebias exponent for a normalized normal + let exp = ((unbiased_exp + 127) as u32) << 23; + let man = (half_man & 0x03FFu32) << 13; + f32::from_bits(sign | exp | man) +} + +fn f16_to_f64_fallback(i: u16) -> f64 { + // Check for signed zero + if i & 0x7FFFu16 == 0 { + return f64::from_bits((i as u64) << 48); + } + + let half_sign = (i & 0x8000u16) as u64; + let half_exp = (i & 0x7C00u16) as u64; + let half_man = (i & 0x03FFu16) as u64; + + // Check for an infinity or NaN when all exponent bits set + if half_exp == 0x7C00u64 { + // Check for signed infinity if mantissa is zero + if half_man == 0 { + return f64::from_bits((half_sign << 48) | 0x7FF0_0000_0000_0000u64); + } else { + // NaN, keep current mantissa but also set most significiant mantissa bit + return f64::from_bits((half_sign << 48) | 0x7FF8_0000_0000_0000u64 | (half_man << 42)); + } + } + + // Calculate double-precision components with adjusted exponent + let sign = half_sign << 48; + // Unbias exponent + let unbiased_exp = ((half_exp as i64) >> 10) - 15; + + // Check for subnormals, which will be normalized by adjusting exponent + if half_exp == 0 { + // Calculate how much to adjust the exponent by + let e = (half_man as u16).leading_zeros() - 6; + + // Rebias and adjust exponent + let exp = ((1023 - 15 - e) as u64) << 52; + let man = (half_man << (43 + e)) & 0xF_FFFF_FFFF_FFFFu64; + return f64::from_bits(sign | exp | man); + } + + // Rebias exponent for a normalized normal + let exp = ((unbiased_exp + 1023) as u64) << 52; + let man = (half_man & 0x03FFu64) << 42; + f64::from_bits(sign | exp | man) +} + +#[inline] +fn f16x4_to_f32x4_fallback(v: &[u16]) -> [f32; 4] { + debug_assert!(v.len() >= 4); + + [ + f16_to_f32_fallback(v[0]), + f16_to_f32_fallback(v[1]), + f16_to_f32_fallback(v[2]), + f16_to_f32_fallback(v[3]), + ] +} + +#[inline] +fn f32x4_to_f16x4_fallback(v: &[f32]) -> [u16; 4] { + debug_assert!(v.len() >= 4); + + [ + f32_to_f16_fallback(v[0]), + f32_to_f16_fallback(v[1]), + f32_to_f16_fallback(v[2]), + f32_to_f16_fallback(v[3]), + ] +} + +#[inline] +fn f16x4_to_f64x4_fallback(v: &[u16]) -> [f64; 4] { + debug_assert!(v.len() >= 4); + + [ + f16_to_f64_fallback(v[0]), + f16_to_f64_fallback(v[1]), + f16_to_f64_fallback(v[2]), + f16_to_f64_fallback(v[3]), + ] +} + +#[inline] +fn f64x4_to_f16x4_fallback(v: &[f64]) -> [u16; 4] { + debug_assert!(v.len() >= 4); + + [ + f64_to_f16_fallback(v[0]), + f64_to_f16_fallback(v[1]), + f64_to_f16_fallback(v[2]), + f64_to_f16_fallback(v[3]), + ] +} + +/////////////// x86/x86_64 f16c //////////////// +#[cfg(all( + feature = "use-intrinsics", + any(target_arch = "x86", target_arch = "x86_64") +))] +mod x86 { + use core::{mem::MaybeUninit, ptr}; + + #[cfg(target_arch = "x86")] + use core::arch::x86::{__m128, __m128i, _mm_cvtph_ps, _mm_cvtps_ph, _MM_FROUND_TO_NEAREST_INT}; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::{ + __m128, __m128i, _mm_cvtph_ps, _mm_cvtps_ph, _MM_FROUND_TO_NEAREST_INT, + }; + + #[target_feature(enable = "f16c")] + #[inline] + pub(super) unsafe fn f16_to_f32_x86_f16c(i: u16) -> f32 { + let mut vec = MaybeUninit::<__m128i>::zeroed(); + vec.as_mut_ptr().cast::<u16>().write(i); + let retval = _mm_cvtph_ps(vec.assume_init()); + *(&retval as *const __m128).cast() + } + + #[target_feature(enable = "f16c")] + #[inline] + pub(super) unsafe fn f32_to_f16_x86_f16c(f: f32) -> u16 { + let mut vec = MaybeUninit::<__m128>::zeroed(); + vec.as_mut_ptr().cast::<f32>().write(f); + let retval = _mm_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT); + *(&retval as *const __m128i).cast() + } + + #[target_feature(enable = "f16c")] + #[inline] + pub(super) unsafe fn f16x4_to_f32x4_x86_f16c(v: &[u16]) -> [f32; 4] { + debug_assert!(v.len() >= 4); + + let mut vec = MaybeUninit::<__m128i>::zeroed(); + ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4); + let retval = _mm_cvtph_ps(vec.assume_init()); + *(&retval as *const __m128).cast() + } + + #[target_feature(enable = "f16c")] + #[inline] + pub(super) unsafe fn f32x4_to_f16x4_x86_f16c(v: &[f32]) -> [u16; 4] { + debug_assert!(v.len() >= 4); + + let mut vec = MaybeUninit::<__m128>::uninit(); + ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4); + let retval = _mm_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT); + *(&retval as *const __m128i).cast() + } + + #[target_feature(enable = "f16c")] + #[inline] + pub(super) unsafe fn f16x4_to_f64x4_x86_f16c(v: &[u16]) -> [f64; 4] { + debug_assert!(v.len() >= 4); + + let mut vec = MaybeUninit::<__m128i>::zeroed(); + ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4); + let retval = _mm_cvtph_ps(vec.assume_init()); + let array = *(&retval as *const __m128).cast::<[f32; 4]>(); + // Let compiler vectorize this regular cast for now. + // TODO: investigate auto-detecting sse2/avx convert features + [ + array[0] as f64, + array[1] as f64, + array[2] as f64, + array[3] as f64, + ] + } + + #[target_feature(enable = "f16c")] + #[inline] + pub(super) unsafe fn f64x4_to_f16x4_x86_f16c(v: &[f64]) -> [u16; 4] { + debug_assert!(v.len() >= 4); + + // Let compiler vectorize this regular cast for now. + // TODO: investigate auto-detecting sse2/avx convert features + let v = [v[0] as f32, v[1] as f32, v[2] as f32, v[3] as f32]; + + let mut vec = MaybeUninit::<__m128>::uninit(); + ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4); + let retval = _mm_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT); + *(&retval as *const __m128i).cast() + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..de35758 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,113 @@ +//! A crate that provides support for half-precision 16-bit floating point types. +//! +//! This crate provides the [`f16`] type, which is an implementation of the IEEE 754-2008 standard +//! [`binary16`] a.k.a `half` floating point type. This 16-bit floating point type is intended for +//! efficient storage where the full range and precision of a larger floating point value is not +//! required. This is especially useful for image storage formats. +//! +//! This crate also provides a [`bf16`] type, an alternative 16-bit floating point format. The +//! [`bfloat16`] format is a truncated IEEE 754 standard `binary32` float that preserves the +//! exponent to allow the same range as `f32` but with only 8 bits of precision (instead of 11 +//! bits for [`f16`]). See the [`bf16`] type for details. +//! +//! Because [`f16`] and [`bf16`] are primarily for efficient storage, floating point operations such as +//! addition, multiplication, etc. are not implemented. Operations should be performed with `f32` +//! or higher-precision types and converted to/from [`f16`] or [`bf16`] as necessary. +//! +//! This crate also provides a [`slice`] module for zero-copy in-place conversions of `u16` slices +//! to both [`f16`] and [`bf16`], as well as efficient vectorized conversions of larger buffers of +//! floating point values to and from these half formats. +//! +//! A [`prelude`] module is provided for easy importing of available utility traits. +//! +//! Some hardware architectures provide support for 16-bit floating point conversions. Enable the +//! `use-intrinsics` feature to use LLVM intrinsics for hardware conversions. This crate does no +//! checks on whether the hardware supports the feature. This feature currently only works on +//! nightly Rust due to a compiler feature gate. When this feature is enabled and the hardware +//! supports it, the [`slice`] trait conversions will use vectorized SIMD intructions for +//! increased efficiency. +//! +//! Support for [`serde`] crate `Serialize` and `Deserialize` traits is provided when the `serde` +//! feature is enabled. This adds a dependency on [`serde`] crate so is an optional cargo feature. +//! +//! The crate uses `#[no_std]` by default, so can be used in embedded environments without using the +//! Rust `std` library. A `std` feature is available, which enables additional utilities using the +//! `std` library, such as the [`vec`] module that provides zero-copy `Vec` conversions. The `alloc` +//! feature may be used to enable the [`vec`] module without adding a dependency to the `std` +//! library. +//! +//! [`f16`]: struct.f16.html +//! [`binary16`]: https://en.wikipedia.org/wiki/Half-precision_floating-point_format +//! [`bf16`]: struct.bf16.html +//! [`bfloat16`]: https://en.wikipedia.org/wiki/Bfloat16_floating-point_format +//! [`slice`]: slice/index.html +//! [`prelude`]: prelude/index.html +//! [`serde`]: https://crates.io/crates/serde +//! [`vec`]: vec/index.html + +#![warn( + missing_docs, + missing_copy_implementations, + missing_debug_implementations, + trivial_numeric_casts, + unused_extern_crates, + unused_import_braces, + future_incompatible, + rust_2018_compatibility, + rust_2018_idioms, + clippy::all +)] +#![allow(clippy::verbose_bit_mask, clippy::cast_lossless)] +#![cfg_attr(not(feature = "std"), no_std)] +#![cfg_attr( + all( + feature = "use-intrinsics", + any(target_arch = "x86", target_arch = "x86_64") + ), + feature(stdsimd, f16c_target_feature) +)] +#![doc(html_root_url = "https://docs.rs/half/1.6.0")] + +#[cfg(all(feature = "alloc", not(feature = "std")))] +extern crate alloc; + +mod bfloat; +mod binary16; +pub mod slice; +#[cfg(any(feature = "alloc", feature = "std"))] +pub mod vec; + +pub use binary16::f16; + +#[allow(deprecated)] +pub use binary16::consts; + +pub use bfloat::bf16; + +/// A collection of the most used items and traits in this crate for easy importing. +/// +/// # Examples +/// +/// ```rust +/// use half::prelude::*; +/// ``` +pub mod prelude { + #[doc(no_inline)] + pub use crate::{ + bf16, f16, + slice::{HalfBitsSliceExt, HalfFloatSliceExt}, + }; + + #[cfg(any(feature = "alloc", feature = "std"))] + pub use crate::vec::{HalfBitsVecExt, HalfFloatVecExt}; +} + +// Keep this module private to crate +pub(crate) mod private { + use crate::{bf16, f16}; + + pub trait SealedHalf {} + + impl SealedHalf for f16 {} + impl SealedHalf for bf16 {} +} diff --git a/src/slice.rs b/src/slice.rs new file mode 100644 index 0000000..a8abbb0 --- /dev/null +++ b/src/slice.rs @@ -0,0 +1,976 @@ +//! Contains utility functions and traits to convert between slices of `u16` bits and `f16` or +//! `bf16` numbers. +//! +//! The utility [`HalfBitsSliceExt`] sealed extension trait is implemented for `[u16]` slices, +//! while the utility [`HalfFloatSliceExt`] sealed extension trait is implemented for both `[f16]` +//! and `[bf16]` slices. These traits provide efficient conversions and reinterpret casting of +//! larger buffers of floating point values, and are automatically included in the [`prelude`] +//! module. +//! +//! [`HalfBitsSliceExt`]: trait.HalfBitsSliceExt.html +//! [`HalfFloatSliceExt`]: trait.HalfFloatSliceExt.html +//! [`prelude`]: ../prelude/index.html + +use crate::{bf16, binary16::convert, f16}; +use core::slice; + +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::vec::Vec; + +/// Extensions to `[f16]` and `[bf16]` slices to support conversion and reinterpret operations. +/// +/// This trait is sealed and cannot be implemented outside of this crate. +pub trait HalfFloatSliceExt: private::SealedHalfFloatSlice { + /// Reinterpret a slice of [`f16`](../struct.f16.html) or [`bf16`](../struct.bf16.html) + /// numbers as a slice of `u16` bits. + /// + /// This is a zero-copy operation. The reinterpreted slice has the same lifetime and memory + /// location as `self`. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let float_buffer = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]; + /// let int_buffer = float_buffer.reinterpret_cast(); + /// + /// assert_eq!(int_buffer, [float_buffer[0].to_bits(), float_buffer[1].to_bits(), float_buffer[2].to_bits()]); + /// ``` + fn reinterpret_cast(&self) -> &[u16]; + + /// Reinterpret a mutable slice of [`f16`](../struct.f16.html) or + /// [`bf16`](../struct.bf16.html) numbers as a mutable slice of `u16` bits. + /// + /// This is a zero-copy operation. The transmuted slice has the same lifetime as the original, + /// which prevents mutating `self` as long as the returned `&mut [u16]` is borrowed. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let mut float_buffer = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]; + /// + /// { + /// let int_buffer = float_buffer.reinterpret_cast_mut(); + /// + /// assert_eq!(int_buffer, [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]); + /// + /// // Mutating the u16 slice will mutating the original + /// int_buffer[0] = 0; + /// } + /// + /// // Note that we need to drop int_buffer before using float_buffer again or we will get a borrow error. + /// assert_eq!(float_buffer, [f16::from_f32(0.), f16::from_f32(2.), f16::from_f32(3.)]); + /// ``` + fn reinterpret_cast_mut(&mut self) -> &mut [u16]; + + /// Convert all of the elements of a `[f32]` slice into [`f16`](../struct.f16.html) or + /// [`bf16`](../struct.bf16.html) values in `self`. + /// + /// The length of `src` must be the same as `self`. + /// + /// The conversion operation is vectorized over the slice, meaning the conversion may be more + /// efficient than converting individual elements on some hardware that supports SIMD + /// conversions. See [crate documentation](../index.html) for more information on hardware + /// conversion support. + /// + /// # Panics + /// + /// This function will panic if the two slices have different lengths. + /// + /// # Examples + /// ```rust + /// # use half::prelude::*; + /// // Initialize an empty buffer + /// let mut buffer = [0u16; 4]; + /// let buffer = buffer.reinterpret_cast_mut::<f16>(); + /// + /// let float_values = [1., 2., 3., 4.]; + /// + /// // Now convert + /// buffer.convert_from_f32_slice(&float_values); + /// + /// assert_eq!(buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]); + /// ``` + fn convert_from_f32_slice(&mut self, src: &[f32]); + + /// Convert all of the elements of a `[f64]` slice into [`f16`](../struct.f16.html) or + /// [`bf16`](../struct.bf16.html) values in `self`. + /// + /// The length of `src` must be the same as `self`. + /// + /// The conversion operation is vectorized over the slice, meaning the conversion may be more + /// efficient than converting individual elements on some hardware that supports SIMD + /// conversions. See [crate documentation](../index.html) for more information on hardware + /// conversion support. + /// + /// # Panics + /// + /// This function will panic if the two slices have different lengths. + /// + /// # Examples + /// ```rust + /// # use half::prelude::*; + /// // Initialize an empty buffer + /// let mut buffer = [0u16; 4]; + /// let buffer = buffer.reinterpret_cast_mut::<f16>(); + /// + /// let float_values = [1., 2., 3., 4.]; + /// + /// // Now convert + /// buffer.convert_from_f64_slice(&float_values); + /// + /// assert_eq!(buffer, [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]); + /// ``` + fn convert_from_f64_slice(&mut self, src: &[f64]); + + /// Convert all of the [`f16`](../struct.f16.html) or [`bf16`](../struct.bf16.html) + /// elements of `self` into `f32` values in `dst`. + /// + /// The length of `src` must be the same as `self`. + /// + /// The conversion operation is vectorized over the slice, meaning the conversion may be more + /// efficient than converting individual elements on some hardware that supports SIMD + /// conversions. See [crate documentation](../index.html) for more information on hardware + /// conversion support. + /// + /// # Panics + /// + /// This function will panic if the two slices have different lengths. + /// + /// # Examples + /// ```rust + /// # use half::prelude::*; + /// // Initialize an empty buffer + /// let mut buffer = [0f32; 4]; + /// + /// let half_values = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]; + /// + /// // Now convert + /// half_values.convert_to_f32_slice(&mut buffer); + /// + /// assert_eq!(buffer, [1., 2., 3., 4.]); + /// ``` + fn convert_to_f32_slice(&self, dst: &mut [f32]); + + /// Convert all of the [`f16`](../struct.f16.html) or [`bf16`](../struct.bf16.html) + /// elements of `self` into `f64` values in `dst`. + /// + /// The length of `src` must be the same as `self`. + /// + /// The conversion operation is vectorized over the slice, meaning the conversion may be more + /// efficient than converting individual elements on some hardware that supports SIMD + /// conversions. See [crate documentation](../index.html) for more information on hardware + /// conversion support. + /// + /// # Panics + /// + /// This function will panic if the two slices have different lengths. + /// + /// # Examples + /// ```rust + /// # use half::prelude::*; + /// // Initialize an empty buffer + /// let mut buffer = [0f64; 4]; + /// + /// let half_values = [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]; + /// + /// // Now convert + /// half_values.convert_to_f64_slice(&mut buffer); + /// + /// assert_eq!(buffer, [1., 2., 3., 4.]); + /// ``` + fn convert_to_f64_slice(&self, dst: &mut [f64]); + + // Because trait is sealed, we can get away with different interfaces between features + + #[cfg(any(feature = "alloc", feature = "std"))] + /// Convert all of the [`f16`](../struct.f16.html) or [`bf16`](../struct.bf16.html) + /// elements of `self` into `f32` values in a new vector. + /// + /// The conversion operation is vectorized over the slice, meaning the conversion may be more + /// efficient than converting individual elements on some hardware that supports SIMD + /// conversions. See [crate documentation](../index.html) for more information on hardware + /// conversion support. + /// + /// This method is only available with the `std` or `alloc` feature. + /// + /// # Examples + /// ```rust + /// # use half::prelude::*; + /// let half_values = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]; + /// let vec = half_values.to_f32_vec(); + /// + /// assert_eq!(vec, vec![1., 2., 3., 4.]); + /// ``` + fn to_f32_vec(&self) -> Vec<f32>; + + /// Convert all of the [`f16`](../struct.f16.html) or [`bf16`](../struct.bf16.html) + /// elements of `self` into `f64` values in a new vector. + /// + /// The conversion operation is vectorized over the slice, meaning the conversion may be more + /// efficient than converting individual elements on some hardware that supports SIMD + /// conversions. See [crate documentation](../index.html) for more information on hardware + /// conversion support. + /// + /// This method is only available with the `std` or `alloc` feature. + /// + /// # Examples + /// ```rust + /// # use half::prelude::*; + /// let half_values = [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]; + /// let vec = half_values.to_f64_vec(); + /// + /// assert_eq!(vec, vec![1., 2., 3., 4.]); + /// ``` + #[cfg(any(feature = "alloc", feature = "std"))] + fn to_f64_vec(&self) -> Vec<f64>; +} + +/// Extensions to `[u16]` slices to support reinterpret operations. +/// +/// This trait is sealed and cannot be implemented outside of this crate. +pub trait HalfBitsSliceExt: private::SealedHalfBitsSlice { + /// Reinterpret a slice of `u16` bits as a slice of [`f16`](../struct.f16.html) or + /// [`bf16`](../struct.bf16.html) numbers. + /// + /// `H` is the type to cast to, and must be either the [`f16`](../struct.f16.html) or + /// [`bf16`](../struct.bf16.html) type. + /// + /// This is a zero-copy operation. The reinterpreted slice has the same lifetime and memory + /// location as `self`. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let int_buffer = [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]; + /// let float_buffer: &[f16] = int_buffer.reinterpret_cast(); + /// + /// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]); + /// + /// // You may have to specify the cast type directly if the compiler can't infer the type. + /// // The following is also valid in Rust. + /// let typed_buffer = int_buffer.reinterpret_cast::<f16>(); + /// ``` + fn reinterpret_cast<H>(&self) -> &[H] + where + H: crate::private::SealedHalf; + + /// Reinterpret a mutable slice of `u16` bits as a mutable slice of [`f16`](../struct.f16.html) + /// or [`bf16`](../struct.bf16.html) numbers. + /// + /// `H` is the type to cast to, and must be either the [`f16`](../struct.f16.html) or + /// [`bf16`](../struct.bf16.html) type. + /// + /// This is a zero-copy operation. The transmuted slice has the same lifetime as the original, + /// which prevents mutating `self` as long as the returned `&mut [f16]` is borrowed. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let mut int_buffer = [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]; + /// + /// { + /// let float_buffer: &mut [f16] = int_buffer.reinterpret_cast_mut(); + /// + /// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]); + /// + /// // Mutating the f16 slice will mutating the original + /// float_buffer[0] = f16::from_f32(0.); + /// } + /// + /// // Note that we need to drop float_buffer before using int_buffer again or we will get a borrow error. + /// assert_eq!(int_buffer, [f16::from_f32(0.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]); + /// + /// // You may have to specify the cast type directly if the compiler can't infer the type. + /// // The following is also valid in Rust. + /// let typed_buffer = int_buffer.reinterpret_cast_mut::<f16>(); + /// ``` + fn reinterpret_cast_mut<H>(&mut self) -> &mut [H] + where + H: crate::private::SealedHalf; +} + +mod private { + use crate::{bf16, f16}; + + pub trait SealedHalfFloatSlice {} + impl SealedHalfFloatSlice for [f16] {} + impl SealedHalfFloatSlice for [bf16] {} + + pub trait SealedHalfBitsSlice {} + impl SealedHalfBitsSlice for [u16] {} +} + +impl HalfFloatSliceExt for [f16] { + #[inline] + fn reinterpret_cast(&self) -> &[u16] { + let pointer = self.as_ptr() as *const u16; + let length = self.len(); + // SAFETY: We are reconstructing full length of original slice, using its same lifetime, + // and the size of elements are identical + unsafe { slice::from_raw_parts(pointer, length) } + } + + #[inline] + fn reinterpret_cast_mut(&mut self) -> &mut [u16] { + let pointer = self.as_ptr() as *mut u16; + let length = self.len(); + // SAFETY: We are reconstructing full length of original slice, using its same lifetime, + // and the size of elements are identical + unsafe { slice::from_raw_parts_mut(pointer, length) } + } + + fn convert_from_f32_slice(&mut self, src: &[f32]) { + assert_eq!( + self.len(), + src.len(), + "destination and source slices have different lengths" + ); + + let mut chunks = src.chunks_exact(4); + let mut chunk_count = 0usize; // Not using .enumerate() because we need this value for remainder + for chunk in &mut chunks { + let vec = convert::f32x4_to_f16x4(chunk); + let dst_idx = chunk_count * 4; + self[dst_idx..dst_idx + 4].copy_from_slice(vec.reinterpret_cast()); + chunk_count += 1; + } + + // Process remainder + if !chunks.remainder().is_empty() { + let mut buf = [0f32; 4]; + buf[..chunks.remainder().len()].copy_from_slice(chunks.remainder()); + let vec = convert::f32x4_to_f16x4(&buf); + let dst_idx = chunk_count * 4; + self[dst_idx..dst_idx + chunks.remainder().len()] + .copy_from_slice(vec[..chunks.remainder().len()].reinterpret_cast()); + } + } + + fn convert_from_f64_slice(&mut self, src: &[f64]) { + assert_eq!( + self.len(), + src.len(), + "destination and source slices have different lengths" + ); + + let mut chunks = src.chunks_exact(4); + let mut chunk_count = 0usize; // Not using .enumerate() because we need this value for remainder + for chunk in &mut chunks { + let vec = convert::f64x4_to_f16x4(chunk); + let dst_idx = chunk_count * 4; + self[dst_idx..dst_idx + 4].copy_from_slice(vec.reinterpret_cast()); + chunk_count += 1; + } + + // Process remainder + if !chunks.remainder().is_empty() { + let mut buf = [0f64; 4]; + buf[..chunks.remainder().len()].copy_from_slice(chunks.remainder()); + let vec = convert::f64x4_to_f16x4(&buf); + let dst_idx = chunk_count * 4; + self[dst_idx..dst_idx + chunks.remainder().len()] + .copy_from_slice(vec[..chunks.remainder().len()].reinterpret_cast()); + } + } + + fn convert_to_f32_slice(&self, dst: &mut [f32]) { + assert_eq!( + self.len(), + dst.len(), + "destination and source slices have different lengths" + ); + + let mut chunks = self.chunks_exact(4); + let mut chunk_count = 0usize; // Not using .enumerate() because we need this value for remainder + for chunk in &mut chunks { + let vec = convert::f16x4_to_f32x4(chunk.reinterpret_cast()); + let dst_idx = chunk_count * 4; + dst[dst_idx..dst_idx + 4].copy_from_slice(&vec); + chunk_count += 1; + } + + // Process remainder + if !chunks.remainder().is_empty() { + let mut buf = [0u16; 4]; + buf[..chunks.remainder().len()].copy_from_slice(chunks.remainder().reinterpret_cast()); + let vec = convert::f16x4_to_f32x4(&buf); + let dst_idx = chunk_count * 4; + dst[dst_idx..dst_idx + chunks.remainder().len()] + .copy_from_slice(&vec[..chunks.remainder().len()]); + } + } + + fn convert_to_f64_slice(&self, dst: &mut [f64]) { + assert_eq!( + self.len(), + dst.len(), + "destination and source slices have different lengths" + ); + + let mut chunks = self.chunks_exact(4); + let mut chunk_count = 0usize; // Not using .enumerate() because we need this value for remainder + for chunk in &mut chunks { + let vec = convert::f16x4_to_f64x4(chunk.reinterpret_cast()); + let dst_idx = chunk_count * 4; + dst[dst_idx..dst_idx + 4].copy_from_slice(&vec); + chunk_count += 1; + } + + // Process remainder + if !chunks.remainder().is_empty() { + let mut buf = [0u16; 4]; + buf[..chunks.remainder().len()].copy_from_slice(chunks.remainder().reinterpret_cast()); + let vec = convert::f16x4_to_f64x4(&buf); + let dst_idx = chunk_count * 4; + dst[dst_idx..dst_idx + chunks.remainder().len()] + .copy_from_slice(&vec[..chunks.remainder().len()]); + } + } + + #[cfg(any(feature = "alloc", feature = "std"))] + #[inline] + fn to_f32_vec(&self) -> Vec<f32> { + let mut vec = Vec::with_capacity(self.len()); + // SAFETY: convert will initialize every value in the vector without reading them, + // so this is safe to do instead of double initialize from resize, and we're setting it to + // same value as capacity. + unsafe { vec.set_len(self.len()) }; + self.convert_to_f32_slice(&mut vec); + vec + } + + #[cfg(any(feature = "alloc", feature = "std"))] + #[inline] + fn to_f64_vec(&self) -> Vec<f64> { + let mut vec = Vec::with_capacity(self.len()); + // SAFETY: convert will initialize every value in the vector without reading them, + // so this is safe to do instead of double initialize from resize, and we're setting it to + // same value as capacity. + unsafe { vec.set_len(self.len()) }; + self.convert_to_f64_slice(&mut vec); + vec + } +} + +impl HalfFloatSliceExt for [bf16] { + #[inline] + fn reinterpret_cast(&self) -> &[u16] { + let pointer = self.as_ptr() as *const u16; + let length = self.len(); + // SAFETY: We are reconstructing full length of original slice, using its same lifetime, + // and the size of elements are identical + unsafe { slice::from_raw_parts(pointer, length) } + } + + #[inline] + fn reinterpret_cast_mut(&mut self) -> &mut [u16] { + let pointer = self.as_ptr() as *mut u16; + let length = self.len(); + // SAFETY: We are reconstructing full length of original slice, using its same lifetime, + // and the size of elements are identical + unsafe { slice::from_raw_parts_mut(pointer, length) } + } + + fn convert_from_f32_slice(&mut self, src: &[f32]) { + assert_eq!( + self.len(), + src.len(), + "destination and source slices have different lengths" + ); + + // Just use regular loop here until there's any bf16 SIMD support. + for (i, f) in src.iter().enumerate() { + self[i] = bf16::from_f32(*f); + } + } + + fn convert_from_f64_slice(&mut self, src: &[f64]) { + assert_eq!( + self.len(), + src.len(), + "destination and source slices have different lengths" + ); + + // Just use regular loop here until there's any bf16 SIMD support. + for (i, f) in src.iter().enumerate() { + self[i] = bf16::from_f64(*f); + } + } + + fn convert_to_f32_slice(&self, dst: &mut [f32]) { + assert_eq!( + self.len(), + dst.len(), + "destination and source slices have different lengths" + ); + + // Just use regular loop here until there's any bf16 SIMD support. + for (i, f) in self.iter().enumerate() { + dst[i] = f.to_f32(); + } + } + + fn convert_to_f64_slice(&self, dst: &mut [f64]) { + assert_eq!( + self.len(), + dst.len(), + "destination and source slices have different lengths" + ); + + // Just use regular loop here until there's any bf16 SIMD support. + for (i, f) in self.iter().enumerate() { + dst[i] = f.to_f64(); + } + } + + #[cfg(any(feature = "alloc", feature = "std"))] + #[inline] + fn to_f32_vec(&self) -> Vec<f32> { + let mut vec = Vec::with_capacity(self.len()); + // SAFETY: convert will initialize every value in the vector without reading them, + // so this is safe to do instead of double initialize from resize, and we're setting it to + // same value as capacity. + unsafe { vec.set_len(self.len()) }; + self.convert_to_f32_slice(&mut vec); + vec + } + + #[cfg(any(feature = "alloc", feature = "std"))] + #[inline] + fn to_f64_vec(&self) -> Vec<f64> { + let mut vec = Vec::with_capacity(self.len()); + // SAFETY: convert will initialize every value in the vector without reading them, + // so this is safe to do instead of double initialize from resize, and we're setting it to + // same value as capacity. + unsafe { vec.set_len(self.len()) }; + self.convert_to_f64_slice(&mut vec); + vec + } +} + +impl HalfBitsSliceExt for [u16] { + // Since we sealed all the traits involved, these are safe. + #[inline] + fn reinterpret_cast<H>(&self) -> &[H] + where + H: crate::private::SealedHalf, + { + let pointer = self.as_ptr() as *const H; + let length = self.len(); + // SAFETY: We are reconstructing full length of original slice, using its same lifetime, + // and the size of elements are identical + unsafe { slice::from_raw_parts(pointer, length) } + } + + #[inline] + fn reinterpret_cast_mut<H>(&mut self) -> &mut [H] + where + H: crate::private::SealedHalf, + { + let pointer = self.as_mut_ptr() as *mut H; + let length = self.len(); + // SAFETY: We are reconstructing full length of original slice, using its same lifetime, + // and the size of elements are identical + unsafe { slice::from_raw_parts_mut(pointer, length) } + } +} + +/// Reinterpret a mutable slice of `u16` bits as a mutable slice of [`f16`](../struct.f16.html) +/// numbers. +/// +/// The transmuted slice has the same life time as the original, which prevents mutating the borrowed +/// `mut [u16]` argument as long as the returned `mut [f16]` is borrowed. +#[deprecated( + since = "1.4.0", + note = "use [`HalfBitsSliceExt::reinterpret_cast_mut`](trait.HalfBitsSliceExt.html#tymethod.reinterpret_cast_mut) instead" +)] +#[inline] +pub fn from_bits_mut(bits: &mut [u16]) -> &mut [f16] { + bits.reinterpret_cast_mut() +} + +/// Reinterpret a mutable slice of [`f16`](../struct.f16.html) numbers as a mutable slice of `u16` +/// bits. +/// +///The transmuted slice has the same life time as the original, which prevents mutating the +/// borrowed `mut [f16]` argument as long as the returned `mut [u16]` is borrowed. +#[deprecated( + since = "1.4.0", + note = "use [`HalfFloatSliceExt::reinterpret_cast_mut`](trait.HalfFloatSliceExt.html#tymethod.reinterpret_cast_mut) instead" +)] +#[inline] +pub fn to_bits_mut(bits: &mut [f16]) -> &mut [u16] { + bits.reinterpret_cast_mut() +} + +/// Reinterpret a slice of `u16` bits as a slice of [`f16`](../struct.f16.html) numbers. +/// +/// The transmuted slice has the same life time as the original. +#[deprecated( + since = "1.4.0", + note = "use [`HalfBitsSliceExt::reinterpret_cast`](trait.HalfBitsSliceExt.html#tymethod.reinterpret_cast) instead" +)] +#[inline] +pub fn from_bits(bits: &[u16]) -> &[f16] { + bits.reinterpret_cast() +} + +/// Reinterpret a slice of [`f16`](../struct.f16.html) numbers as a slice of `u16` bits. +/// +/// The transmuted slice has the same life time as the original. +#[deprecated( + since = "1.4.0", + note = "use [`HalfFloatSliceExt::reinterpret_cast`](trait.HalfFloatSliceExt.html#tymethod.reinterpret_cast) instead" +)] +#[inline] +pub fn to_bits(bits: &[f16]) -> &[u16] { + bits.reinterpret_cast() +} + +#[cfg(test)] +mod test { + use super::{HalfBitsSliceExt, HalfFloatSliceExt}; + use crate::{bf16, f16}; + + #[test] + fn test_slice_conversions_f16() { + let bits = &[ + f16::E.to_bits(), + f16::PI.to_bits(), + f16::EPSILON.to_bits(), + f16::FRAC_1_SQRT_2.to_bits(), + ]; + let numbers = &[f16::E, f16::PI, f16::EPSILON, f16::FRAC_1_SQRT_2]; + + // Convert from bits to numbers + let from_bits = bits.reinterpret_cast::<f16>(); + assert_eq!(from_bits, numbers); + + // Convert from numbers back to bits + let to_bits = from_bits.reinterpret_cast(); + assert_eq!(to_bits, bits); + } + + #[test] + fn test_mutablility_f16() { + let mut bits_array = [f16::PI.to_bits()]; + let bits = &mut bits_array[..]; + + { + // would not compile without these braces + // TODO: add automated test to check that it does not compile without braces + let numbers = bits.reinterpret_cast_mut(); + numbers[0] = f16::E; + } + + assert_eq!(bits, &[f16::E.to_bits()]); + + bits[0] = f16::LN_2.to_bits(); + assert_eq!(bits, &[f16::LN_2.to_bits()]); + } + + #[test] + fn test_slice_conversions_bf16() { + let bits = &[ + bf16::E.to_bits(), + bf16::PI.to_bits(), + bf16::EPSILON.to_bits(), + bf16::FRAC_1_SQRT_2.to_bits(), + ]; + let numbers = &[bf16::E, bf16::PI, bf16::EPSILON, bf16::FRAC_1_SQRT_2]; + + // Convert from bits to numbers + let from_bits = bits.reinterpret_cast::<bf16>(); + assert_eq!(from_bits, numbers); + + // Convert from numbers back to bits + let to_bits = from_bits.reinterpret_cast(); + assert_eq!(to_bits, bits); + } + + #[test] + fn test_mutablility_bf16() { + let mut bits_array = [bf16::PI.to_bits()]; + let bits = &mut bits_array[..]; + + { + // would not compile without these braces + // TODO: add automated test to check that it does not compile without braces + let numbers = bits.reinterpret_cast_mut(); + numbers[0] = bf16::E; + } + + assert_eq!(bits, &[bf16::E.to_bits()]); + + bits[0] = bf16::LN_2.to_bits(); + assert_eq!(bits, &[bf16::LN_2.to_bits()]); + } + + #[test] + fn slice_convert_f16_f32() { + // Exact chunks + let vf32 = [1., 2., 3., 4., 5., 6., 7., 8.]; + let vf16 = [ + f16::from_f32(1.), + f16::from_f32(2.), + f16::from_f32(3.), + f16::from_f32(4.), + f16::from_f32(5.), + f16::from_f32(6.), + f16::from_f32(7.), + f16::from_f32(8.), + ]; + let mut buf32 = vf32; + let mut buf16 = vf16; + + vf16.convert_to_f32_slice(&mut buf32); + assert_eq!(&vf32, &buf32); + + buf16.convert_from_f32_slice(&vf32); + assert_eq!(&vf16, &buf16); + + // Partial with chunks + let vf32 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; + let vf16 = [ + f16::from_f32(1.), + f16::from_f32(2.), + f16::from_f32(3.), + f16::from_f32(4.), + f16::from_f32(5.), + f16::from_f32(6.), + f16::from_f32(7.), + f16::from_f32(8.), + f16::from_f32(9.), + ]; + let mut buf32 = vf32; + let mut buf16 = vf16; + + vf16.convert_to_f32_slice(&mut buf32); + assert_eq!(&vf32, &buf32); + + buf16.convert_from_f32_slice(&vf32); + assert_eq!(&vf16, &buf16); + + // Partial with chunks + let vf32 = [1., 2.]; + let vf16 = [f16::from_f32(1.), f16::from_f32(2.)]; + let mut buf32 = vf32; + let mut buf16 = vf16; + + vf16.convert_to_f32_slice(&mut buf32); + assert_eq!(&vf32, &buf32); + + buf16.convert_from_f32_slice(&vf32); + assert_eq!(&vf16, &buf16); + } + + #[test] + fn slice_convert_bf16_f32() { + // Exact chunks + let vf32 = [1., 2., 3., 4., 5., 6., 7., 8.]; + let vf16 = [ + bf16::from_f32(1.), + bf16::from_f32(2.), + bf16::from_f32(3.), + bf16::from_f32(4.), + bf16::from_f32(5.), + bf16::from_f32(6.), + bf16::from_f32(7.), + bf16::from_f32(8.), + ]; + let mut buf32 = vf32; + let mut buf16 = vf16; + + vf16.convert_to_f32_slice(&mut buf32); + assert_eq!(&vf32, &buf32); + + buf16.convert_from_f32_slice(&vf32); + assert_eq!(&vf16, &buf16); + + // Partial with chunks + let vf32 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; + let vf16 = [ + bf16::from_f32(1.), + bf16::from_f32(2.), + bf16::from_f32(3.), + bf16::from_f32(4.), + bf16::from_f32(5.), + bf16::from_f32(6.), + bf16::from_f32(7.), + bf16::from_f32(8.), + bf16::from_f32(9.), + ]; + let mut buf32 = vf32; + let mut buf16 = vf16; + + vf16.convert_to_f32_slice(&mut buf32); + assert_eq!(&vf32, &buf32); + + buf16.convert_from_f32_slice(&vf32); + assert_eq!(&vf16, &buf16); + + // Partial with chunks + let vf32 = [1., 2.]; + let vf16 = [bf16::from_f32(1.), bf16::from_f32(2.)]; + let mut buf32 = vf32; + let mut buf16 = vf16; + + vf16.convert_to_f32_slice(&mut buf32); + assert_eq!(&vf32, &buf32); + + buf16.convert_from_f32_slice(&vf32); + assert_eq!(&vf16, &buf16); + } + + #[test] + fn slice_convert_f16_f64() { + // Exact chunks + let vf64 = [1., 2., 3., 4., 5., 6., 7., 8.]; + let vf16 = [ + f16::from_f64(1.), + f16::from_f64(2.), + f16::from_f64(3.), + f16::from_f64(4.), + f16::from_f64(5.), + f16::from_f64(6.), + f16::from_f64(7.), + f16::from_f64(8.), + ]; + let mut buf64 = vf64; + let mut buf16 = vf16; + + vf16.convert_to_f64_slice(&mut buf64); + assert_eq!(&vf64, &buf64); + + buf16.convert_from_f64_slice(&vf64); + assert_eq!(&vf16, &buf16); + + // Partial with chunks + let vf64 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; + let vf16 = [ + f16::from_f64(1.), + f16::from_f64(2.), + f16::from_f64(3.), + f16::from_f64(4.), + f16::from_f64(5.), + f16::from_f64(6.), + f16::from_f64(7.), + f16::from_f64(8.), + f16::from_f64(9.), + ]; + let mut buf64 = vf64; + let mut buf16 = vf16; + + vf16.convert_to_f64_slice(&mut buf64); + assert_eq!(&vf64, &buf64); + + buf16.convert_from_f64_slice(&vf64); + assert_eq!(&vf16, &buf16); + + // Partial with chunks + let vf64 = [1., 2.]; + let vf16 = [f16::from_f64(1.), f16::from_f64(2.)]; + let mut buf64 = vf64; + let mut buf16 = vf16; + + vf16.convert_to_f64_slice(&mut buf64); + assert_eq!(&vf64, &buf64); + + buf16.convert_from_f64_slice(&vf64); + assert_eq!(&vf16, &buf16); + } + + #[test] + fn slice_convert_bf16_f64() { + // Exact chunks + let vf64 = [1., 2., 3., 4., 5., 6., 7., 8.]; + let vf16 = [ + bf16::from_f64(1.), + bf16::from_f64(2.), + bf16::from_f64(3.), + bf16::from_f64(4.), + bf16::from_f64(5.), + bf16::from_f64(6.), + bf16::from_f64(7.), + bf16::from_f64(8.), + ]; + let mut buf64 = vf64; + let mut buf16 = vf16; + + vf16.convert_to_f64_slice(&mut buf64); + assert_eq!(&vf64, &buf64); + + buf16.convert_from_f64_slice(&vf64); + assert_eq!(&vf16, &buf16); + + // Partial with chunks + let vf64 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; + let vf16 = [ + bf16::from_f64(1.), + bf16::from_f64(2.), + bf16::from_f64(3.), + bf16::from_f64(4.), + bf16::from_f64(5.), + bf16::from_f64(6.), + bf16::from_f64(7.), + bf16::from_f64(8.), + bf16::from_f64(9.), + ]; + let mut buf64 = vf64; + let mut buf16 = vf16; + + vf16.convert_to_f64_slice(&mut buf64); + assert_eq!(&vf64, &buf64); + + buf16.convert_from_f64_slice(&vf64); + assert_eq!(&vf16, &buf16); + + // Partial with chunks + let vf64 = [1., 2.]; + let vf16 = [bf16::from_f64(1.), bf16::from_f64(2.)]; + let mut buf64 = vf64; + let mut buf16 = vf16; + + vf16.convert_to_f64_slice(&mut buf64); + assert_eq!(&vf64, &buf64); + + buf16.convert_from_f64_slice(&vf64); + assert_eq!(&vf16, &buf16); + } + + #[test] + #[should_panic] + fn convert_from_f32_slice_len_mismatch_panics() { + let mut slice1 = [f16::ZERO; 3]; + let slice2 = [0f32; 4]; + slice1.convert_from_f32_slice(&slice2); + } + + #[test] + #[should_panic] + fn convert_from_f64_slice_len_mismatch_panics() { + let mut slice1 = [f16::ZERO; 3]; + let slice2 = [0f64; 4]; + slice1.convert_from_f64_slice(&slice2); + } + + #[test] + #[should_panic] + fn convert_to_f32_slice_len_mismatch_panics() { + let slice1 = [f16::ZERO; 3]; + let mut slice2 = [0f32; 4]; + slice1.convert_to_f32_slice(&mut slice2); + } + + #[test] + #[should_panic] + fn convert_to_f64_slice_len_mismatch_panics() { + let slice1 = [f16::ZERO; 3]; + let mut slice2 = [0f64; 4]; + slice1.convert_to_f64_slice(&mut slice2); + } +} diff --git a/src/vec.rs b/src/vec.rs new file mode 100644 index 0000000..18a6bb2 --- /dev/null +++ b/src/vec.rs @@ -0,0 +1,301 @@ +//! Contains utility functions and traits to convert between vectors of `u16` bits and `f16` or +//! `bf16` vectors. +//! +//! The utility [`HalfBitsVecExt`] sealed extension trait is implemented for `Vec<u16>` vectors, +//! while the utility [`HalfFloatVecExt`] sealed extension trait is implemented for both `Vec<f16>` +//! and `Vec<bf16>` vectors. These traits provide efficient conversions and reinterpret casting of +//! larger buffers of floating point values, and are automatically included in the [`prelude`] +//! module. +//! +//! This module is only available with the `std` or `alloc` feature. +//! +//! [`HalfBitsVecExt`]: trait.HalfBitsVecExt.html +//! [`HalfFloatVecExt`]: trait.HalfFloatVecExt.html +//! [`prelude`]: ../prelude/index.html + +#![cfg(any(feature = "alloc", feature = "std"))] + +use super::{bf16, f16, slice::HalfFloatSliceExt}; +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::vec::Vec; +use core::mem; + +/// Extensions to `Vec<f16>` and `Vec<bf16>` to support reinterpret operations. +/// +/// This trait is sealed and cannot be implemented outside of this crate. +pub trait HalfFloatVecExt: private::SealedHalfFloatVec { + /// Reinterpret a vector of [`f16`](../struct.f16.html) or [`bf16`](../struct.bf16.html) + /// numbers as a vector of `u16` bits. + /// + /// This is a zero-copy operation. The reinterpreted vector has the same memory location as + /// `self`. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let float_buffer = vec![f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]; + /// let int_buffer = float_buffer.reinterpret_into(); + /// + /// assert_eq!(int_buffer, [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]); + /// ``` + fn reinterpret_into(self) -> Vec<u16>; + + /// Convert all of the elements of a `[f32]` slice into a new [`f16`](../struct.f16.html) or + /// [`bf16`](../struct.bf16.html) vector. + /// + /// The conversion operation is vectorized over the slice, meaning the conversion may be more + /// efficient than converting individual elements on some hardware that supports SIMD + /// conversions. See [crate documentation](../index.html) for more information on hardware + /// conversion support. + /// + /// # Examples + /// ```rust + /// # use half::prelude::*; + /// let float_values = [1., 2., 3., 4.]; + /// let vec: Vec<f16> = Vec::from_f32_slice(&float_values); + /// + /// assert_eq!(vec, vec![f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]); + /// ``` + fn from_f32_slice(slice: &[f32]) -> Self; + + /// Convert all of the elements of a `[f64]` slice into a new [`f16`](../struct.f16.html) or + /// [`bf16`](../struct.bf16.html) vector. + /// + /// The conversion operation is vectorized over the slice, meaning the conversion may be more + /// efficient than converting individual elements on some hardware that supports SIMD + /// conversions. See [crate documentation](../index.html) for more information on hardware + /// conversion support. + /// + /// # Examples + /// ```rust + /// # use half::prelude::*; + /// let float_values = [1., 2., 3., 4.]; + /// let vec: Vec<f16> = Vec::from_f64_slice(&float_values); + /// + /// assert_eq!(vec, vec![f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]); + /// ``` + fn from_f64_slice(slice: &[f64]) -> Self; +} + +/// Extensions to `Vec<u16>` to support reinterpret operations. +/// +/// This trait is sealed and cannot be implemented outside of this crate. +pub trait HalfBitsVecExt: private::SealedHalfBitsVec { + /// Reinterpret a vector of `u16` bits as a vector of [`f16`](../struct.f16.html) or + /// [`bf16`](../struct.bf16.html) numbers. + /// + /// `H` is the type to cast to, and must be either the [`f16`](../struct.f16.html) or + /// [`bf16`](../struct.bf16.html) type. + /// + /// This is a zero-copy operation. The reinterpreted vector has the same memory location as + /// `self`. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let int_buffer = vec![f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]; + /// let float_buffer = int_buffer.reinterpret_into::<f16>(); + /// + /// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]); + /// ``` + fn reinterpret_into<H>(self) -> Vec<H> + where + H: crate::private::SealedHalf; +} + +mod private { + use crate::{bf16, f16}; + #[cfg(all(feature = "alloc", not(feature = "std")))] + use alloc::vec::Vec; + + pub trait SealedHalfFloatVec {} + impl SealedHalfFloatVec for Vec<f16> {} + impl SealedHalfFloatVec for Vec<bf16> {} + + pub trait SealedHalfBitsVec {} + impl SealedHalfBitsVec for Vec<u16> {} +} + +impl HalfFloatVecExt for Vec<f16> { + #[inline] + fn reinterpret_into(mut self) -> Vec<u16> { + // An f16 array has same length and capacity as u16 array + let length = self.len(); + let capacity = self.capacity(); + + // Actually reinterpret the contents of the Vec<f16> as u16, + // knowing that structs are represented as only their members in memory, + // which is the u16 part of `f16(u16)` + let pointer = self.as_mut_ptr() as *mut u16; + + // Prevent running a destructor on the old Vec<u16>, so the pointer won't be deleted + mem::forget(self); + + // Finally construct a new Vec<f16> from the raw pointer + // SAFETY: We are reconstructing full length and capacity of original vector, + // using its original pointer, and the size of elements are identical. + unsafe { Vec::from_raw_parts(pointer, length, capacity) } + } + + fn from_f32_slice(slice: &[f32]) -> Self { + let mut vec = Vec::with_capacity(slice.len()); + // SAFETY: convert will initialize every value in the vector without reading them, + // so this is safe to do instead of double initialize from resize, and we're setting it to + // same value as capacity. + unsafe { vec.set_len(slice.len()) }; + vec.convert_from_f32_slice(&slice); + vec + } + + fn from_f64_slice(slice: &[f64]) -> Self { + let mut vec = Vec::with_capacity(slice.len()); + // SAFETY: convert will initialize every value in the vector without reading them, + // so this is safe to do instead of double initialize from resize, and we're setting it to + // same value as capacity. + unsafe { vec.set_len(slice.len()) }; + vec.convert_from_f64_slice(&slice); + vec + } +} + +impl HalfFloatVecExt for Vec<bf16> { + #[inline] + fn reinterpret_into(mut self) -> Vec<u16> { + // An f16 array has same length and capacity as u16 array + let length = self.len(); + let capacity = self.capacity(); + + // Actually reinterpret the contents of the Vec<f16> as u16, + // knowing that structs are represented as only their members in memory, + // which is the u16 part of `f16(u16)` + let pointer = self.as_mut_ptr() as *mut u16; + + // Prevent running a destructor on the old Vec<u16>, so the pointer won't be deleted + mem::forget(self); + + // Finally construct a new Vec<f16> from the raw pointer + // SAFETY: We are reconstructing full length and capacity of original vector, + // using its original pointer, and the size of elements are identical. + unsafe { Vec::from_raw_parts(pointer, length, capacity) } + } + + fn from_f32_slice(slice: &[f32]) -> Self { + let mut vec = Vec::with_capacity(slice.len()); + // SAFETY: convert will initialize every value in the vector without reading them, + // so this is safe to do instead of double initialize from resize, and we're setting it to + // same value as capacity. + unsafe { vec.set_len(slice.len()) }; + vec.convert_from_f32_slice(&slice); + vec + } + + fn from_f64_slice(slice: &[f64]) -> Self { + let mut vec = Vec::with_capacity(slice.len()); + // SAFETY: convert will initialize every value in the vector without reading them, + // so this is safe to do instead of double initialize from resize, and we're setting it to + // same value as capacity. + unsafe { vec.set_len(slice.len()) }; + vec.convert_from_f64_slice(&slice); + vec + } +} + +impl HalfBitsVecExt for Vec<u16> { + // This is safe because all traits are sealed + #[inline] + fn reinterpret_into<H>(mut self) -> Vec<H> + where + H: crate::private::SealedHalf, + { + // An f16 array has same length and capacity as u16 array + let length = self.len(); + let capacity = self.capacity(); + + // Actually reinterpret the contents of the Vec<u16> as f16, + // knowing that structs are represented as only their members in memory, + // which is the u16 part of `f16(u16)` + let pointer = self.as_mut_ptr() as *mut H; + + // Prevent running a destructor on the old Vec<u16>, so the pointer won't be deleted + mem::forget(self); + + // Finally construct a new Vec<f16> from the raw pointer + // SAFETY: We are reconstructing full length and capacity of original vector, + // using its original pointer, and the size of elements are identical. + unsafe { Vec::from_raw_parts(pointer, length, capacity) } + } +} + +/// Converts a vector of `u16` elements into a vector of [`f16`](../struct.f16.html) elements. +/// +/// This function merely reinterprets the contents of the vector, so it's a zero-copy operation. +#[deprecated( + since = "1.4.0", + note = "use [`HalfBitsVecExt::reinterpret_into`](trait.HalfBitsVecExt.html#tymethod.reinterpret_into) instead" +)] +#[inline] +pub fn from_bits(bits: Vec<u16>) -> Vec<f16> { + bits.reinterpret_into() +} + +/// Converts a vector of [`f16`](../struct.f16.html) elements into a vector of `u16` elements. +/// +/// This function merely reinterprets the contents of the vector, so it's a zero-copy operation. +#[deprecated( + since = "1.4.0", + note = "use [`HalfFloatVecExt::reinterpret_into`](trait.HalfFloatVecExt.html#tymethod.reinterpret_into) instead" +)] +#[inline] +pub fn to_bits(numbers: Vec<f16>) -> Vec<u16> { + numbers.reinterpret_into() +} + +#[cfg(test)] +mod test { + use super::{HalfBitsVecExt, HalfFloatVecExt}; + use crate::{bf16, f16}; + #[cfg(all(feature = "alloc", not(feature = "std")))] + use alloc::vec; + + #[test] + fn test_vec_conversions_f16() { + let numbers = vec![f16::E, f16::PI, f16::EPSILON, f16::FRAC_1_SQRT_2]; + let bits = vec![ + f16::E.to_bits(), + f16::PI.to_bits(), + f16::EPSILON.to_bits(), + f16::FRAC_1_SQRT_2.to_bits(), + ]; + let bits_cloned = bits.clone(); + + // Convert from bits to numbers + let from_bits = bits.reinterpret_into::<f16>(); + assert_eq!(&from_bits[..], &numbers[..]); + + // Convert from numbers back to bits + let to_bits = from_bits.reinterpret_into(); + assert_eq!(&to_bits[..], &bits_cloned[..]); + } + + #[test] + fn test_vec_conversions_bf16() { + let numbers = vec![bf16::E, bf16::PI, bf16::EPSILON, bf16::FRAC_1_SQRT_2]; + let bits = vec![ + bf16::E.to_bits(), + bf16::PI.to_bits(), + bf16::EPSILON.to_bits(), + bf16::FRAC_1_SQRT_2.to_bits(), + ]; + let bits_cloned = bits.clone(); + + // Convert from bits to numbers + let from_bits = bits.reinterpret_into::<bf16>(); + assert_eq!(&from_bits[..], &numbers[..]); + + // Convert from numbers back to bits + let to_bits = from_bits.reinterpret_into(); + assert_eq!(&to_bits[..], &bits_cloned[..]); + } +} diff --git a/tests/version-numbers.rs b/tests/version-numbers.rs new file mode 100644 index 0000000..ff9860c --- /dev/null +++ b/tests/version-numbers.rs @@ -0,0 +1,14 @@ +#[test] +fn test_readme_deps() { + version_sync::assert_markdown_deps_updated!("README.md"); +} + +#[test] +fn test_html_root_url() { + version_sync::assert_html_root_url_updated!("src/lib.rs"); +} + +#[test] +fn test_changelog_version() { + version_sync::assert_contains_regex!("CHANGELOG.md", "^## \\[{version}\\]"); +} |