From 486b32152e6c9035ed032b4ba6d1f1dddcd06478 Mon Sep 17 00:00:00 2001 From: Joel Galenson Date: Thu, 1 Apr 2021 16:55:16 -0700 Subject: Upgrade rust/crates/hashbrown to 0.11.2 Test: make Change-Id: I635dccc3bc3cd89b50a49c08f6c7010d0a883e70 --- .cargo_vcs_info.json | 2 +- Android.bp | 8 +- CHANGELOG.md | 50 +- Cargo.toml | 16 +- Cargo.toml.orig | 11 +- METADATA | 10 +- README.md | 98 +-- TEST_MAPPING | 11 + benches/bench.rs | 101 ++- src/external_trait_impls/rayon/map.rs | 220 ++++-- src/external_trait_impls/rayon/raw.rs | 58 +- src/external_trait_impls/rayon/set.rs | 141 ++-- src/lib.rs | 53 +- src/map.rs | 726 ++++++++++++++----- src/raw/alloc.rs | 72 ++ src/raw/generic.rs | 8 +- src/raw/mod.rs | 1224 +++++++++++++++++++++------------ src/raw/sse2.rs | 9 +- src/rustc_entry.rs | 58 +- src/scopeguard.rs | 8 +- src/set.rs | 428 ++++++++---- tests/serde.rs | 22 +- 22 files changed, 2319 insertions(+), 1015 deletions(-) create mode 100644 TEST_MAPPING create mode 100644 src/raw/alloc.rs diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json index 845b5e7..0eb3824 100644 --- a/.cargo_vcs_info.json +++ b/.cargo_vcs_info.json @@ -1,5 +1,5 @@ { "git": { - "sha1": "34c11891e13fa3c0d08b0540e869aace9d347c26" + "sha1": "bbb5d3bb1c23569c15e54c670bc0c3669ae3e7dc" } } diff --git a/Android.bp b/Android.bp index f4b72d6..73e0c70 100644 --- a/Android.bp +++ b/Android.bp @@ -1,4 +1,5 @@ // This file is generated by cargo2android.py --device --run --dependencies. +// Do not modify this file as changes will be overridden on upgrade. package { default_applicable_licenses: ["external_rust_crates_hashbrown_license"], @@ -53,4 +54,9 @@ rust_library { } // dependent_library ["feature_list"] -// ahash-0.4.6 +// ahash-0.7.2 "folded_multiply,runtime-rng,specialize" +// cfg-if-1.0.0 +// getrandom-0.2.2 +// libc-0.2.92 "default,std" +// once_cell-1.7.2 "alloc,race,unstable" +// version_check-0.9.3 diff --git a/CHANGELOG.md b/CHANGELOG.md index b6eb671..c88d3e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,50 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] +## [v0.11.2] - 2021-03-25 + +## Fixed + +- Added missing allocator type parameter to `HashMap`'s and `HashSet`'s `Clone` impls. (#252) + +## [v0.11.1] - 2021-03-20 + +## Fixed + +- Added missing `pub` modifier to `BumpWrapper`. (#251) + +## [v0.11.0] - 2021-03-14 + +## Added +- Added safe `try_insert_no_grow` method to `RawTable`. (#229) +- Added support for `bumpalo` as an allocator without the `nightly` feature. (#231) +- Implemented `Default` for `RawTable`. (#237) +- Added new safe methods `RawTable::get_each_mut`, `HashMap::get_each_mut`, and + `HashMap::get_each_key_value_mut`. (#239) +- Added `From>` for `HashSet`. (#235) +- Added `try_insert` method to `HashMap`. (#247) + +## Changed +- The minimum Rust version has been bumped to 1.49.0. (#230) +- Significantly improved compilation times by reducing the amount of generated IR. (#205) + +## Removed +- We no longer re-export the unstable allocator items from the standard library, nor the stable shims approximating the same. (#227) +- Removed hasher specialization support from `aHash`, which was resulting in inconsistent hashes being generated for a key. (#248) + +## Fixed +- Fixed union length comparison. (#228) + +## ~~[v0.10.0] - 2021-01-16~~ + +This release was _yanked_ due to inconsistent hashes being generated with the `nightly` feature. (#248) + +## Changed +- Parametrized `RawTable`, `HashSet` and `HashMap` over an allocator. 
(#133) +- Improved branch prediction hints on stable. (#209) +- Optimized hashing of primitive types with AHash using specialization. (#207) +- Only instantiate `RawTable`'s reserve functions once per key-value. (#204) + ## [v0.9.1] - 2020-09-28 ## Added @@ -263,7 +307,11 @@ This release was _yanked_ due to a breaking change for users of `no-default-feat - Initial release -[Unreleased]: https://github.com/rust-lang/hashbrown/compare/v0.9.1...HEAD +[Unreleased]: https://github.com/rust-lang/hashbrown/compare/v0.11.2...HEAD +[v0.11.2]: https://github.com/rust-lang/hashbrown/compare/v0.11.1...v0.11.2 +[v0.11.1]: https://github.com/rust-lang/hashbrown/compare/v0.11.0...v0.11.1 +[v0.11.0]: https://github.com/rust-lang/hashbrown/compare/v0.10.0...v0.11.0 +[v0.10.0]: https://github.com/rust-lang/hashbrown/compare/v0.9.1...v0.10.0 [v0.9.1]: https://github.com/rust-lang/hashbrown/compare/v0.9.0...v0.9.1 [v0.9.0]: https://github.com/rust-lang/hashbrown/compare/v0.8.2...v0.9.0 [v0.8.2]: https://github.com/rust-lang/hashbrown/compare/v0.8.1...v0.8.2 diff --git a/Cargo.toml b/Cargo.toml index 7be0341..02bd480 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ [package] edition = "2018" name = "hashbrown" -version = "0.9.1" +version = "0.11.2" authors = ["Amanieu d'Antras "] exclude = [".travis.yml", "bors.toml", "/ci/*"] description = "A Rust port of Google's SwissTable hash map" @@ -25,7 +25,7 @@ repository = "https://github.com/rust-lang/hashbrown" [package.metadata.docs.rs] features = ["nightly", "rayon", "serde", "raw"] [dependencies.ahash] -version = "0.4.4" +version = "0.7.0" optional = true default-features = false @@ -34,6 +34,10 @@ version = "1.0.0" optional = true package = "rustc-std-workspace-alloc" +[dependencies.bumpalo] +version = "3.5.0" +optional = true + [dependencies.compiler_builtins] version = "0.1.2" optional = true @@ -54,8 +58,11 @@ default-features = false [dev-dependencies.doc-comment] version = "0.3.1" +[dev-dependencies.fnv] +version = "1.0.7" + [dev-dependencies.lazy_static] -version = "1.2" +version = "1.4" [dev-dependencies.rand] version = "0.7.3" @@ -64,9 +71,6 @@ features = ["small_rng"] [dev-dependencies.rayon] version = "1.0" -[dev-dependencies.rustc-hash] -version = "=1.0" - [dev-dependencies.serde_test] version = "1.0" diff --git a/Cargo.toml.orig b/Cargo.toml.orig index 21bd5c2..a056c3c 100644 --- a/Cargo.toml.orig +++ b/Cargo.toml.orig @@ -1,6 +1,6 @@ [package] name = "hashbrown" -version = "0.9.1" +version = "0.11.2" authors = ["Amanieu d'Antras "] description = "A Rust port of Google's SwissTable hash map" license = "Apache-2.0/MIT" @@ -13,7 +13,7 @@ edition = "2018" [dependencies] # For the default hasher -ahash = { version = "0.4.4", optional = true, default-features = false } +ahash = { version = "0.7.0", default-features = false, optional = true } # For external trait impls rayon = { version = "1.0", optional = true } @@ -24,11 +24,14 @@ core = { version = "1.0.0", optional = true, package = "rustc-std-workspace-core compiler_builtins = { version = "0.1.2", optional = true } alloc = { version = "1.0.0", optional = true, package = "rustc-std-workspace-alloc" } +# Optional support for bumpalo +bumpalo = { version = "3.5.0", optional = true } + [dev-dependencies] -lazy_static = "1.2" +lazy_static = "1.4" rand = { version = "0.7.3", features = ["small_rng"] } rayon = "1.0" -rustc-hash = "=1.0" +fnv = "1.0.7" serde_test = "1.0" doc-comment = "0.3.1" diff --git a/METADATA b/METADATA index b3d1a7c..fa67d80 100644 --- a/METADATA +++ b/METADATA @@ -7,13 +7,13 
@@ third_party { } url { type: ARCHIVE - value: "https://static.crates.io/crates/hashbrown/hashbrown-0.9.1.crate" + value: "https://static.crates.io/crates/hashbrown/hashbrown-0.11.2.crate" } - version: "0.9.1" + version: "0.11.2" license_type: NOTICE last_upgrade_date { - year: 2020 - month: 11 - day: 9 + year: 2021 + month: 4 + day: 1 } } diff --git a/README.md b/README.md index 2e43171..86664c4 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ hashbrown [![Build Status](https://travis-ci.com/rust-lang/hashbrown.svg?branch=master)](https://travis-ci.com/rust-lang/hashbrown) [![Crates.io](https://img.shields.io/crates/v/hashbrown.svg)](https://crates.io/crates/hashbrown) [![Documentation](https://docs.rs/hashbrown/badge.svg)](https://docs.rs/hashbrown) -[![Rust](https://img.shields.io/badge/rust-1.36.0%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/hashbrown) +[![Rust](https://img.shields.io/badge/rust-1.49.0%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/hashbrown) This crate is a Rust port of Google's high-performance [SwissTable] hash map, adapted to make it a drop-in replacement for Rust's standard `HashMap` @@ -26,7 +26,8 @@ in environments without `std`, such as embedded systems and kernels. ## Features - Drop-in replacement for the standard library `HashMap` and `HashSet` types. -- Uses `AHash` as the default hasher, which is much faster than SipHash. +- Uses [AHash](https://github.com/tkaitchuck/aHash) as the default hasher, which is much faster than SipHash. + However, AHash does *not provide the same level of HashDoS resistance* as SipHash, so if that is important to you, you might want to consider using a different hasher. - Around 2x faster than the previous standard library `HashMap`. - Lower memory usage: only 1 byte of overhead per entry instead of 8. - Compatible with `#[no_std]` (but requires a global allocator with the `alloc` crate). @@ -37,47 +38,46 @@ in environments without `std`, such as embedded systems and kernels. Compared to the previous implementation of `std::collections::HashMap` (Rust 1.35). 
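The benchmark listings that follow are split by hasher: hashbrown's default AHash and the standard library's SipHash. For readers who need the HashDoS resistance noted in the features list above, a minimal sketch of swapping in std's SipHash-based `RandomState` as the hash builder (nothing beyond the documented `with_hasher` constructor is assumed):

```rust
use hashbrown::HashMap;
use std::collections::hash_map::RandomState;

// Use std's SipHash-based RandomState instead of the default AHash builder.
let mut map: HashMap<&str, u32, RandomState> = HashMap::with_hasher(RandomState::new());
map.insert("one", 1);
assert_eq!(map.get("one"), Some(&1));
```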
-With the hashbrown default AHash hasher (not HashDoS-resistant): - -```text - name oldstdhash ns/iter hashbrown ns/iter diff ns/iter diff % speedup - insert_ahash_highbits 20,846 7,397 -13,449 -64.52% x 2.82 - insert_ahash_random 20,515 7,796 -12,719 -62.00% x 2.63 - insert_ahash_serial 21,668 7,264 -14,404 -66.48% x 2.98 - insert_erase_ahash_highbits 29,570 17,498 -12,072 -40.83% x 1.69 - insert_erase_ahash_random 39,569 17,474 -22,095 -55.84% x 2.26 - insert_erase_ahash_serial 32,073 17,332 -14,741 -45.96% x 1.85 - iter_ahash_highbits 1,572 2,087 515 32.76% x 0.75 - iter_ahash_random 1,609 2,074 465 28.90% x 0.78 - iter_ahash_serial 2,293 2,120 -173 -7.54% x 1.08 - lookup_ahash_highbits 3,460 4,403 943 27.25% x 0.79 - lookup_ahash_random 6,377 3,911 -2,466 -38.67% x 1.63 - lookup_ahash_serial 3,629 3,586 -43 -1.18% x 1.01 - lookup_fail_ahash_highbits 5,286 3,411 -1,875 -35.47% x 1.55 - lookup_fail_ahash_random 12,365 4,171 -8,194 -66.27% x 2.96 - lookup_fail_ahash_serial 4,902 3,240 -1,662 -33.90% x 1.51 -``` - -With the libstd default SipHash hasher (HashDoS-resistant): - -```text - name oldstdhash ns/iter hashbrown ns/iter diff ns/iter diff % speedup - insert_std_highbits 32,598 20,199 -12,399 -38.04% x 1.61 - insert_std_random 29,824 20,760 -9,064 -30.39% x 1.44 - insert_std_serial 33,151 17,256 -15,895 -47.95% x 1.92 - insert_erase_std_highbits 74,731 48,735 -25,996 -34.79% x 1.53 - insert_erase_std_random 73,828 47,649 -26,179 -35.46% x 1.55 - insert_erase_std_serial 73,864 40,147 -33,717 -45.65% x 1.84 - iter_std_highbits 1,518 2,264 746 49.14% x 0.67 - iter_std_random 1,502 2,414 912 60.72% x 0.62 - iter_std_serial 6,361 2,118 -4,243 -66.70% x 3.00 - lookup_std_highbits 21,705 16,962 -4,743 -21.85% x 1.28 - lookup_std_random 21,654 17,158 -4,496 -20.76% x 1.26 - lookup_std_serial 18,726 14,509 -4,217 -22.52% x 1.29 - lookup_fail_std_highbits 25,852 17,323 -8,529 -32.99% x 1.49 - lookup_fail_std_random 25,913 17,760 -8,153 -31.46% x 1.46 - lookup_fail_std_serial 22,648 14,839 -7,809 -34.48% x 1.53 -``` +With the hashbrown default AHash hasher: + +| name | oldstdhash ns/iter | hashbrown ns/iter | diff ns/iter | diff % | speedup | +|:------------------------|:-------------------:|------------------:|:------------:|---------:|---------| +| insert_ahash_highbits | 18,865 | 8,020 | -10,845 | -57.49% | x 2.35 | +| insert_ahash_random | 19,711 | 8,019 | -11,692 | -59.32% | x 2.46 | +| insert_ahash_serial | 19,365 | 6,463 | -12,902 | -66.63% | x 3.00 | +| insert_erase_ahash_highbits | 51,136 | 17,916 | -33,220 | -64.96% | x 2.85 | +| insert_erase_ahash_random | 51,157 | 17,688 | -33,469 | -65.42% | x 2.89 | +| insert_erase_ahash_serial | 45,479 | 14,895 | -30,584 | -67.25% | x 3.05 | +| iter_ahash_highbits | 1,399 | 1,092 | -307 | -21.94% | x 1.28 | +| iter_ahash_random | 1,586 | 1,059 | -527 | -33.23% | x 1.50 | +| iter_ahash_serial | 3,168 | 1,079 | -2,089 | -65.94% | x 2.94 | +| lookup_ahash_highbits | 32,351 | 4,792 | -27,559 | -85.19% | x 6.75 | +| lookup_ahash_random | 17,419 | 4,817 | -12,602 | -72.35% | x 3.62 | +| lookup_ahash_serial | 15,254 | 3,606 | -11,648 | -76.36% | x 4.23 | +| lookup_fail_ahash_highbits | 21,187 | 4,369 | -16,818 | -79.38% | x 4.85 | +| lookup_fail_ahash_random | 21,550 | 4,395 | -17,155 | -79.61% | x 4.90 | +| lookup_fail_ahash_serial | 19,450 | 3,176 | -16,274 | -83.67% | x 6.12 | + + +With the libstd default SipHash hasher: + +|name | oldstdhash ns/iter | hashbrown ns/iter | diff ns/iter | diff % | speedup | 
+|:------------------------|:-------------------:|------------------:|:------------:|---------:|---------| +|insert_std_highbits |19,216 |16,885 | -2,331 | -12.13% | x 1.14 | +|insert_std_random |19,179 |17,034 | -2,145 | -11.18% | x 1.13 | +|insert_std_serial |19,462 |17,493 | -1,969 | -10.12% | x 1.11 | +|insert_erase_std_highbits |50,825 |35,847 | -14,978 | -29.47% | x 1.42 | +|insert_erase_std_random |51,448 |35,392 | -16,056 | -31.21% | x 1.45 | +|insert_erase_std_serial |87,711 |38,091 | -49,620 | -56.57% | x 2.30 | +|iter_std_highbits |1,378 |1,159 | -219 | -15.89% | x 1.19 | +|iter_std_random |1,395 |1,132 | -263 | -18.85% | x 1.23 | +|iter_std_serial |1,704 |1,105 | -599 | -35.15% | x 1.54 | +|lookup_std_highbits |17,195 |13,642 | -3,553 | -20.66% | x 1.26 | +|lookup_std_random |17,181 |13,773 | -3,408 | -19.84% | x 1.25 | +|lookup_std_serial |15,483 |13,651 | -1,832 | -11.83% | x 1.13 | +|lookup_fail_std_highbits |20,926 |13,474 | -7,452 | -35.61% | x 1.55 | +|lookup_fail_std_random |21,766 |13,505 | -8,261 | -37.95% | x 1.61 | +|lookup_fail_std_serial |19,336 |13,519 | -5,817 | -30.08% | x 1.43 | ## Usage @@ -85,7 +85,7 @@ Add this to your `Cargo.toml`: ```toml [dependencies] -hashbrown = "0.9" +hashbrown = "0.11" ``` Then: @@ -96,19 +96,19 @@ use hashbrown::HashMap; let mut map = HashMap::new(); map.insert(1, "one"); ``` - +## Flags This crate has the following Cargo features: -- `nightly`: Enables nightly-only features: `#[may_dangle]`. +- `nightly`: Enables nightly-only features including: `#[may_dangle]`. - `serde`: Enables serde serialization support. - `rayon`: Enables rayon parallel iterator support. - `raw`: Enables access to the experimental and unsafe `RawTable` API. - `inline-more`: Adds inline hints to most functions, improving run-time performance at the cost of compilation time. (enabled by default) +- `bumpalo`: Provides a `BumpWrapper` type which allows `bumpalo` to be used for memory allocation. - `ahash`: Compiles with ahash as default hasher. (enabled by default) -- `ahash-compile-time-rng`: Activates the `compile-time-rng` feature of ahash, to increase the - DOS-resistance, but can result in issues for `no_std` builds. More details in - [issue#124](https://github.com/rust-lang/hashbrown/issues/124). (enabled by default) +- `ahash-compile-time-rng`: Activates the `compile-time-rng` feature of ahash. For targets with no random number generator +this pre-generates seeds at compile time and embeds them as constants. See [aHash's documentation](https://github.com/tkaitchuck/aHash#flags) (disabled by default) ## License diff --git a/TEST_MAPPING b/TEST_MAPPING new file mode 100644 index 0000000..e707449 --- /dev/null +++ b/TEST_MAPPING @@ -0,0 +1,11 @@ +// Generated by update_crate_tests.py for tests that depend on this crate. 
+{ + "presubmit": [ + { + "name": "keystore2_test" + }, + { + "name": "vpnprofilestore_test" + } + ] +} diff --git a/benches/bench.rs b/benches/bench.rs index 771e716..568c513 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -9,8 +9,11 @@ extern crate test; use test::{black_box, Bencher}; use hashbrown::hash_map::DefaultHashBuilder; -use hashbrown::HashMap; -use std::collections::hash_map::RandomState; +use hashbrown::{HashMap, HashSet}; +use std::{ + collections::hash_map::RandomState, + sync::atomic::{self, AtomicUsize}, +}; const SIZE: usize = 1000; @@ -40,6 +43,20 @@ impl Iterator for RandomKeys { } } +// Just an arbitrary side effect to make the maps not shortcircuit to the non-dropping path +// when dropping maps/entries (most real world usages likely have drop in the key or value) +lazy_static::lazy_static! { + static ref SIDE_EFFECT: AtomicUsize = AtomicUsize::new(0); +} + +#[derive(Clone)] +struct DropType(usize); +impl Drop for DropType { + fn drop(&mut self) { + SIDE_EFFECT.fetch_add(self.0, atomic::Ordering::SeqCst); + } +} + macro_rules! bench_suite { ($bench_macro:ident, $bench_ahash_serial:ident, $bench_std_serial:ident, $bench_ahash_highbits:ident, $bench_std_highbits:ident, @@ -69,10 +86,11 @@ macro_rules! bench_insert { b.iter(|| { m.clear(); for i in ($keydist).take(SIZE) { - m.insert(i, i); + m.insert(i, (DropType(i), [i; 20])); } black_box(&mut m); - }) + }); + eprintln!("{}", SIDE_EFFECT.load(atomic::Ordering::SeqCst)); } }; } @@ -87,13 +105,38 @@ bench_suite!( insert_std_random ); +macro_rules! bench_grow_insert { + ($name:ident, $maptype:ident, $keydist:expr) => { + #[bench] + fn $name(b: &mut Bencher) { + b.iter(|| { + let mut m = $maptype::default(); + for i in ($keydist).take(SIZE) { + m.insert(i, DropType(i)); + } + black_box(&mut m); + }) + } + }; +} + +bench_suite!( + bench_grow_insert, + grow_insert_ahash_serial, + grow_insert_std_serial, + grow_insert_ahash_highbits, + grow_insert_std_highbits, + grow_insert_ahash_random, + grow_insert_std_random +); + macro_rules! bench_insert_erase { ($name:ident, $maptype:ident, $keydist:expr) => { #[bench] fn $name(b: &mut Bencher) { let mut base = $maptype::default(); for i in ($keydist).take(SIZE) { - base.insert(i, i); + base.insert(i, DropType(i)); } let skip = $keydist.skip(SIZE); b.iter(|| { @@ -103,11 +146,12 @@ macro_rules! bench_insert_erase { // While keeping the size constant, // replace the first keydist with the second. for (add, remove) in (&mut add_iter).zip(&mut remove_iter).take(SIZE) { - m.insert(add, add); + m.insert(add, DropType(add)); black_box(m.remove(&remove)); } black_box(m); - }) + }); + eprintln!("{}", SIDE_EFFECT.load(atomic::Ordering::SeqCst)); } }; } @@ -128,14 +172,15 @@ macro_rules! bench_lookup { fn $name(b: &mut Bencher) { let mut m = $maptype::default(); for i in $keydist.take(SIZE) { - m.insert(i, i); + m.insert(i, DropType(i)); } b.iter(|| { for i in $keydist.take(SIZE) { black_box(m.get(&i)); } - }) + }); + eprintln!("{}", SIDE_EFFECT.load(atomic::Ordering::SeqCst)); } }; } @@ -157,7 +202,7 @@ macro_rules! bench_lookup_fail { let mut m = $maptype::default(); let mut iter = $keydist; for i in (&mut iter).take(SIZE) { - m.insert(i, i); + m.insert(i, DropType(i)); } b.iter(|| { @@ -185,7 +230,7 @@ macro_rules! 
bench_iter { fn $name(b: &mut Bencher) { let mut m = $maptype::default(); for i in ($keydist).take(SIZE) { - m.insert(i, i); + m.insert(i, DropType(i)); } b.iter(|| { @@ -211,7 +256,7 @@ bench_suite!( fn clone_small(b: &mut Bencher) { let mut m = HashMap::new(); for i in 0..10 { - m.insert(i, i); + m.insert(i, DropType(i)); } b.iter(|| { @@ -224,7 +269,7 @@ fn clone_from_small(b: &mut Bencher) { let mut m = HashMap::new(); let mut m2 = HashMap::new(); for i in 0..10 { - m.insert(i, i); + m.insert(i, DropType(i)); } b.iter(|| { @@ -237,7 +282,7 @@ fn clone_from_small(b: &mut Bencher) { fn clone_large(b: &mut Bencher) { let mut m = HashMap::new(); for i in 0..1000 { - m.insert(i, i); + m.insert(i, DropType(i)); } b.iter(|| { @@ -250,7 +295,7 @@ fn clone_from_large(b: &mut Bencher) { let mut m = HashMap::new(); let mut m2 = HashMap::new(); for i in 0..1000 { - m.insert(i, i); + m.insert(i, DropType(i)); } b.iter(|| { @@ -258,3 +303,29 @@ fn clone_from_large(b: &mut Bencher) { black_box(&mut m2); }) } + +#[bench] +fn rehash_in_place(b: &mut Bencher) { + b.iter(|| { + let mut set = HashSet::new(); + + // Each loop triggers one rehash + for _ in 0..10 { + for i in 0..224 { + set.insert(i); + } + + assert_eq!( + set.capacity(), + 224, + "The set must be at or close to capacity to trigger a re hashing" + ); + + for i in 100..1400 { + set.remove(&(i - 100)); + set.insert(i); + } + set.clear(); + } + }); +} diff --git a/src/external_trait_impls/rayon/map.rs b/src/external_trait_impls/rayon/map.rs index 334f8bb..61b7380 100644 --- a/src/external_trait_impls/rayon/map.rs +++ b/src/external_trait_impls/rayon/map.rs @@ -1,8 +1,11 @@ //! Rayon extensions for `HashMap`. +use super::raw::{RawIntoParIter, RawParDrain, RawParIter}; use crate::hash_map::HashMap; +use crate::raw::{Allocator, Global}; use core::fmt; use core::hash::{BuildHasher, Hash}; +use core::marker::PhantomData; use rayon::iter::plumbing::UnindexedConsumer; use rayon::iter::{FromParallelIterator, IntoParallelIterator, ParallelExtend, ParallelIterator}; @@ -15,11 +18,12 @@ use rayon::iter::{FromParallelIterator, IntoParallelIterator, ParallelExtend, Pa /// [`par_iter`]: /hashbrown/struct.HashMap.html#method.par_iter /// [`HashMap`]: /hashbrown/struct.HashMap.html /// [`IntoParallelRefIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelRefIterator.html -pub struct ParIter<'a, K, V, S> { - map: &'a HashMap, +pub struct ParIter<'a, K, V> { + inner: RawParIter<(K, V)>, + marker: PhantomData<(&'a K, &'a V)>, } -impl<'a, K: Sync, V: Sync, S: Sync> ParallelIterator for ParIter<'a, K, V, S> { +impl<'a, K: Sync, V: Sync> ParallelIterator for ParIter<'a, K, V> { type Item = (&'a K, &'a V); #[cfg_attr(feature = "inline-more", inline)] @@ -27,7 +31,7 @@ impl<'a, K: Sync, V: Sync, S: Sync> ParallelIterator for ParIter<'a, K, V, S> { where C: UnindexedConsumer, { - unsafe { self.map.table.par_iter() } + self.inner .map(|x| unsafe { let r = x.as_ref(); (&r.0, &r.1) @@ -36,16 +40,23 @@ impl<'a, K: Sync, V: Sync, S: Sync> ParallelIterator for ParIter<'a, K, V, S> { } } -impl Clone for ParIter<'_, K, V, S> { +impl Clone for ParIter<'_, K, V> { #[cfg_attr(feature = "inline-more", inline)] fn clone(&self) -> Self { - ParIter { map: self.map } + Self { + inner: self.inner.clone(), + marker: PhantomData, + } } } -impl fmt::Debug for ParIter<'_, K, V, S> { +impl fmt::Debug for ParIter<'_, K, V> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.map.iter().fmt(f) + let iter = unsafe { self.inner.iter() }.map(|x| unsafe { + let r = 
x.as_ref(); + (&r.0, &r.1) + }); + f.debug_list().entries(iter).finish() } } @@ -56,11 +67,12 @@ impl fmt::Debug for Pa /// /// [`par_keys`]: /hashbrown/struct.HashMap.html#method.par_keys /// [`HashMap`]: /hashbrown/struct.HashMap.html -pub struct ParKeys<'a, K, V, S> { - map: &'a HashMap, +pub struct ParKeys<'a, K, V> { + inner: RawParIter<(K, V)>, + marker: PhantomData<(&'a K, &'a V)>, } -impl<'a, K: Sync, V: Sync, S: Sync> ParallelIterator for ParKeys<'a, K, V, S> { +impl<'a, K: Sync, V: Sync> ParallelIterator for ParKeys<'a, K, V> { type Item = &'a K; #[cfg_attr(feature = "inline-more", inline)] @@ -68,22 +80,26 @@ impl<'a, K: Sync, V: Sync, S: Sync> ParallelIterator for ParKeys<'a, K, V, S> { where C: UnindexedConsumer, { - unsafe { self.map.table.par_iter() } + self.inner .map(|x| unsafe { &x.as_ref().0 }) .drive_unindexed(consumer) } } -impl Clone for ParKeys<'_, K, V, S> { +impl Clone for ParKeys<'_, K, V> { #[cfg_attr(feature = "inline-more", inline)] fn clone(&self) -> Self { - ParKeys { map: self.map } + Self { + inner: self.inner.clone(), + marker: PhantomData, + } } } -impl fmt::Debug for ParKeys<'_, K, V, S> { +impl fmt::Debug for ParKeys<'_, K, V> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.map.keys().fmt(f) + let iter = unsafe { self.inner.iter() }.map(|x| unsafe { &x.as_ref().0 }); + f.debug_list().entries(iter).finish() } } @@ -94,11 +110,12 @@ impl fmt::Debug for ParKeys<'_, K, /// /// [`par_values`]: /hashbrown/struct.HashMap.html#method.par_values /// [`HashMap`]: /hashbrown/struct.HashMap.html -pub struct ParValues<'a, K, V, S> { - map: &'a HashMap, +pub struct ParValues<'a, K, V> { + inner: RawParIter<(K, V)>, + marker: PhantomData<(&'a K, &'a V)>, } -impl<'a, K: Sync, V: Sync, S: Sync> ParallelIterator for ParValues<'a, K, V, S> { +impl<'a, K: Sync, V: Sync> ParallelIterator for ParValues<'a, K, V> { type Item = &'a V; #[cfg_attr(feature = "inline-more", inline)] @@ -106,22 +123,26 @@ impl<'a, K: Sync, V: Sync, S: Sync> ParallelIterator for ParValues<'a, K, V, S> where C: UnindexedConsumer, { - unsafe { self.map.table.par_iter() } + self.inner .map(|x| unsafe { &x.as_ref().1 }) .drive_unindexed(consumer) } } -impl Clone for ParValues<'_, K, V, S> { +impl Clone for ParValues<'_, K, V> { #[cfg_attr(feature = "inline-more", inline)] fn clone(&self) -> Self { - ParValues { map: self.map } + Self { + inner: self.inner.clone(), + marker: PhantomData, + } } } -impl fmt::Debug for ParValues<'_, K, V, S> { +impl fmt::Debug for ParValues<'_, K, V> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.map.values().fmt(f) + let iter = unsafe { self.inner.iter() }.map(|x| unsafe { &x.as_ref().1 }); + f.debug_list().entries(iter).finish() } } @@ -134,11 +155,12 @@ impl fmt::Debug for ParValues<'_, K /// [`par_iter_mut`]: /hashbrown/struct.HashMap.html#method.par_iter_mut /// [`HashMap`]: /hashbrown/struct.HashMap.html /// [`IntoParallelRefMutIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelRefMutIterator.html -pub struct ParIterMut<'a, K, V, S> { - map: &'a mut HashMap, +pub struct ParIterMut<'a, K, V> { + inner: RawParIter<(K, V)>, + marker: PhantomData<(&'a K, &'a mut V)>, } -impl<'a, K: Send + Sync, V: Send, S: Send> ParallelIterator for ParIterMut<'a, K, V, S> { +impl<'a, K: Sync, V: Send> ParallelIterator for ParIterMut<'a, K, V> { type Item = (&'a K, &'a mut V); #[cfg_attr(feature = "inline-more", inline)] @@ -146,7 +168,7 @@ impl<'a, K: Send + Sync, V: Send, S: Send> ParallelIterator for ParIterMut<'a, K where C: 
UnindexedConsumer, { - unsafe { self.map.table.par_iter() } + self.inner .map(|x| unsafe { let r = x.as_mut(); (&r.0, &mut r.1) @@ -155,11 +177,13 @@ impl<'a, K: Send + Sync, V: Send, S: Send> ParallelIterator for ParIterMut<'a, K } } -impl fmt::Debug - for ParIterMut<'_, K, V, S> -{ +impl fmt::Debug for ParIterMut<'_, K, V> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.map.iter().fmt(f) + ParIter { + inner: self.inner.clone(), + marker: PhantomData, + } + .fmt(f) } } @@ -170,11 +194,12 @@ impl fmt::Debug /// /// [`par_values_mut`]: /hashbrown/struct.HashMap.html#method.par_values_mut /// [`HashMap`]: /hashbrown/struct.HashMap.html -pub struct ParValuesMut<'a, K, V, S> { - map: &'a mut HashMap, +pub struct ParValuesMut<'a, K, V> { + inner: RawParIter<(K, V)>, + marker: PhantomData<(&'a K, &'a mut V)>, } -impl<'a, K: Send, V: Send, S: Send> ParallelIterator for ParValuesMut<'a, K, V, S> { +impl<'a, K: Sync, V: Send> ParallelIterator for ParValuesMut<'a, K, V> { type Item = &'a mut V; #[cfg_attr(feature = "inline-more", inline)] @@ -182,15 +207,19 @@ impl<'a, K: Send, V: Send, S: Send> ParallelIterator for ParValuesMut<'a, K, V, where C: UnindexedConsumer, { - unsafe { self.map.table.par_iter() } + self.inner .map(|x| unsafe { &mut x.as_mut().1 }) .drive_unindexed(consumer) } } -impl fmt::Debug for ParValuesMut<'_, K, V, S> { +impl fmt::Debug for ParValuesMut<'_, K, V> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.map.values().fmt(f) + ParValues { + inner: self.inner.clone(), + marker: PhantomData, + } + .fmt(f) } } @@ -203,11 +232,11 @@ impl fmt::Debug for ParValuesMut<'_ /// [`into_par_iter`]: /hashbrown/struct.HashMap.html#method.into_par_iter /// [`HashMap`]: /hashbrown/struct.HashMap.html /// [`IntoParallelIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelIterator.html -pub struct IntoParIter { - map: HashMap, +pub struct IntoParIter { + inner: RawIntoParIter<(K, V), A>, } -impl ParallelIterator for IntoParIter { +impl ParallelIterator for IntoParIter { type Item = (K, V); #[cfg_attr(feature = "inline-more", inline)] @@ -215,13 +244,19 @@ impl ParallelIterator for IntoParIter { where C: UnindexedConsumer, { - self.map.table.into_par_iter().drive_unindexed(consumer) + self.inner.drive_unindexed(consumer) } } -impl fmt::Debug for IntoParIter { +impl fmt::Debug + for IntoParIter +{ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.map.iter().fmt(f) + ParIter { + inner: unsafe { self.inner.par_iter() }, + marker: PhantomData, + } + .fmt(f) } } @@ -232,11 +267,11 @@ impl fmt::Debug for In /// /// [`par_drain`]: /hashbrown/struct.HashMap.html#method.par_drain /// [`HashMap`]: /hashbrown/struct.HashMap.html -pub struct ParDrain<'a, K, V, S> { - map: &'a mut HashMap, +pub struct ParDrain<'a, K, V, A: Allocator + Clone = Global> { + inner: RawParDrain<'a, (K, V), A>, } -impl ParallelIterator for ParDrain<'_, K, V, S> { +impl ParallelIterator for ParDrain<'_, K, V, A> { type Item = (K, V); #[cfg_attr(feature = "inline-more", inline)] @@ -244,52 +279,68 @@ impl ParallelIterator for ParDrain<'_, K, V, S> { where C: UnindexedConsumer, { - self.map.table.par_drain().drive_unindexed(consumer) + self.inner.drive_unindexed(consumer) } } -impl fmt::Debug - for ParDrain<'_, K, V, S> +impl fmt::Debug + for ParDrain<'_, K, V, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.map.iter().fmt(f) + ParIter { + inner: unsafe { self.inner.par_iter() }, + marker: PhantomData, + } + .fmt(f) } } -impl HashMap { +impl 
HashMap { /// Visits (potentially in parallel) immutably borrowed keys in an arbitrary order. #[cfg_attr(feature = "inline-more", inline)] - pub fn par_keys(&self) -> ParKeys<'_, K, V, S> { - ParKeys { map: self } + pub fn par_keys(&self) -> ParKeys<'_, K, V> { + ParKeys { + inner: unsafe { self.table.par_iter() }, + marker: PhantomData, + } } /// Visits (potentially in parallel) immutably borrowed values in an arbitrary order. #[cfg_attr(feature = "inline-more", inline)] - pub fn par_values(&self) -> ParValues<'_, K, V, S> { - ParValues { map: self } + pub fn par_values(&self) -> ParValues<'_, K, V> { + ParValues { + inner: unsafe { self.table.par_iter() }, + marker: PhantomData, + } } } -impl HashMap { +impl HashMap { /// Visits (potentially in parallel) mutably borrowed values in an arbitrary order. #[cfg_attr(feature = "inline-more", inline)] - pub fn par_values_mut(&mut self) -> ParValuesMut<'_, K, V, S> { - ParValuesMut { map: self } + pub fn par_values_mut(&mut self) -> ParValuesMut<'_, K, V> { + ParValuesMut { + inner: unsafe { self.table.par_iter() }, + marker: PhantomData, + } } /// Consumes (potentially in parallel) all values in an arbitrary order, /// while preserving the map's allocated memory for reuse. #[cfg_attr(feature = "inline-more", inline)] - pub fn par_drain(&mut self) -> ParDrain<'_, K, V, S> { - ParDrain { map: self } + pub fn par_drain(&mut self) -> ParDrain<'_, K, V, A> { + ParDrain { + inner: self.table.par_drain(), + } } } -impl HashMap +impl HashMap where K: Eq + Hash + Sync, V: PartialEq + Sync, S: BuildHasher + Sync, + A: Allocator + Clone + Sync, { /// Returns `true` if the map is equal to another, /// i.e. both maps contain the same keys mapped to the same values. @@ -303,33 +354,47 @@ where } } -impl IntoParallelIterator for HashMap { +impl IntoParallelIterator + for HashMap +{ type Item = (K, V); - type Iter = IntoParIter; + type Iter = IntoParIter; #[cfg_attr(feature = "inline-more", inline)] fn into_par_iter(self) -> Self::Iter { - IntoParIter { map: self } + IntoParIter { + inner: self.table.into_par_iter(), + } } } -impl<'a, K: Sync, V: Sync, S: Sync> IntoParallelIterator for &'a HashMap { +impl<'a, K: Sync, V: Sync, S, A: Allocator + Clone> IntoParallelIterator + for &'a HashMap +{ type Item = (&'a K, &'a V); - type Iter = ParIter<'a, K, V, S>; + type Iter = ParIter<'a, K, V>; #[cfg_attr(feature = "inline-more", inline)] fn into_par_iter(self) -> Self::Iter { - ParIter { map: self } + ParIter { + inner: unsafe { self.table.par_iter() }, + marker: PhantomData, + } } } -impl<'a, K: Send + Sync, V: Send, S: Send> IntoParallelIterator for &'a mut HashMap { +impl<'a, K: Sync, V: Send, S, A: Allocator + Clone> IntoParallelIterator + for &'a mut HashMap +{ type Item = (&'a K, &'a mut V); - type Iter = ParIterMut<'a, K, V, S>; + type Iter = ParIterMut<'a, K, V>; #[cfg_attr(feature = "inline-more", inline)] fn into_par_iter(self) -> Self::Iter { - ParIterMut { map: self } + ParIterMut { + inner: unsafe { self.table.par_iter() }, + marker: PhantomData, + } } } @@ -337,7 +402,7 @@ impl<'a, K: Send + Sync, V: Send, S: Send> IntoParallelIterator for &'a mut Hash /// hashmap. If multiple pairs correspond to the same key, then the /// ones produced earlier in the parallel iterator will be /// overwritten, just as with a sequential iterator. -impl FromParallelIterator<(K, V)> for HashMap +impl FromParallelIterator<(K, V)> for HashMap where K: Eq + Hash + Send, V: Send, @@ -354,11 +419,12 @@ where } /// Extend a hash map with items from a parallel iterator. 
-impl ParallelExtend<(K, V)> for HashMap +impl ParallelExtend<(K, V)> for HashMap where K: Eq + Hash + Send, V: Send, S: BuildHasher, + A: Allocator + Clone, { fn par_extend(&mut self, par_iter: I) where @@ -369,11 +435,12 @@ where } /// Extend a hash map with copied items from a parallel iterator. -impl<'a, K, V, S> ParallelExtend<(&'a K, &'a V)> for HashMap +impl<'a, K, V, S, A> ParallelExtend<(&'a K, &'a V)> for HashMap where K: Copy + Eq + Hash + Sync, V: Copy + Sync, S: BuildHasher, + A: Allocator + Clone, { fn par_extend(&mut self, par_iter: I) where @@ -384,12 +451,13 @@ where } // This is equal to the normal `HashMap` -- no custom advantage. -fn extend(map: &mut HashMap, par_iter: I) +fn extend(map: &mut HashMap, par_iter: I) where K: Eq + Hash, S: BuildHasher, I: IntoParallelIterator, - HashMap: Extend, + A: Allocator + Clone, + HashMap: Extend, { let (list, len) = super::helpers::collect(par_iter); diff --git a/src/external_trait_impls/rayon/raw.rs b/src/external_trait_impls/rayon/raw.rs index 1bd2c17..18da1ea 100644 --- a/src/external_trait_impls/rayon/raw.rs +++ b/src/external_trait_impls/rayon/raw.rs @@ -1,5 +1,5 @@ use crate::raw::Bucket; -use crate::raw::{RawIter, RawIterRange, RawTable}; +use crate::raw::{Allocator, Global, RawIter, RawIterRange, RawTable}; use crate::scopeguard::guard; use alloc::alloc::dealloc; use core::marker::PhantomData; @@ -15,6 +15,22 @@ pub struct RawParIter { iter: RawIterRange, } +impl RawParIter { + #[cfg_attr(feature = "inline-more", inline)] + pub(super) unsafe fn iter(&self) -> RawIterRange { + self.iter.clone() + } +} + +impl Clone for RawParIter { + #[cfg_attr(feature = "inline-more", inline)] + fn clone(&self) -> Self { + Self { + iter: self.iter.clone(), + } + } +} + impl From> for RawParIter { fn from(it: RawIter) -> Self { RawParIter { iter: it.iter } @@ -60,11 +76,18 @@ impl UnindexedProducer for ParIterProducer { } /// Parallel iterator which consumes a table and returns elements. -pub struct RawIntoParIter { - table: RawTable, +pub struct RawIntoParIter { + table: RawTable, } -impl ParallelIterator for RawIntoParIter { +impl RawIntoParIter { + #[cfg_attr(feature = "inline-more", inline)] + pub(super) unsafe fn par_iter(&self) -> RawParIter { + self.table.par_iter() + } +} + +impl ParallelIterator for RawIntoParIter { type Item = T; #[cfg_attr(feature = "inline-more", inline)] @@ -73,7 +96,7 @@ impl ParallelIterator for RawIntoParIter { C: UnindexedConsumer, { let iter = unsafe { self.table.iter().iter }; - let _guard = guard(self.table.into_alloc(), |alloc| { + let _guard = guard(self.table.into_allocation(), |alloc| { if let Some((ptr, layout)) = *alloc { unsafe { dealloc(ptr.as_ptr(), layout); @@ -86,16 +109,23 @@ impl ParallelIterator for RawIntoParIter { } /// Parallel iterator which consumes elements without freeing the table storage. -pub struct RawParDrain<'a, T> { +pub struct RawParDrain<'a, T, A: Allocator + Clone = Global> { // We don't use a &'a mut RawTable because we want RawParDrain to be // covariant over T. 
- table: NonNull>, - marker: PhantomData<&'a RawTable>, + table: NonNull>, + marker: PhantomData<&'a RawTable>, } -unsafe impl Send for RawParDrain<'_, T> {} +unsafe impl Send for RawParDrain<'_, T, A> {} + +impl RawParDrain<'_, T, A> { + #[cfg_attr(feature = "inline-more", inline)] + pub(super) unsafe fn par_iter(&self) -> RawParIter { + self.table.as_ref().par_iter() + } +} -impl ParallelIterator for RawParDrain<'_, T> { +impl ParallelIterator for RawParDrain<'_, T, A> { type Item = T; #[cfg_attr(feature = "inline-more", inline)] @@ -113,7 +143,7 @@ impl ParallelIterator for RawParDrain<'_, T> { } } -impl Drop for RawParDrain<'_, T> { +impl Drop for RawParDrain<'_, T, A> { fn drop(&mut self) { // If drive_unindexed is not called then simply clear the table. unsafe { self.table.as_mut().clear() } @@ -172,7 +202,7 @@ impl Drop for ParDrainProducer { } } -impl RawTable { +impl RawTable { /// Returns a parallel iterator over the elements in a `RawTable`. #[cfg_attr(feature = "inline-more", inline)] pub unsafe fn par_iter(&self) -> RawParIter { @@ -183,14 +213,14 @@ impl RawTable { /// Returns a parallel iterator over the elements in a `RawTable`. #[cfg_attr(feature = "inline-more", inline)] - pub fn into_par_iter(self) -> RawIntoParIter { + pub fn into_par_iter(self) -> RawIntoParIter { RawIntoParIter { table: self } } /// Returns a parallel iterator which consumes all elements of a `RawTable` /// without freeing its memory allocation. #[cfg_attr(feature = "inline-more", inline)] - pub fn par_drain(&mut self) -> RawParDrain<'_, T> { + pub fn par_drain(&mut self) -> RawParDrain<'_, T, A> { RawParDrain { table: NonNull::from(self), marker: PhantomData, diff --git a/src/external_trait_impls/rayon/set.rs b/src/external_trait_impls/rayon/set.rs index 53d2660..ee4f6e6 100644 --- a/src/external_trait_impls/rayon/set.rs +++ b/src/external_trait_impls/rayon/set.rs @@ -1,6 +1,8 @@ //! Rayon extensions for `HashSet`. 
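The `set.rs` changes below mirror the map changes: the set's parallel iterators now wrap the map's, and the parallel set operations gain an allocator parameter. A minimal usage sketch of the parallel set API (assuming the `rayon` and default `ahash` features are enabled; types and values are illustrative):

```rust
use hashbrown::HashSet;
use rayon::prelude::*;

let a: HashSet<i32> = (0..100).collect();
let b: HashSet<i32> = (50..150).collect();

// par_union visits every distinct element of both sets exactly once.
let union: HashSet<i32> = a.par_union(&b).copied().collect();
assert_eq!(union.len(), 150);

// Parallel iteration over a single set via the rayon extension traits.
let sum: i32 = a.par_iter().copied().sum();
assert_eq!(sum, (0..100).sum());
```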
+use super::map; use crate::hash_set::HashSet; +use crate::raw::{Allocator, Global}; use core::hash::{BuildHasher, Hash}; use rayon::iter::plumbing::UnindexedConsumer; use rayon::iter::{FromParallelIterator, IntoParallelIterator, ParallelExtend, ParallelIterator}; @@ -14,22 +16,18 @@ use rayon::iter::{FromParallelIterator, IntoParallelIterator, ParallelExtend, Pa /// [`into_par_iter`]: /hashbrown/struct.HashSet.html#method.into_par_iter /// [`HashSet`]: /hashbrown/struct.HashSet.html /// [`IntoParallelIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelIterator.html -pub struct IntoParIter { - set: HashSet, +pub struct IntoParIter { + inner: map::IntoParIter, } -impl ParallelIterator for IntoParIter { +impl ParallelIterator for IntoParIter { type Item = T; fn drive_unindexed(self, consumer: C) -> C::Result where C: UnindexedConsumer, { - self.set - .map - .into_par_iter() - .map(|(k, _)| k) - .drive_unindexed(consumer) + self.inner.map(|(k, _)| k).drive_unindexed(consumer) } } @@ -40,22 +38,18 @@ impl ParallelIterator for IntoParIter { /// /// [`par_drain`]: /hashbrown/struct.HashSet.html#method.par_drain /// [`HashSet`]: /hashbrown/struct.HashSet.html -pub struct ParDrain<'a, T, S> { - set: &'a mut HashSet, +pub struct ParDrain<'a, T, A: Allocator + Clone = Global> { + inner: map::ParDrain<'a, T, (), A>, } -impl ParallelIterator for ParDrain<'_, T, S> { +impl ParallelIterator for ParDrain<'_, T, A> { type Item = T; fn drive_unindexed(self, consumer: C) -> C::Result where C: UnindexedConsumer, { - self.set - .map - .par_drain() - .map(|(k, _)| k) - .drive_unindexed(consumer) + self.inner.map(|(k, _)| k).drive_unindexed(consumer) } } @@ -68,18 +62,18 @@ impl ParallelIterator for ParDrain<'_, T, S> { /// [`par_iter`]: /hashbrown/struct.HashSet.html#method.par_iter /// [`HashSet`]: /hashbrown/struct.HashSet.html /// [`IntoParallelRefIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelRefIterator.html -pub struct ParIter<'a, T, S> { - set: &'a HashSet, +pub struct ParIter<'a, T> { + inner: map::ParKeys<'a, T, ()>, } -impl<'a, T: Sync, S: Sync> ParallelIterator for ParIter<'a, T, S> { +impl<'a, T: Sync> ParallelIterator for ParIter<'a, T> { type Item = &'a T; fn drive_unindexed(self, consumer: C) -> C::Result where C: UnindexedConsumer, { - self.set.map.par_keys().drive_unindexed(consumer) + self.inner.drive_unindexed(consumer) } } @@ -91,15 +85,16 @@ impl<'a, T: Sync, S: Sync> ParallelIterator for ParIter<'a, T, S> { /// /// [`par_difference`]: /hashbrown/struct.HashSet.html#method.par_difference /// [`HashSet`]: /hashbrown/struct.HashSet.html -pub struct ParDifference<'a, T, S> { - a: &'a HashSet, - b: &'a HashSet, +pub struct ParDifference<'a, T, S, A: Allocator + Clone = Global> { + a: &'a HashSet, + b: &'a HashSet, } -impl<'a, T, S> ParallelIterator for ParDifference<'a, T, S> +impl<'a, T, S, A> ParallelIterator for ParDifference<'a, T, S, A> where T: Eq + Hash + Sync, S: BuildHasher + Sync, + A: Allocator + Clone + Sync, { type Item = &'a T; @@ -123,15 +118,16 @@ where /// /// [`par_symmetric_difference`]: /hashbrown/struct.HashSet.html#method.par_symmetric_difference /// [`HashSet`]: /hashbrown/struct.HashSet.html -pub struct ParSymmetricDifference<'a, T, S> { - a: &'a HashSet, - b: &'a HashSet, +pub struct ParSymmetricDifference<'a, T, S, A: Allocator + Clone = Global> { + a: &'a HashSet, + b: &'a HashSet, } -impl<'a, T, S> ParallelIterator for ParSymmetricDifference<'a, T, S> +impl<'a, T, S, A> ParallelIterator for ParSymmetricDifference<'a, T, S, A> where 
T: Eq + Hash + Sync, S: BuildHasher + Sync, + A: Allocator + Clone + Sync, { type Item = &'a T; @@ -154,15 +150,16 @@ where /// /// [`par_intersection`]: /hashbrown/struct.HashSet.html#method.par_intersection /// [`HashSet`]: /hashbrown/struct.HashSet.html -pub struct ParIntersection<'a, T, S> { - a: &'a HashSet, - b: &'a HashSet, +pub struct ParIntersection<'a, T, S, A: Allocator + Clone = Global> { + a: &'a HashSet, + b: &'a HashSet, } -impl<'a, T, S> ParallelIterator for ParIntersection<'a, T, S> +impl<'a, T, S, A> ParallelIterator for ParIntersection<'a, T, S, A> where T: Eq + Hash + Sync, S: BuildHasher + Sync, + A: Allocator + Clone + Sync, { type Item = &'a T; @@ -184,15 +181,16 @@ where /// /// [`par_union`]: /hashbrown/struct.HashSet.html#method.par_union /// [`HashSet`]: /hashbrown/struct.HashSet.html -pub struct ParUnion<'a, T, S> { - a: &'a HashSet, - b: &'a HashSet, +pub struct ParUnion<'a, T, S, A: Allocator + Clone = Global> { + a: &'a HashSet, + b: &'a HashSet, } -impl<'a, T, S> ParallelIterator for ParUnion<'a, T, S> +impl<'a, T, S, A> ParallelIterator for ParUnion<'a, T, S, A> where T: Eq + Hash + Sync, S: BuildHasher + Sync, + A: Allocator + Clone + Sync, { type Item = &'a T; @@ -200,22 +198,37 @@ where where C: UnindexedConsumer, { - self.a + // We'll iterate one set in full, and only the remaining difference from the other. + // Use the smaller set for the difference in order to reduce hash lookups. + let (smaller, larger) = if self.a.len() <= self.b.len() { + (self.a, self.b) + } else { + (self.b, self.a) + }; + larger .into_par_iter() - .chain(self.b.par_difference(self.a)) + .chain(smaller.par_difference(larger)) .drive_unindexed(consumer) } } -impl HashSet +impl HashSet where T: Eq + Hash + Sync, S: BuildHasher + Sync, + A: Allocator + Clone + Sync, { + /// Visits (potentially in parallel) the values representing the union, + /// i.e. all the values in `self` or `other`, without duplicates. + #[cfg_attr(feature = "inline-more", inline)] + pub fn par_union<'a>(&'a self, other: &'a Self) -> ParUnion<'a, T, S, A> { + ParUnion { a: self, b: other } + } + /// Visits (potentially in parallel) the values representing the difference, /// i.e. the values that are in `self` but not in `other`. #[cfg_attr(feature = "inline-more", inline)] - pub fn par_difference<'a>(&'a self, other: &'a Self) -> ParDifference<'a, T, S> { + pub fn par_difference<'a>(&'a self, other: &'a Self) -> ParDifference<'a, T, S, A> { ParDifference { a: self, b: other } } @@ -225,24 +238,17 @@ where pub fn par_symmetric_difference<'a>( &'a self, other: &'a Self, - ) -> ParSymmetricDifference<'a, T, S> { + ) -> ParSymmetricDifference<'a, T, S, A> { ParSymmetricDifference { a: self, b: other } } /// Visits (potentially in parallel) the values representing the /// intersection, i.e. the values that are both in `self` and `other`. #[cfg_attr(feature = "inline-more", inline)] - pub fn par_intersection<'a>(&'a self, other: &'a Self) -> ParIntersection<'a, T, S> { + pub fn par_intersection<'a>(&'a self, other: &'a Self) -> ParIntersection<'a, T, S, A> { ParIntersection { a: self, b: other } } - /// Visits (potentially in parallel) the values representing the union, - /// i.e. all the values in `self` or `other`, without duplicates. - #[cfg_attr(feature = "inline-more", inline)] - pub fn par_union<'a>(&'a self, other: &'a Self) -> ParUnion<'a, T, S> { - ParUnion { a: self, b: other } - } - /// Returns `true` if `self` has no elements in common with `other`. 
/// This is equivalent to checking for an empty intersection. /// @@ -280,41 +286,47 @@ where } } -impl HashSet +impl HashSet where T: Eq + Hash + Send, - S: BuildHasher + Send, + A: Allocator + Clone + Send, { /// Consumes (potentially in parallel) all values in an arbitrary order, /// while preserving the set's allocated memory for reuse. #[cfg_attr(feature = "inline-more", inline)] - pub fn par_drain(&mut self) -> ParDrain<'_, T, S> { - ParDrain { set: self } + pub fn par_drain(&mut self) -> ParDrain<'_, T, A> { + ParDrain { + inner: self.map.par_drain(), + } } } -impl IntoParallelIterator for HashSet { +impl IntoParallelIterator for HashSet { type Item = T; - type Iter = IntoParIter; + type Iter = IntoParIter; #[cfg_attr(feature = "inline-more", inline)] fn into_par_iter(self) -> Self::Iter { - IntoParIter { set: self } + IntoParIter { + inner: self.map.into_par_iter(), + } } } -impl<'a, T: Sync, S: Sync> IntoParallelIterator for &'a HashSet { +impl<'a, T: Sync, S, A: Allocator + Clone> IntoParallelIterator for &'a HashSet { type Item = &'a T; - type Iter = ParIter<'a, T, S>; + type Iter = ParIter<'a, T>; #[cfg_attr(feature = "inline-more", inline)] fn into_par_iter(self) -> Self::Iter { - ParIter { set: self } + ParIter { + inner: self.map.par_keys(), + } } } /// Collect values from a parallel iterator into a hashset. -impl FromParallelIterator for HashSet +impl FromParallelIterator for HashSet where T: Eq + Hash + Send, S: BuildHasher + Default, @@ -330,7 +342,7 @@ where } /// Extend a hash set with items from a parallel iterator. -impl ParallelExtend for HashSet +impl ParallelExtend for HashSet where T: Eq + Hash + Send, S: BuildHasher, @@ -344,7 +356,7 @@ where } /// Extend a hash set with copied items from a parallel iterator. -impl<'a, T, S> ParallelExtend<&'a T> for HashSet +impl<'a, T, S> ParallelExtend<&'a T> for HashSet where T: 'a + Copy + Eq + Hash + Sync, S: BuildHasher, @@ -358,12 +370,13 @@ where } // This is equal to the normal `HashSet` -- no custom advantage. -fn extend(set: &mut HashSet, par_iter: I) +fn extend(set: &mut HashSet, par_iter: I) where T: Eq + Hash, S: BuildHasher, + A: Allocator + Clone, I: IntoParallelIterator, - HashSet: Extend, + HashSet: Extend, { let (list, len) = super::helpers::collect(par_iter); diff --git a/src/lib.rs b/src/lib.rs index 3aff40a..b2d6584 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,13 +12,25 @@ #![no_std] #![cfg_attr( feature = "nightly", - feature(test, core_intrinsics, dropck_eyepatch, min_specialization, extend_one) + feature( + test, + core_intrinsics, + dropck_eyepatch, + min_specialization, + extend_one, + allocator_api, + slice_ptr_get, + nonnull_slice_from_raw_parts, + maybe_uninit_array_assume_init + ) )] #![allow( clippy::doc_markdown, clippy::module_name_repetitions, clippy::must_use_candidate, - clippy::option_if_let_else + clippy::option_if_let_else, + clippy::redundant_else, + clippy::manual_map )] #![warn(missing_docs)] #![warn(rust_2018_idioms)] @@ -48,6 +60,11 @@ pub mod raw { pub use inner::*; #[cfg(feature = "rayon")] + /// [rayon]-based parallel iterator types for hash maps. + /// You will rarely need to interact with it directly unless you have need + /// to name one of the iterator types. 
+ /// + /// [rayon]: https://docs.rs/rayon/1.0/rayon pub mod rayon { pub use crate::external_trait_impls::rayon::raw::*; } @@ -110,3 +127,35 @@ pub enum TryReserveError { layout: alloc::alloc::Layout, }, } + +/// The error type for [`RawTable::get_each_mut`](crate::raw::RawTable::get_each_mut), +/// [`HashMap::get_each_mut`], and [`HashMap::get_each_key_value_mut`]. +#[cfg(feature = "nightly")] +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum UnavailableMutError { + /// The requested entry is not present in the table. + Absent, + /// The requested entry is present, but a mutable reference to it was already created and + /// returned from this call to `get_each_mut` or `get_each_key_value_mut`. + /// + /// Includes the index of the existing mutable reference in the returned array. + Duplicate(usize), +} + +/// Wrapper around `Bump` which allows it to be used as an allocator for +/// `HashMap`, `HashSet` and `RawTable`. +/// +/// `Bump` can be used directly without this wrapper on nightly if you enable +/// the `allocator-api` feature of the `bumpalo` crate. +#[cfg(feature = "bumpalo")] +#[derive(Clone, Copy, Debug)] +pub struct BumpWrapper<'a>(pub &'a bumpalo::Bump); + +#[cfg(feature = "bumpalo")] +#[test] +fn test_bumpalo() { + use bumpalo::Bump; + let bump = Bump::new(); + let mut map = HashMap::new_in(BumpWrapper(&bump)); + map.insert(0, 1); +} diff --git a/src/map.rs b/src/map.rs index 1ccba31..ab11288 100644 --- a/src/map.rs +++ b/src/map.rs @@ -1,11 +1,15 @@ -use crate::raw::{Bucket, RawDrain, RawIntoIter, RawIter, RawTable}; +use crate::raw::{Allocator, Bucket, Global, RawDrain, RawIntoIter, RawIter, RawTable}; use crate::TryReserveError; +#[cfg(feature = "nightly")] +use crate::UnavailableMutError; use core::borrow::Borrow; use core::fmt::{self, Debug}; -use core::hash::{BuildHasher, Hash, Hasher}; +use core::hash::{BuildHasher, Hash}; use core::iter::{FromIterator, FusedIterator}; use core::marker::PhantomData; use core::mem; +#[cfg(feature = "nightly")] +use core::mem::MaybeUninit; use core::ops::Index; /// Default hasher for `HashMap`. 
@@ -185,12 +189,12 @@ pub enum DefaultHashBuilder {} /// .iter().cloned().collect(); /// // use the values stored in map /// ``` -pub struct HashMap { +pub struct HashMap { pub(crate) hash_builder: S, - pub(crate) table: RawTable<(K, V)>, + pub(crate) table: RawTable<(K, V), A>, } -impl Clone for HashMap { +impl Clone for HashMap { fn clone(&self) -> Self { HashMap { hash_builder: self.hash_builder.clone(), @@ -206,8 +210,60 @@ impl Clone for HashMap { } } +/// Ensures that a single closure type across uses of this which, in turn prevents multiple +/// instances of any functions like RawTable::reserve from being generated #[cfg_attr(feature = "inline-more", inline)] -pub(crate) fn make_hash(hash_builder: &impl BuildHasher, val: &K) -> u64 { +pub(crate) fn make_hasher(hash_builder: &S) -> impl Fn(&(Q, V)) -> u64 + '_ +where + K: Borrow, + Q: Hash, + S: BuildHasher, +{ + move |val| make_hash::(hash_builder, &val.0) +} + +/// Ensures that a single closure type across uses of this which, in turn prevents multiple +/// instances of any functions like RawTable::reserve from being generated +#[cfg_attr(feature = "inline-more", inline)] +fn equivalent_key(k: &Q) -> impl Fn(&(K, V)) -> bool + '_ +where + K: Borrow, + Q: ?Sized + Eq, +{ + move |x| k.eq(x.0.borrow()) +} + +/// Ensures that a single closure type across uses of this which, in turn prevents multiple +/// instances of any functions like RawTable::reserve from being generated +#[cfg_attr(feature = "inline-more", inline)] +fn equivalent(k: &Q) -> impl Fn(&K) -> bool + '_ +where + K: Borrow, + Q: ?Sized + Eq, +{ + move |x| k.eq(x.borrow()) +} + +#[cfg_attr(feature = "inline-more", inline)] +pub(crate) fn make_hash(hash_builder: &S, val: &Q) -> u64 +where + K: Borrow, + Q: Hash + ?Sized, + S: BuildHasher, +{ + use core::hash::Hasher; + let mut state = hash_builder.build_hasher(); + val.hash(&mut state); + state.finish() +} + +#[cfg_attr(feature = "inline-more", inline)] +pub(crate) fn make_insert_hash(hash_builder: &S, val: &K) -> u64 +where + K: Hash, + S: BuildHasher, +{ + use core::hash::Hasher; let mut state = hash_builder.build_hasher(); val.hash(&mut state); state.finish() @@ -248,6 +304,27 @@ impl HashMap { } } +#[cfg(feature = "ahash")] +impl HashMap { + /// Creates an empty `HashMap` using the given allocator. + /// + /// The hash map is initially created with a capacity of 0, so it will not allocate until it + /// is first inserted into. + #[cfg_attr(feature = "inline-more", inline)] + pub fn new_in(alloc: A) -> Self { + Self::with_hasher_in(DefaultHashBuilder::default(), alloc) + } + + /// Creates an empty `HashMap` with the specified capacity using the given allocator. + /// + /// The hash map will be able to hold at least `capacity` elements without + /// reallocating. If `capacity` is 0, the hash map will not allocate. + #[cfg_attr(feature = "inline-more", inline)] + pub fn with_capacity_in(capacity: usize, alloc: A) -> Self { + Self::with_capacity_and_hasher_in(capacity, DefaultHashBuilder::default(), alloc) + } +} + impl HashMap { /// Creates an empty `HashMap` which will use the given hash builder to hash /// keys. @@ -315,6 +392,65 @@ impl HashMap { table: RawTable::with_capacity(capacity), } } +} + +impl HashMap { + /// Creates an empty `HashMap` which will use the given hash builder to hash + /// keys. It will be allocated with the given allocator. + /// + /// The created map has the default initial capacity. 
+ /// + /// Warning: `hash_builder` is normally randomly generated, and + /// is designed to allow HashMaps to be resistant to attacks that + /// cause many collisions and very poor performance. Setting it + /// manually using this function can expose a DoS attack vector. + /// + /// # Examples + /// + /// ``` + /// use hashbrown::HashMap; + /// use hashbrown::hash_map::DefaultHashBuilder; + /// + /// let s = DefaultHashBuilder::default(); + /// let mut map = HashMap::with_hasher(s); + /// map.insert(1, 2); + /// ``` + #[cfg_attr(feature = "inline-more", inline)] + pub fn with_hasher_in(hash_builder: S, alloc: A) -> Self { + Self { + hash_builder, + table: RawTable::new_in(alloc), + } + } + + /// Creates an empty `HashMap` with the specified capacity, using `hash_builder` + /// to hash the keys. It will be allocated with the given allocator. + /// + /// The hash map will be able to hold at least `capacity` elements without + /// reallocating. If `capacity` is 0, the hash map will not allocate. + /// + /// Warning: `hash_builder` is normally randomly generated, and + /// is designed to allow HashMaps to be resistant to attacks that + /// cause many collisions and very poor performance. Setting it + /// manually using this function can expose a DoS attack vector. + /// + /// # Examples + /// + /// ``` + /// use hashbrown::HashMap; + /// use hashbrown::hash_map::DefaultHashBuilder; + /// + /// let s = DefaultHashBuilder::default(); + /// let mut map = HashMap::with_capacity_and_hasher(10, s); + /// map.insert(1, 2); + /// ``` + #[cfg_attr(feature = "inline-more", inline)] + pub fn with_capacity_and_hasher_in(capacity: usize, hash_builder: S, alloc: A) -> Self { + Self { + hash_builder, + table: RawTable::with_capacity_in(capacity, alloc), + } + } /// Returns a reference to the map's [`BuildHasher`]. /// @@ -547,7 +683,7 @@ impl HashMap { /// assert!(a.is_empty()); /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn drain(&mut self) -> Drain<'_, K, V> { + pub fn drain(&mut self) -> Drain<'_, K, V, A> { Drain { inner: self.table.drain(), } @@ -607,7 +743,7 @@ impl HashMap { /// assert_eq!(odds, vec![1, 3, 5, 7]); /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn drain_filter(&mut self, f: F) -> DrainFilter<'_, K, V, F> + pub fn drain_filter(&mut self, f: F) -> DrainFilter<'_, K, V, F, A> where F: FnMut(&K, &mut V) -> bool, { @@ -639,10 +775,11 @@ impl HashMap { } } -impl HashMap +impl HashMap where K: Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { /// Reserves capacity for at least `additional` more elements to be inserted /// in the `HashMap`. The collection may reserve more space to avoid @@ -663,9 +800,8 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn reserve(&mut self, additional: usize) { - let hash_builder = &self.hash_builder; self.table - .reserve(additional, |x| make_hash(hash_builder, &x.0)); + .reserve(additional, make_hasher::(&self.hash_builder)); } /// Tries to reserve capacity for at least `additional` more elements to be inserted @@ -686,9 +822,8 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { - let hash_builder = &self.hash_builder; self.table - .try_reserve(additional, |x| make_hash(hash_builder, &x.0)) + .try_reserve(additional, make_hasher::(&self.hash_builder)) } /// Shrinks the capacity of the map as much as possible. 
It will drop @@ -709,8 +844,8 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn shrink_to_fit(&mut self) { - let hash_builder = &self.hash_builder; - self.table.shrink_to(0, |x| make_hash(hash_builder, &x.0)); + self.table + .shrink_to(0, make_hasher::(&self.hash_builder)); } /// Shrinks the capacity of the map with a lower limit. It will drop @@ -738,9 +873,8 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn shrink_to(&mut self, min_capacity: usize) { - let hash_builder = &self.hash_builder; self.table - .shrink_to(min_capacity, |x| make_hash(hash_builder, &x.0)); + .shrink_to(min_capacity, make_hasher::(&self.hash_builder)); } /// Gets the given key's corresponding entry in the map for in-place manipulation. @@ -763,9 +897,9 @@ where /// assert_eq!(letters.get(&'y'), None); /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn entry(&mut self, key: K) -> Entry<'_, K, V, S> { - let hash = make_hash(&self.hash_builder, &key); - if let Some(elem) = self.table.find(hash, |q| q.0.eq(&key)) { + pub fn entry(&mut self, key: K) -> Entry<'_, K, V, S, A> { + let hash = make_insert_hash::(&self.hash_builder, &key); + if let Some(elem) = self.table.find(hash, equivalent_key(&key)) { Entry::Occupied(OccupiedEntry { hash, key: Some(key), @@ -851,8 +985,8 @@ where K: Borrow, Q: Hash + Eq, { - let hash = make_hash(&self.hash_builder, k); - self.table.get(hash, |x| k.eq(x.0.borrow())) + let hash = make_hash::(&self.hash_builder, k); + self.table.get(hash, equivalent_key(k)) } /// Returns the key-value pair corresponding to the supplied key, with a mutable reference to value. @@ -959,8 +1093,143 @@ where K: Borrow, Q: Hash + Eq, { - let hash = make_hash(&self.hash_builder, k); - self.table.get_mut(hash, |x| k.eq(x.0.borrow())) + let hash = make_hash::(&self.hash_builder, k); + self.table.get_mut(hash, equivalent_key(k)) + } + + /// Attempts to get mutable references to `N` values in the map at once. + /// + /// Returns an array of length `N` with the results of each query. For soundness, + /// at most one mutable reference will be returned to any value. An + /// `Err(UnavailableMutError::Duplicate(i))` in the returned array indicates that a suitable + /// key-value pair exists, but a mutable reference to the value already occurs at index `i` in + /// the returned array. + /// + /// This method is available only if the `nightly` feature is enabled. + /// + /// # Examples + /// + /// ``` + /// use hashbrown::{HashMap, UnavailableMutError}; + /// + /// let mut libraries = HashMap::new(); + /// libraries.insert("Bodleian Library".to_string(), 1602); + /// libraries.insert("Athenæum".to_string(), 1807); + /// libraries.insert("Herzogin-Anna-Amalia-Bibliothek".to_string(), 1691); + /// libraries.insert("Library of Congress".to_string(), 1800); + /// + /// let got = libraries.get_each_mut([ + /// "Athenæum", + /// "New York Public Library", + /// "Athenæum", + /// "Library of Congress", + /// ]); + /// assert_eq!( + /// got, + /// [ + /// Ok(&mut 1807), + /// Err(UnavailableMutError::Absent), + /// Err(UnavailableMutError::Duplicate(0)), + /// Ok(&mut 1800), + /// ] + /// ); + /// ``` + #[cfg(feature = "nightly")] + pub fn get_each_mut( + &mut self, + ks: [&Q; N], + ) -> [Result<&'_ mut V, UnavailableMutError>; N] + where + K: Borrow, + Q: Hash + Eq, + { + let mut pairs = self.get_each_inner_mut(ks); + // TODO use `MaybeUninit::uninit_array` here instead once that's stable. 
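A standalone sketch of the `MaybeUninit` array-initialization idiom used in `get_each_mut` above while `MaybeUninit::uninit_array` remains unstable; `fill_squares` is an invented helper, and the final step uses stable `array::map` rather than the nightly `array_assume_init` called in the patch:

```rust
use core::mem::MaybeUninit;

// An uninitialized `[MaybeUninit<T>; N]` is sound to "assume init" because
// `MaybeUninit` itself requires no initialization; every slot is then written
// before any element is read back out.
fn fill_squares<const N: usize>() -> [usize; N] {
    let mut out: [MaybeUninit<usize>; N] = unsafe { MaybeUninit::uninit().assume_init() };
    for (i, slot) in out.iter_mut().enumerate() {
        *slot = MaybeUninit::new(i * i);
    }
    // Every slot was initialized above, so this per-element `assume_init` is sound.
    out.map(|slot| unsafe { slot.assume_init() })
}

assert_eq!(fill_squares::<4>(), [0, 1, 4, 9]);
```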
+ let mut out: [MaybeUninit>; N] = + unsafe { MaybeUninit::uninit().assume_init() }; + for i in 0..N { + out[i] = MaybeUninit::new( + mem::replace(&mut pairs[i], Err(UnavailableMutError::Absent)).map(|(_, v)| v), + ); + } + unsafe { MaybeUninit::array_assume_init(out) } + } + + /// Attempts to get mutable references to `N` values in the map at once, with immutable + /// references to the corresponding keys. + /// + /// Returns an array of length `N` with the results of each query. For soundness, + /// at most one mutable reference will be returned to any value. An + /// `Err(UnavailableMutError::Duplicate(i))` in the returned array indicates that a suitable + /// key-value pair exists, but a mutable reference to the value already occurs at index `i` in + /// the returned array. + /// + /// This method is available only if the `nightly` feature is enabled. + /// + /// # Examples + /// + /// ``` + /// use hashbrown::{HashMap, UnavailableMutError}; + /// + /// let mut libraries = HashMap::new(); + /// libraries.insert("Bodleian Library".to_string(), 1602); + /// libraries.insert("Athenæum".to_string(), 1807); + /// libraries.insert("Herzogin-Anna-Amalia-Bibliothek".to_string(), 1691); + /// libraries.insert("Library of Congress".to_string(), 1800); + /// + /// let got = libraries.get_each_key_value_mut([ + /// "Bodleian Library", + /// "Herzogin-Anna-Amalia-Bibliothek", + /// "Herzogin-Anna-Amalia-Bibliothek", + /// "Gewandhaus", + /// ]); + /// assert_eq!( + /// got, + /// [ + /// Ok((&"Bodleian Library".to_string(), &mut 1602)), + /// Ok((&"Herzogin-Anna-Amalia-Bibliothek".to_string(), &mut 1691)), + /// Err(UnavailableMutError::Duplicate(1)), + /// Err(UnavailableMutError::Absent), + /// ] + /// ); + /// ``` + #[cfg(feature = "nightly")] + pub fn get_each_key_value_mut( + &mut self, + ks: [&Q; N], + ) -> [Result<(&'_ K, &'_ mut V), UnavailableMutError>; N] + where + K: Borrow, + Q: Hash + Eq, + { + let mut pairs = self.get_each_inner_mut(ks); + // TODO use `MaybeUninit::uninit_array` here instead once that's stable. + let mut out: [MaybeUninit>; N] = + unsafe { MaybeUninit::uninit().assume_init() }; + for i in 0..N { + out[i] = MaybeUninit::new( + mem::replace(&mut pairs[i], Err(UnavailableMutError::Absent)) + .map(|(k, v)| (&*k, v)), + ); + } + unsafe { MaybeUninit::array_assume_init(out) } + } + + #[cfg(feature = "nightly")] + fn get_each_inner_mut( + &mut self, + ks: [&Q; N], + ) -> [Result<&'_ mut (K, V), UnavailableMutError>; N] + where + K: Borrow, + Q: Hash + Eq, + { + let mut hashes = [0_u64; N]; + for i in 0..N { + hashes[i] = make_hash::(&self.hash_builder, ks[i]); + } + self.table + .get_each_mut(hashes, |i, (k, _)| ks[i].eq(k.borrow())) } /// Inserts a key-value pair into the map. @@ -990,17 +1259,51 @@ where /// ``` #[cfg_attr(feature = "inline-more", inline)] pub fn insert(&mut self, k: K, v: V) -> Option { - let hash = make_hash(&self.hash_builder, &k); - if let Some((_, item)) = self.table.get_mut(hash, |x| k.eq(&x.0)) { + let hash = make_insert_hash::(&self.hash_builder, &k); + if let Some((_, item)) = self.table.get_mut(hash, equivalent_key(&k)) { Some(mem::replace(item, v)) } else { - let hash_builder = &self.hash_builder; self.table - .insert(hash, (k, v), |x| make_hash(hash_builder, &x.0)); + .insert(hash, (k, v), make_hasher::(&self.hash_builder)); None } } + /// Tries to insert a key-value pair into the map, and returns + /// a mutable reference to the value in the entry. 
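The `insert` rewrite above routes everything through `make_insert_hash`, `equivalent_key` and `make_hasher`. A conceptual, standalone sketch of that closure-deduplication trick follows; `hasher_fn` is an invented name, not an item from the patch:

```rust
use core::hash::{BuildHasher, Hash, Hasher};
use std::collections::hash_map::RandomState;

// Every call site obtains its hashing closure from this one helper, so a function
// taking `impl Fn(&(K, V)) -> u64` is monomorphized once per key/hasher pair rather
// than once per distinct closure written inline at each call site.
fn hasher_fn<K: Hash, V, S: BuildHasher>(hash_builder: &S) -> impl Fn(&(K, V)) -> u64 + '_ {
    move |val: &(K, V)| {
        let mut state = hash_builder.build_hasher();
        val.0.hash(&mut state);
        state.finish()
    }
}

let build = RandomState::new();
let hasher = hasher_fn::<&str, i32, _>(&build);
let _hash: u64 = hasher(&("key", 1));
```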
+ /// + /// # Errors + /// + /// If the map already had this key present, nothing is updated, and + /// an error containing the occupied entry and the value is returned. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use hashbrown::HashMap; + /// + /// let mut map = HashMap::new(); + /// assert_eq!(map.try_insert(37, "a").unwrap(), &"a"); + /// + /// let err = map.try_insert(37, "b").unwrap_err(); + /// assert_eq!(err.entry.key(), &37); + /// assert_eq!(err.entry.get(), &"a"); + /// assert_eq!(err.value, "b"); + /// ``` + #[cfg_attr(feature = "inline-more", inline)] + pub fn try_insert( + &mut self, + key: K, + value: V, + ) -> Result<&mut V, OccupiedError<'_, K, V, S, A>> { + match self.entry(key) { + Entry::Occupied(entry) => Err(OccupiedError { entry, value }), + Entry::Vacant(entry) => Ok(entry.insert(value)), + } + } + /// Removes a key from the map, returning the value at the key if the key /// was previously in the map. /// @@ -1060,12 +1363,12 @@ where K: Borrow, Q: Hash + Eq, { - let hash = make_hash(&self.hash_builder, &k); - self.table.remove_entry(hash, |x| k.eq(x.0.borrow())) + let hash = make_hash::(&self.hash_builder, k); + self.table.remove_entry(hash, equivalent_key(k)) } } -impl HashMap { +impl HashMap { /// Creates a raw entry builder for the HashMap. /// /// Raw entries provide the lowest level of control for searching and @@ -1098,7 +1401,7 @@ impl HashMap { /// acting erratically, with two keys randomly masking each other. Implementations /// are free to assume this doesn't happen (within the limits of memory-safety). #[cfg_attr(feature = "inline-more", inline)] - pub fn raw_entry_mut(&mut self) -> RawEntryBuilderMut<'_, K, V, S> { + pub fn raw_entry_mut(&mut self) -> RawEntryBuilderMut<'_, K, V, S, A> { RawEntryBuilderMut { map: self } } @@ -1118,16 +1421,17 @@ impl HashMap { /// /// Immutable raw entries have very limited use; you might instead want `raw_entry_mut`. #[cfg_attr(feature = "inline-more", inline)] - pub fn raw_entry(&self) -> RawEntryBuilder<'_, K, V, S> { + pub fn raw_entry(&self) -> RawEntryBuilder<'_, K, V, S, A> { RawEntryBuilder { map: self } } } -impl PartialEq for HashMap +impl PartialEq for HashMap where K: Eq + Hash, V: PartialEq, S: BuildHasher, + A: Allocator + Clone, { fn eq(&self, other: &Self) -> bool { if self.len() != other.len() { @@ -1139,40 +1443,44 @@ where } } -impl Eq for HashMap +impl Eq for HashMap where K: Eq + Hash, V: Eq, S: BuildHasher, + A: Allocator + Clone, { } -impl Debug for HashMap +impl Debug for HashMap where K: Debug, V: Debug, + A: Allocator + Clone, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_map().entries(self.iter()).finish() } } -impl Default for HashMap +impl Default for HashMap where S: Default, + A: Default + Allocator + Clone, { - /// Creates an empty `HashMap`, with the `Default` value for the hasher. + /// Creates an empty `HashMap`, with the `Default` value for the hasher and allocator. 
#[cfg_attr(feature = "inline-more", inline)] fn default() -> Self { - Self::with_hasher(Default::default()) + Self::with_hasher_in(Default::default(), Default::default()) } } -impl Index<&Q> for HashMap +impl Index<&Q> for HashMap where K: Eq + Hash + Borrow, Q: Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { type Output = V; @@ -1252,11 +1560,11 @@ impl IterMut<'_, K, V> { /// /// [`into_iter`]: struct.HashMap.html#method.into_iter /// [`HashMap`]: struct.HashMap.html -pub struct IntoIter { - inner: RawIntoIter<(K, V)>, +pub struct IntoIter { + inner: RawIntoIter<(K, V), A>, } -impl IntoIter { +impl IntoIter { /// Returns a iterator of references over the remaining items. #[cfg_attr(feature = "inline-more", inline)] pub(super) fn iter(&self) -> Iter<'_, K, V> { @@ -1328,11 +1636,11 @@ impl fmt::Debug for Values<'_, K, V> { /// /// [`drain`]: struct.HashMap.html#method.drain /// [`HashMap`]: struct.HashMap.html -pub struct Drain<'a, K, V> { - inner: RawDrain<'a, (K, V)>, +pub struct Drain<'a, K, V, A: Allocator + Clone = Global> { + inner: RawDrain<'a, (K, V), A>, } -impl Drain<'_, K, V> { +impl Drain<'_, K, V, A> { /// Returns a iterator of references over the remaining items. #[cfg_attr(feature = "inline-more", inline)] pub(super) fn iter(&self) -> Iter<'_, K, V> { @@ -1350,17 +1658,18 @@ impl Drain<'_, K, V> { /// /// [`drain_filter`]: struct.HashMap.html#method.drain_filter /// [`HashMap`]: struct.HashMap.html -pub struct DrainFilter<'a, K, V, F> +pub struct DrainFilter<'a, K, V, F, A: Allocator + Clone = Global> where F: FnMut(&K, &mut V) -> bool, { f: F, - inner: DrainFilterInner<'a, K, V>, + inner: DrainFilterInner<'a, K, V, A>, } -impl<'a, K, V, F> Drop for DrainFilter<'a, K, V, F> +impl<'a, K, V, F, A> Drop for DrainFilter<'a, K, V, F, A> where F: FnMut(&K, &mut V) -> bool, + A: Allocator + Clone, { #[cfg_attr(feature = "inline-more", inline)] fn drop(&mut self) { @@ -1381,9 +1690,10 @@ impl Drop for ConsumeAllOnDrop<'_, T> { } } -impl Iterator for DrainFilter<'_, K, V, F> +impl Iterator for DrainFilter<'_, K, V, F, A> where F: FnMut(&K, &mut V) -> bool, + A: Allocator + Clone, { type Item = (K, V); @@ -1401,12 +1711,12 @@ where impl FusedIterator for DrainFilter<'_, K, V, F> where F: FnMut(&K, &mut V) -> bool {} /// Portions of `DrainFilter` shared with `set::DrainFilter` -pub(super) struct DrainFilterInner<'a, K, V> { +pub(super) struct DrainFilterInner<'a, K, V, A: Allocator + Clone> { pub iter: RawIter<(K, V)>, - pub table: &'a mut RawTable<(K, V)>, + pub table: &'a mut RawTable<(K, V), A>, } -impl DrainFilterInner<'_, K, V> { +impl DrainFilterInner<'_, K, V, A> { #[cfg_attr(feature = "inline-more", inline)] pub(super) fn next(&mut self, f: &mut F) -> Option<(K, V)> where @@ -1440,8 +1750,8 @@ pub struct ValuesMut<'a, K, V> { /// See the [`HashMap::raw_entry_mut`] docs for usage examples. /// /// [`HashMap::raw_entry_mut`]: struct.HashMap.html#method.raw_entry_mut -pub struct RawEntryBuilderMut<'a, K, V, S> { - map: &'a mut HashMap, +pub struct RawEntryBuilderMut<'a, K, V, S, A: Allocator + Clone = Global> { + map: &'a mut HashMap, } /// A view into a single entry in a map, which may either be vacant or occupied. @@ -1455,35 +1765,35 @@ pub struct RawEntryBuilderMut<'a, K, V, S> { /// [`Entry`]: enum.Entry.html /// [`raw_entry_mut`]: struct.HashMap.html#method.raw_entry_mut /// [`RawEntryBuilderMut`]: struct.RawEntryBuilderMut.html -pub enum RawEntryMut<'a, K, V, S> { +pub enum RawEntryMut<'a, K, V, S, A: Allocator + Clone = Global> { /// An occupied entry. 
- Occupied(RawOccupiedEntryMut<'a, K, V, S>), + Occupied(RawOccupiedEntryMut<'a, K, V, S, A>), /// A vacant entry. - Vacant(RawVacantEntryMut<'a, K, V, S>), + Vacant(RawVacantEntryMut<'a, K, V, S, A>), } /// A view into an occupied entry in a `HashMap`. /// It is part of the [`RawEntryMut`] enum. /// /// [`RawEntryMut`]: enum.RawEntryMut.html -pub struct RawOccupiedEntryMut<'a, K, V, S> { +pub struct RawOccupiedEntryMut<'a, K, V, S, A: Allocator + Clone = Global> { elem: Bucket<(K, V)>, - table: &'a mut RawTable<(K, V)>, + table: &'a mut RawTable<(K, V), A>, hash_builder: &'a S, } -unsafe impl Send for RawOccupiedEntryMut<'_, K, V, S> +unsafe impl Send for RawOccupiedEntryMut<'_, K, V, S, A> where K: Send, V: Send, - S: Sync, + A: Send + Allocator + Clone, { } -unsafe impl Sync for RawOccupiedEntryMut<'_, K, V, S> +unsafe impl Sync for RawOccupiedEntryMut<'_, K, V, S, A> where K: Sync, V: Sync, - S: Sync, + A: Send + Allocator + Clone, { } @@ -1491,8 +1801,8 @@ where /// It is part of the [`RawEntryMut`] enum. /// /// [`RawEntryMut`]: enum.RawEntryMut.html -pub struct RawVacantEntryMut<'a, K, V, S> { - table: &'a mut RawTable<(K, V)>, +pub struct RawVacantEntryMut<'a, K, V, S, A: Allocator + Clone = Global> { + table: &'a mut RawTable<(K, V), A>, hash_builder: &'a S, } @@ -1501,42 +1811,41 @@ pub struct RawVacantEntryMut<'a, K, V, S> { /// See the [`HashMap::raw_entry`] docs for usage examples. /// /// [`HashMap::raw_entry`]: struct.HashMap.html#method.raw_entry -pub struct RawEntryBuilder<'a, K, V, S> { - map: &'a HashMap, +pub struct RawEntryBuilder<'a, K, V, S, A: Allocator + Clone = Global> { + map: &'a HashMap, } -impl<'a, K, V, S> RawEntryBuilderMut<'a, K, V, S> { +impl<'a, K, V, S, A: Allocator + Clone> RawEntryBuilderMut<'a, K, V, S, A> { /// Creates a `RawEntryMut` from the given key. #[cfg_attr(feature = "inline-more", inline)] #[allow(clippy::wrong_self_convention)] - pub fn from_key(self, k: &Q) -> RawEntryMut<'a, K, V, S> + pub fn from_key(self, k: &Q) -> RawEntryMut<'a, K, V, S, A> where S: BuildHasher, K: Borrow, Q: Hash + Eq, { - let mut hasher = self.map.hash_builder.build_hasher(); - k.hash(&mut hasher); - self.from_key_hashed_nocheck(hasher.finish(), k) + let hash = make_hash::(&self.map.hash_builder, k); + self.from_key_hashed_nocheck(hash, k) } /// Creates a `RawEntryMut` from the given key and its hash. #[inline] #[allow(clippy::wrong_self_convention)] - pub fn from_key_hashed_nocheck(self, hash: u64, k: &Q) -> RawEntryMut<'a, K, V, S> + pub fn from_key_hashed_nocheck(self, hash: u64, k: &Q) -> RawEntryMut<'a, K, V, S, A> where K: Borrow, Q: Eq, { - self.from_hash(hash, |q| q.borrow().eq(k)) + self.from_hash(hash, equivalent(k)) } } -impl<'a, K, V, S> RawEntryBuilderMut<'a, K, V, S> { +impl<'a, K, V, S, A: Allocator + Clone> RawEntryBuilderMut<'a, K, V, S, A> { /// Creates a `RawEntryMut` from the given hash. 
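Since the raw-entry builders above now go through `make_hash` and `equivalent`, their behaviour is easiest to see from the caller's side. A small doc-test style usage sketch, hashing the key once and reusing that hash (not taken from the patch):

```rust
use core::hash::{BuildHasher, Hash, Hasher};
use hashbrown::hash_map::{HashMap, RawEntryMut};

let mut map: HashMap<String, u32> = HashMap::new();
map.insert("hello".to_string(), 1);

// Hash the borrowed key once; the raw entry API lets us reuse that hash for the
// lookup and, if needed, for the insertion as well.
let key = "hello";
let mut state = map.hasher().build_hasher();
key.hash(&mut state);
let hash = state.finish();

match map.raw_entry_mut().from_key_hashed_nocheck(hash, key) {
    RawEntryMut::Occupied(mut occupied) => *occupied.get_mut() += 1,
    RawEntryMut::Vacant(vacant) => {
        vacant.insert_hashed_nocheck(hash, key.to_string(), 1);
    }
}
assert_eq!(map["hello"], 2);
```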
#[cfg_attr(feature = "inline-more", inline)] #[allow(clippy::wrong_self_convention)] - pub fn from_hash(self, hash: u64, is_match: F) -> RawEntryMut<'a, K, V, S> + pub fn from_hash(self, hash: u64, is_match: F) -> RawEntryMut<'a, K, V, S, A> where for<'b> F: FnMut(&'b K) -> bool, { @@ -1544,7 +1853,7 @@ impl<'a, K, V, S> RawEntryBuilderMut<'a, K, V, S> { } #[cfg_attr(feature = "inline-more", inline)] - fn search(self, hash: u64, mut is_match: F) -> RawEntryMut<'a, K, V, S> + fn search(self, hash: u64, mut is_match: F) -> RawEntryMut<'a, K, V, S, A> where for<'b> F: FnMut(&'b K) -> bool, { @@ -1562,7 +1871,7 @@ impl<'a, K, V, S> RawEntryBuilderMut<'a, K, V, S> { } } -impl<'a, K, V, S> RawEntryBuilder<'a, K, V, S> { +impl<'a, K, V, S, A: Allocator + Clone> RawEntryBuilder<'a, K, V, S, A> { /// Access an entry by key. #[cfg_attr(feature = "inline-more", inline)] #[allow(clippy::wrong_self_convention)] @@ -1572,9 +1881,8 @@ impl<'a, K, V, S> RawEntryBuilder<'a, K, V, S> { K: Borrow, Q: Hash + Eq, { - let mut hasher = self.map.hash_builder.build_hasher(); - k.hash(&mut hasher); - self.from_key_hashed_nocheck(hasher.finish(), k) + let hash = make_hash::(&self.map.hash_builder, k); + self.from_key_hashed_nocheck(hash, k) } /// Access an entry by a key and its hash. @@ -1585,7 +1893,7 @@ impl<'a, K, V, S> RawEntryBuilder<'a, K, V, S> { K: Borrow, Q: Eq, { - self.from_hash(hash, |q| q.borrow().eq(k)) + self.from_hash(hash, equivalent(k)) } #[cfg_attr(feature = "inline-more", inline)] @@ -1610,7 +1918,7 @@ impl<'a, K, V, S> RawEntryBuilder<'a, K, V, S> { } } -impl<'a, K, V, S> RawEntryMut<'a, K, V, S> { +impl<'a, K, V, S, A: Allocator + Clone> RawEntryMut<'a, K, V, S, A> { /// Sets the value of the entry, and returns a RawOccupiedEntryMut. /// /// # Examples @@ -1624,7 +1932,7 @@ impl<'a, K, V, S> RawEntryMut<'a, K, V, S> { /// assert_eq!(entry.remove_entry(), ("horseyland", 37)); /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn insert(self, key: K, value: V) -> RawOccupiedEntryMut<'a, K, V, S> + pub fn insert(self, key: K, value: V) -> RawOccupiedEntryMut<'a, K, V, S, A> where K: Hash, S: BuildHasher, @@ -1804,7 +2112,7 @@ impl<'a, K, V, S> RawEntryMut<'a, K, V, S> { } } -impl<'a, K, V, S> RawOccupiedEntryMut<'a, K, V, S> { +impl<'a, K, V, S, A: Allocator + Clone> RawOccupiedEntryMut<'a, K, V, S, A> { /// Gets a reference to the key in the entry. #[cfg_attr(feature = "inline-more", inline)] pub fn key(&self) -> &K { @@ -1899,7 +2207,7 @@ impl<'a, K, V, S> RawOccupiedEntryMut<'a, K, V, S> { /// the entry and allows to replace or remove it based on the /// value of the returned option. #[cfg_attr(feature = "inline-more", inline)] - pub fn replace_entry_with(self, f: F) -> RawEntryMut<'a, K, V, S> + pub fn replace_entry_with(self, f: F) -> RawEntryMut<'a, K, V, S, A> where F: FnOnce(&K, V) -> Option, { @@ -1922,7 +2230,7 @@ impl<'a, K, V, S> RawOccupiedEntryMut<'a, K, V, S> { } } -impl<'a, K, V, S> RawVacantEntryMut<'a, K, V, S> { +impl<'a, K, V, S, A: Allocator + Clone> RawVacantEntryMut<'a, K, V, S, A> { /// Sets the value of the entry with the VacantEntry's key, /// and returns a mutable reference to it. 
#[cfg_attr(feature = "inline-more", inline)] @@ -1931,9 +2239,8 @@ impl<'a, K, V, S> RawVacantEntryMut<'a, K, V, S> { K: Hash, S: BuildHasher, { - let mut hasher = self.hash_builder.build_hasher(); - key.hash(&mut hasher); - self.insert_hashed_nocheck(hasher.finish(), key, value) + let hash = make_insert_hash::(self.hash_builder, &key); + self.insert_hashed_nocheck(hash, key, value) } /// Sets the value of the entry with the VacantEntry's key, @@ -1945,8 +2252,12 @@ impl<'a, K, V, S> RawVacantEntryMut<'a, K, V, S> { K: Hash, S: BuildHasher, { - let hash_builder = self.hash_builder; - self.insert_with_hasher(hash, key, value, |k| make_hash(hash_builder, k)) + let &mut (ref mut k, ref mut v) = self.table.insert_entry( + hash, + (key, value), + make_hasher::(self.hash_builder), + ); + (k, v) } /// Set the value of an entry with a custom hasher function. @@ -1968,18 +2279,17 @@ impl<'a, K, V, S> RawVacantEntryMut<'a, K, V, S> { } #[cfg_attr(feature = "inline-more", inline)] - fn insert_entry(self, key: K, value: V) -> RawOccupiedEntryMut<'a, K, V, S> + fn insert_entry(self, key: K, value: V) -> RawOccupiedEntryMut<'a, K, V, S, A> where K: Hash, S: BuildHasher, { - let hash_builder = self.hash_builder; - let mut hasher = self.hash_builder.build_hasher(); - key.hash(&mut hasher); - - let elem = self.table.insert(hasher.finish(), (key, value), |k| { - make_hash(hash_builder, &k.0) - }); + let hash = make_insert_hash::(self.hash_builder, &key); + let elem = self.table.insert( + hash, + (key, value), + make_hasher::(self.hash_builder), + ); RawOccupiedEntryMut { elem, table: self.table, @@ -1988,13 +2298,13 @@ impl<'a, K, V, S> RawVacantEntryMut<'a, K, V, S> { } } -impl Debug for RawEntryBuilderMut<'_, K, V, S> { +impl Debug for RawEntryBuilderMut<'_, K, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("RawEntryBuilder").finish() } } -impl Debug for RawEntryMut<'_, K, V, S> { +impl Debug for RawEntryMut<'_, K, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { RawEntryMut::Vacant(ref v) => f.debug_tuple("RawEntry").field(v).finish(), @@ -2003,7 +2313,7 @@ impl Debug for RawEntryMut<'_, K, V, S> { } } -impl Debug for RawOccupiedEntryMut<'_, K, V, S> { +impl Debug for RawOccupiedEntryMut<'_, K, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("RawOccupiedEntryMut") .field("key", self.key()) @@ -2012,13 +2322,13 @@ impl Debug for RawOccupiedEntryMut<'_, K, V, S> { } } -impl Debug for RawVacantEntryMut<'_, K, V, S> { +impl Debug for RawVacantEntryMut<'_, K, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("RawVacantEntryMut").finish() } } -impl Debug for RawEntryBuilder<'_, K, V, S> { +impl Debug for RawEntryBuilder<'_, K, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("RawEntryBuilder").finish() } @@ -2030,15 +2340,18 @@ impl Debug for RawEntryBuilder<'_, K, V, S> { /// /// [`HashMap`]: struct.HashMap.html /// [`entry`]: struct.HashMap.html#method.entry -pub enum Entry<'a, K, V, S> { +pub enum Entry<'a, K, V, S, A = Global> +where + A: Allocator + Clone, +{ /// An occupied entry. - Occupied(OccupiedEntry<'a, K, V, S>), + Occupied(OccupiedEntry<'a, K, V, S, A>), /// A vacant entry. 
- Vacant(VacantEntry<'a, K, V, S>), + Vacant(VacantEntry<'a, K, V, S, A>), } -impl Debug for Entry<'_, K, V, S> { +impl Debug for Entry<'_, K, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Entry::Vacant(ref v) => f.debug_tuple("Entry").field(v).finish(), @@ -2051,29 +2364,31 @@ impl Debug for Entry<'_, K, V, S> { /// It is part of the [`Entry`] enum. /// /// [`Entry`]: enum.Entry.html -pub struct OccupiedEntry<'a, K, V, S> { +pub struct OccupiedEntry<'a, K, V, S, A: Allocator + Clone = Global> { hash: u64, key: Option, elem: Bucket<(K, V)>, - table: &'a mut HashMap, + table: &'a mut HashMap, } -unsafe impl Send for OccupiedEntry<'_, K, V, S> +unsafe impl Send for OccupiedEntry<'_, K, V, S, A> where K: Send, V: Send, S: Send, + A: Send + Allocator + Clone, { } -unsafe impl Sync for OccupiedEntry<'_, K, V, S> +unsafe impl Sync for OccupiedEntry<'_, K, V, S, A> where K: Sync, V: Sync, S: Sync, + A: Sync + Allocator + Clone, { } -impl Debug for OccupiedEntry<'_, K, V, S> { +impl Debug for OccupiedEntry<'_, K, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("OccupiedEntry") .field("key", self.key()) @@ -2086,19 +2401,53 @@ impl Debug for OccupiedEntry<'_, K, V, S> { /// It is part of the [`Entry`] enum. /// /// [`Entry`]: enum.Entry.html -pub struct VacantEntry<'a, K, V, S> { +pub struct VacantEntry<'a, K, V, S, A: Allocator + Clone = Global> { hash: u64, key: K, - table: &'a mut HashMap, + table: &'a mut HashMap, } -impl Debug for VacantEntry<'_, K, V, S> { +impl Debug for VacantEntry<'_, K, V, S, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_tuple("VacantEntry").field(self.key()).finish() } } -impl<'a, K, V, S> IntoIterator for &'a HashMap { +/// The error returned by [`try_insert`](HashMap::try_insert) when the key already exists. +/// +/// Contains the occupied entry, and the value that was not inserted. +pub struct OccupiedError<'a, K, V, S, A: Allocator + Clone = Global> { + /// The entry in the map that was already occupied. + pub entry: OccupiedEntry<'a, K, V, S, A>, + /// The value which was not inserted, because the entry was already occupied. + pub value: V, +} + +impl Debug for OccupiedError<'_, K, V, S, A> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("OccupiedError") + .field("key", self.entry.key()) + .field("old_value", self.entry.get()) + .field("new_value", &self.value) + .finish() + } +} + +impl<'a, K: Debug, V: Debug, S, A: Allocator + Clone> fmt::Display + for OccupiedError<'a, K, V, S, A> +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "failed to insert {:?}, key {:?} already exists with value {:?}", + self.value, + self.entry.key(), + self.entry.get(), + ) + } +} + +impl<'a, K, V, S, A: Allocator + Clone> IntoIterator for &'a HashMap { type Item = (&'a K, &'a V); type IntoIter = Iter<'a, K, V>; @@ -2108,7 +2457,7 @@ impl<'a, K, V, S> IntoIterator for &'a HashMap { } } -impl<'a, K, V, S> IntoIterator for &'a mut HashMap { +impl<'a, K, V, S, A: Allocator + Clone> IntoIterator for &'a mut HashMap { type Item = (&'a K, &'a mut V); type IntoIter = IterMut<'a, K, V>; @@ -2118,9 +2467,9 @@ impl<'a, K, V, S> IntoIterator for &'a mut HashMap { } } -impl IntoIterator for HashMap { +impl IntoIterator for HashMap { type Item = (K, V); - type IntoIter = IntoIter; + type IntoIter = IntoIter; /// Creates a consuming iterator, that is, one that moves each key-value /// pair out of the map in arbitrary order. 
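The new `OccupiedError` above carries both the occupied entry and the rejected value, so a failed `try_insert` can be inspected or reported without querying the map again. A short doc-test style sketch (not from the patch):

```rust
use hashbrown::HashMap;

let mut map = HashMap::new();
map.try_insert("a", 1).unwrap();

match map.try_insert("a", 2) {
    Ok(_) => unreachable!("the key is already present"),
    Err(err) => {
        // The error exposes the existing entry and the value that was rejected,
        // and its `Display` impl reports all three pieces of information.
        assert_eq!((*err.entry.key(), *err.entry.get(), err.value), ("a", 1, 2));
        println!("{}", err);
    }
}
```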
The map cannot be used after @@ -2140,7 +2489,7 @@ impl IntoIterator for HashMap { /// let vec: Vec<(&str, i32)> = map.into_iter().collect(); /// ``` #[cfg_attr(feature = "inline-more", inline)] - fn into_iter(self) -> IntoIter { + fn into_iter(self) -> IntoIter { IntoIter { inner: self.table.into_iter(), } @@ -2212,7 +2561,7 @@ where } } -impl Iterator for IntoIter { +impl Iterator for IntoIter { type Item = (K, V); #[cfg_attr(feature = "inline-more", inline)] @@ -2224,15 +2573,15 @@ impl Iterator for IntoIter { self.inner.size_hint() } } -impl ExactSizeIterator for IntoIter { +impl ExactSizeIterator for IntoIter { #[cfg_attr(feature = "inline-more", inline)] fn len(&self) -> usize { self.inner.len() } } -impl FusedIterator for IntoIter {} +impl FusedIterator for IntoIter {} -impl fmt::Debug for IntoIter { +impl fmt::Debug for IntoIter { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.iter()).finish() } @@ -2320,7 +2669,7 @@ where } } -impl<'a, K, V> Iterator for Drain<'a, K, V> { +impl<'a, K, V, A: Allocator + Clone> Iterator for Drain<'a, K, V, A> { type Item = (K, V); #[cfg_attr(feature = "inline-more", inline)] @@ -2332,25 +2681,26 @@ impl<'a, K, V> Iterator for Drain<'a, K, V> { self.inner.size_hint() } } -impl ExactSizeIterator for Drain<'_, K, V> { +impl ExactSizeIterator for Drain<'_, K, V, A> { #[cfg_attr(feature = "inline-more", inline)] fn len(&self) -> usize { self.inner.len() } } -impl FusedIterator for Drain<'_, K, V> {} +impl FusedIterator for Drain<'_, K, V, A> {} -impl fmt::Debug for Drain<'_, K, V> +impl fmt::Debug for Drain<'_, K, V, A> where K: fmt::Debug, V: fmt::Debug, + A: Allocator + Clone, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.iter()).finish() } } -impl<'a, K, V, S> Entry<'a, K, V, S> { +impl<'a, K, V, S, A: Allocator + Clone> Entry<'a, K, V, S, A> { /// Sets the value of the entry, and returns an OccupiedEntry. /// /// # Examples @@ -2364,7 +2714,7 @@ impl<'a, K, V, S> Entry<'a, K, V, S> { /// assert_eq!(entry.key(), &"horseyland"); /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn insert(self, value: V) -> OccupiedEntry<'a, K, V, S> + pub fn insert(self, value: V) -> OccupiedEntry<'a, K, V, S, A> where K: Hash, S: BuildHasher, @@ -2433,9 +2783,12 @@ impl<'a, K, V, S> Entry<'a, K, V, S> { } } - /// Ensures a value is in the entry by inserting, if empty, the result of the default function, - /// which takes the key as its argument, and returns a mutable reference to the value in the - /// entry. + /// Ensures a value is in the entry by inserting, if empty, the result of the default function. + /// This method allows for generating key-derived values for insertion by providing the default + /// function a reference to the key that was moved during the `.entry(key)` method call. + /// + /// The reference to the moved key is provided so that cloning or copying the key is + /// unnecessary, unlike with `.or_insert_with(|| ... )`. /// /// # Examples /// @@ -2581,7 +2934,7 @@ impl<'a, K, V, S> Entry<'a, K, V, S> { } } -impl<'a, K, V: Default, S> Entry<'a, K, V, S> { +impl<'a, K, V: Default, S, A: Allocator + Clone> Entry<'a, K, V, S, A> { /// Ensures a value is in the entry by inserting the default value if empty, /// and returns a mutable reference to the value in the entry. 
/// @@ -2608,7 +2961,7 @@ impl<'a, K, V: Default, S> Entry<'a, K, V, S> { } } -impl<'a, K, V, S> OccupiedEntry<'a, K, V, S> { +impl<'a, K, V, S, A: Allocator + Clone> OccupiedEntry<'a, K, V, S, A> { /// Gets a reference to the key in the entry. /// /// # Examples @@ -2777,6 +3130,10 @@ impl<'a, K, V, S> OccupiedEntry<'a, K, V, S> { /// Replaces the entry, returning the old key and value. The new key in the hash map will be /// the key used to create this entry. /// + /// # Panics + /// + /// Will panic if this OccupiedEntry was created through [`Entry::insert`]. + /// /// # Examples /// /// ``` @@ -2806,6 +3163,10 @@ impl<'a, K, V, S> OccupiedEntry<'a, K, V, S> { /// Replaces the key in the hash map with the key used to create this entry. /// + /// # Panics + /// + /// Will panic if this OccupiedEntry was created through [`Entry::insert`]. + /// /// # Examples /// /// ``` @@ -2883,7 +3244,7 @@ impl<'a, K, V, S> OccupiedEntry<'a, K, V, S> { /// assert!(!map.contains_key("poneyland")); /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn replace_entry_with(self, f: F) -> Entry<'a, K, V, S> + pub fn replace_entry_with(self, f: F) -> Entry<'a, K, V, S, A> where F: FnOnce(&K, V) -> Option, { @@ -2914,7 +3275,7 @@ impl<'a, K, V, S> OccupiedEntry<'a, K, V, S> { } } -impl<'a, K, V, S> VacantEntry<'a, K, V, S> { +impl<'a, K, V, S, A: Allocator + Clone> VacantEntry<'a, K, V, S, A> { /// Gets a reference to the key that would be used when inserting a value /// through the `VacantEntry`. /// @@ -2972,24 +3333,26 @@ impl<'a, K, V, S> VacantEntry<'a, K, V, S> { K: Hash, S: BuildHasher, { - let hash_builder = &self.table.hash_builder; let table = &mut self.table.table; - let entry = table.insert_entry(self.hash, (self.key, value), |x| { - make_hash(hash_builder, &x.0) - }); + let entry = table.insert_entry( + self.hash, + (self.key, value), + make_hasher::(&self.table.hash_builder), + ); &mut entry.1 } #[cfg_attr(feature = "inline-more", inline)] - fn insert_entry(self, value: V) -> OccupiedEntry<'a, K, V, S> + fn insert_entry(self, value: V) -> OccupiedEntry<'a, K, V, S, A> where K: Hash, S: BuildHasher, { - let hash_builder = &self.table.hash_builder; - let elem = self.table.table.insert(self.hash, (self.key, value), |x| { - make_hash(hash_builder, &x.0) - }); + let elem = self.table.table.insert( + self.hash, + (self.key, value), + make_hasher::(&self.table.hash_builder), + ); OccupiedEntry { hash: self.hash, key: None, @@ -2999,15 +3362,17 @@ impl<'a, K, V, S> VacantEntry<'a, K, V, S> { } } -impl FromIterator<(K, V)> for HashMap +impl FromIterator<(K, V)> for HashMap where K: Eq + Hash, S: BuildHasher + Default, + A: Default + Allocator + Clone, { #[cfg_attr(feature = "inline-more", inline)] fn from_iter>(iter: T) -> Self { let iter = iter.into_iter(); - let mut map = Self::with_capacity_and_hasher(iter.size_hint().0, S::default()); + let mut map = + Self::with_capacity_and_hasher_in(iter.size_hint().0, S::default(), A::default()); iter.for_each(|(k, v)| { map.insert(k, v); }); @@ -3017,10 +3382,11 @@ where /// Inserts all new key-values from the iterator and replaces values with existing /// keys with new values returned from the iterator. 
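The reworded `or_insert_with_key` documentation above boils down to the following doc-test style sketch, where the default closure receives a reference to the key that was moved into `entry`:

```rust
use hashbrown::HashMap;

let mut map: HashMap<&str, usize> = HashMap::new();

// The closure gets `&K`, so a key-derived default needs no extra clone of the key.
map.entry("poneyland").or_insert_with_key(|key| key.chars().count());
assert_eq!(map["poneyland"], 9);
```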
-impl Extend<(K, V)> for HashMap +impl Extend<(K, V)> for HashMap where K: Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { #[cfg_attr(feature = "inline-more", inline)] fn extend>(&mut self, iter: T) { @@ -3062,11 +3428,12 @@ where } } -impl<'a, K, V, S> Extend<(&'a K, &'a V)> for HashMap +impl<'a, K, V, S, A> Extend<(&'a K, &'a V)> for HashMap where K: Eq + Hash + Copy, V: Copy, S: BuildHasher, + A: Allocator + Clone, { #[cfg_attr(feature = "inline-more", inline)] fn extend>(&mut self, iter: T) { @@ -3100,10 +3467,14 @@ fn assert_covariance() { fn iter_val<'a, 'new>(v: Iter<'a, u8, &'static str>) -> Iter<'a, u8, &'new str> { v } - fn into_iter_key<'new>(v: IntoIter<&'static str, u8>) -> IntoIter<&'new str, u8> { + fn into_iter_key<'new, A: Allocator + Clone>( + v: IntoIter<&'static str, u8, A>, + ) -> IntoIter<&'new str, u8, A> { v } - fn into_iter_val<'new>(v: IntoIter) -> IntoIter { + fn into_iter_val<'new, A: Allocator + Clone>( + v: IntoIter, + ) -> IntoIter { v } fn keys_key<'a, 'new>(v: Keys<'a, &'static str, u8>) -> Keys<'a, &'new str, u8> { @@ -3132,6 +3503,7 @@ mod test_map { use super::{HashMap, RawEntryMut}; use crate::TryReserveError::*; use rand::{rngs::SmallRng, Rng, SeedableRng}; + use std::borrow::ToOwned; use std::cell::RefCell; use std::usize; use std::vec::Vec; @@ -4287,11 +4659,7 @@ mod test_map { let mut map: HashMap<_, _> = xs.iter().cloned().collect(); let compute_hash = |map: &HashMap, k: i32| -> u64 { - use core::hash::{BuildHasher, Hash, Hasher}; - - let mut hasher = map.hasher().build_hasher(); - k.hash(&mut hasher); - hasher.finish() + super::make_insert_hash::(map.hasher(), &k) }; // Existing key (insert) @@ -4468,9 +4836,11 @@ mod test_map { left -= 1; } else { assert!(removed.contains(&(i, 2 * i)), "{} not in {:?}", i, removed); - let e = m - .table - .insert(hash, (i, 2 * i), |x| super::make_hash(&hasher, &x.0)); + let e = m.table.insert( + hash, + (i, 2 * i), + super::make_hasher::(&hasher), + ); it.reflect_insert(&e); if let Some(p) = removed.iter().position(|e| e == &(i, 2 * i)) { removed.swap_remove(p); @@ -4501,7 +4871,6 @@ mod test_map { #[test] fn test_const_with_hasher() { use core::hash::BuildHasher; - use std::borrow::ToOwned; use std::collections::hash_map::DefaultHasher; #[derive(Clone)] @@ -4521,4 +4890,33 @@ mod test_map { map.insert(17, "seventeen".to_owned()); assert_eq!("seventeen", map[&17]); } + + #[test] + #[cfg(feature = "nightly")] + fn test_get_each_mut() { + use crate::UnavailableMutError::*; + + let mut map = HashMap::new(); + map.insert("foo".to_owned(), 0); + map.insert("bar".to_owned(), 10); + map.insert("baz".to_owned(), 20); + map.insert("qux".to_owned(), 30); + + let xs = map.get_each_mut(["foo", "dud", "foo", "qux"]); + assert_eq!( + xs, + [Ok(&mut 0), Err(Absent), Err(Duplicate(0)), Ok(&mut 30)] + ); + + let ys = map.get_each_key_value_mut(["bar", "baz", "baz", "dip"]); + assert_eq!( + ys, + [ + Ok((&"bar".to_owned(), &mut 10)), + Ok((&"baz".to_owned(), &mut 20)), + Err(Duplicate(1)), + Err(Absent), + ] + ); + } } diff --git a/src/raw/alloc.rs b/src/raw/alloc.rs new file mode 100644 index 0000000..de6c455 --- /dev/null +++ b/src/raw/alloc.rs @@ -0,0 +1,72 @@ +pub(crate) use self::inner::{do_alloc, Allocator, Global}; + +#[cfg(feature = "nightly")] +mod inner { + use crate::alloc::alloc::Layout; + pub use crate::alloc::alloc::{Allocator, Global}; + use core::ptr::NonNull; + + #[allow(clippy::map_err_ignore)] + pub fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { + alloc + .allocate(layout) + .map(|ptr| 
ptr.as_non_null_ptr()) + .map_err(|_| ()) + } + + #[cfg(feature = "bumpalo")] + unsafe impl Allocator for crate::BumpWrapper<'_> { + #[inline] + fn allocate(&self, layout: Layout) -> Result, core::alloc::AllocError> { + match self.0.try_alloc_layout(layout) { + Ok(ptr) => Ok(NonNull::slice_from_raw_parts(ptr, layout.size())), + Err(_) => Err(core::alloc::AllocError), + } + } + #[inline] + unsafe fn deallocate(&self, _ptr: NonNull, _layout: Layout) {} + } +} + +#[cfg(not(feature = "nightly"))] +mod inner { + use crate::alloc::alloc::{alloc, dealloc, Layout}; + use core::ptr::NonNull; + + pub unsafe trait Allocator { + fn allocate(&self, layout: Layout) -> Result, ()>; + unsafe fn deallocate(&self, ptr: NonNull, layout: Layout); + } + + #[derive(Copy, Clone)] + pub struct Global; + unsafe impl Allocator for Global { + #[inline] + fn allocate(&self, layout: Layout) -> Result, ()> { + unsafe { NonNull::new(alloc(layout)).ok_or(()) } + } + #[inline] + unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { + dealloc(ptr.as_ptr(), layout) + } + } + impl Default for Global { + #[inline] + fn default() -> Self { + Global + } + } + + pub fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { + alloc.allocate(layout) + } + + #[cfg(feature = "bumpalo")] + unsafe impl Allocator for crate::BumpWrapper<'_> { + #[allow(clippy::map_err_ignore)] + fn allocate(&self, layout: Layout) -> Result, ()> { + self.0.try_alloc_layout(layout).map_err(|_| ()) + } + unsafe fn deallocate(&self, _ptr: NonNull, _layout: Layout) {} + } +} diff --git a/src/raw/generic.rs b/src/raw/generic.rs index 26f8c58..ef066e8 100644 --- a/src/raw/generic.rs +++ b/src/raw/generic.rs @@ -55,7 +55,7 @@ impl Group { struct AlignedBytes { _align: [Group; 0], bytes: [u8; Group::WIDTH], - }; + } const ALIGNED_BYTES: AlignedBytes = AlignedBytes { _align: [], bytes: [EMPTY; Group::WIDTH], @@ -67,7 +67,7 @@ impl Group { #[inline] #[allow(clippy::cast_ptr_alignment)] // unaligned load pub unsafe fn load(ptr: *const u8) -> Self { - Group(ptr::read_unaligned(ptr as *const _)) + Group(ptr::read_unaligned(ptr.cast())) } /// Loads a group of bytes starting at the given address, which must be @@ -77,7 +77,7 @@ impl Group { pub unsafe fn load_aligned(ptr: *const u8) -> Self { // FIXME: use align_offset once it stabilizes debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); - Group(ptr::read(ptr as *const _)) + Group(ptr::read(ptr.cast())) } /// Stores the group of bytes to the given address, which must be @@ -87,7 +87,7 @@ impl Group { pub unsafe fn store_aligned(self, ptr: *mut u8) { // FIXME: use align_offset once it stabilizes debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); - ptr::write(ptr as *mut _, self.0); + ptr::write(ptr.cast(), self.0); } /// Returns a `BitMask` indicating all bytes in the group which *may* diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 32fec98..3ae6980 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1,12 +1,15 @@ -use crate::alloc::alloc::{alloc, dealloc, handle_alloc_error}; +use crate::alloc::alloc::{handle_alloc_error, Layout}; use crate::scopeguard::guard; use crate::TryReserveError; -use core::alloc::Layout; +#[cfg(feature = "nightly")] +use crate::UnavailableMutError; use core::hint; use core::iter::FusedIterator; use core::marker::PhantomData; use core::mem; use core::mem::ManuallyDrop; +#[cfg(feature = "nightly")] +use core::mem::MaybeUninit; use core::ptr::NonNull; cfg_if! { @@ -32,6 +35,9 @@ cfg_if! 
{ } } +mod alloc; +pub(crate) use self::alloc::{do_alloc, Allocator, Global}; + mod bitmask; use self::bitmask::{BitMask, BitMaskIter}; @@ -41,14 +47,28 @@ use self::imp::Group; // consistently improves performance by 10-15%. #[cfg(feature = "nightly")] use core::intrinsics::{likely, unlikely}; + +// On stable we can use #[cold] to get a equivalent effect: this attributes +// suggests that the function is unlikely to be called +#[cfg(not(feature = "nightly"))] +#[inline] +#[cold] +fn cold() {} + #[cfg(not(feature = "nightly"))] #[inline] fn likely(b: bool) -> bool { + if !b { + cold() + } b } #[cfg(not(feature = "nightly"))] #[inline] fn unlikely(b: bool) -> bool { + if b { + cold() + } b } @@ -145,27 +165,22 @@ fn h2(hash: u64) -> u8 { /// Proof that the probe will visit every group in the table: /// struct ProbeSeq { - bucket_mask: usize, pos: usize, stride: usize, } -impl Iterator for ProbeSeq { - type Item = usize; - +impl ProbeSeq { #[inline] - fn next(&mut self) -> Option { + fn move_next(&mut self, bucket_mask: usize) { // We should have found an empty bucket by now and ended the probe. debug_assert!( - self.stride <= self.bucket_mask, + self.stride <= bucket_mask, "Went past end of probe sequence" ); - let result = self.pos; self.stride += Group::WIDTH; self.pos += self.stride; - self.pos &= self.bucket_mask; - Some(result) + self.pos &= bucket_mask; } } @@ -214,30 +229,39 @@ fn bucket_mask_to_capacity(bucket_mask: usize) -> usize { } } -/// Returns a Layout which describes the allocation required for a hash table, -/// and the offset of the control bytes in the allocation. -/// (the offset is also one past last element of buckets) -/// -/// Returns `None` if an overflow occurs. -#[cfg_attr(feature = "inline-more", inline)] -#[cfg(feature = "nightly")] -fn calculate_layout(buckets: usize) -> Option<(Layout, usize)> { - debug_assert!(buckets.is_power_of_two()); +/// Helper which allows the max calculation for ctrl_align to be statically computed for each T +/// while keeping the rest of `calculate_layout_for` independent of `T` +#[derive(Copy, Clone)] +struct TableLayout { + size: usize, + ctrl_align: usize, +} + +impl TableLayout { + #[inline] + fn new() -> Self { + let layout = Layout::new::(); + Self { + size: layout.size(), + ctrl_align: usize::max(layout.align(), Group::WIDTH), + } + } - // Array of buckets - let data = Layout::array::(buckets).ok()?; + #[inline] + fn calculate_layout_for(self, buckets: usize) -> Option<(Layout, usize)> { + debug_assert!(buckets.is_power_of_two()); - // Array of control bytes. This must be aligned to the group size. - // - // We add `Group::WIDTH` control bytes at the end of the array which - // replicate the bytes at the start of the array and thus avoids the need to - // perform bounds-checking while probing. - // - // There is no possible overflow here since buckets is a power of two and - // Group::WIDTH is a small number. - let ctrl = unsafe { Layout::from_size_align_unchecked(buckets + Group::WIDTH, Group::WIDTH) }; + let TableLayout { size, ctrl_align } = self; + // Manual layout calculation since Layout methods are not yet stable. + let ctrl_offset = + size.checked_mul(buckets)?.checked_add(ctrl_align - 1)? 
& !(ctrl_align - 1); + let len = ctrl_offset.checked_add(buckets + Group::WIDTH)?; - data.extend(ctrl).ok() + Some(( + unsafe { Layout::from_size_align_unchecked(len, ctrl_align) }, + ctrl_offset, + )) + } } /// Returns a Layout which describes the allocation required for a hash table, @@ -246,22 +270,8 @@ fn calculate_layout(buckets: usize) -> Option<(Layout, usize)> { /// /// Returns `None` if an overflow occurs. #[cfg_attr(feature = "inline-more", inline)] -#[cfg(not(feature = "nightly"))] fn calculate_layout(buckets: usize) -> Option<(Layout, usize)> { - debug_assert!(buckets.is_power_of_two()); - - // Manual layout calculation since Layout methods are not yet stable. - let ctrl_align = usize::max(mem::align_of::(), Group::WIDTH); - let ctrl_offset = mem::size_of::() - .checked_mul(buckets)? - .checked_add(ctrl_align - 1)? - & !(ctrl_align - 1); - let len = ctrl_offset.checked_add(buckets + Group::WIDTH)?; - - Some(( - unsafe { Layout::from_size_align_unchecked(len, ctrl_align) }, - ctrl_offset, - )) + TableLayout::new::().calculate_layout_for(buckets) } /// A reference to a hash table bucket containing a `T`. @@ -310,12 +320,12 @@ impl Bucket { } } #[cfg_attr(feature = "inline-more", inline)] - pub unsafe fn as_ptr(&self) -> *mut T { + pub fn as_ptr(&self) -> *mut T { if mem::size_of::() == 0 { // Just return an arbitrary ZST pointer which is properly aligned mem::align_of::() as *mut T } else { - self.ptr.as_ptr().sub(1) + unsafe { self.ptr.as_ptr().sub(1) } } } #[cfg_attr(feature = "inline-more", inline)] @@ -356,7 +366,15 @@ impl Bucket { } /// A raw hash table with an unsafe API. -pub struct RawTable { +pub struct RawTable { + table: RawTableInner, + // Tell dropck that we own instances of T. + marker: PhantomData, +} + +/// Non-generic part of `RawTable` which allows functions to be instantiated only once regardless +/// of how many different key-value types are used. +struct RawTableInner { // Mask to get an index from a hash value. The value is one less than the // number of buckets in the table. bucket_mask: usize, @@ -371,11 +389,10 @@ pub struct RawTable { // Number of elements in the table, only really used by len() items: usize, - // Tell dropck that we own instances of T. - marker: PhantomData, + alloc: A, } -impl RawTable { +impl RawTable { /// Creates a new empty hash table without allocating any memory. /// /// In effect this returns a table with exactly 1 bucket. However we can @@ -384,11 +401,36 @@ impl RawTable { #[cfg_attr(feature = "inline-more", inline)] pub const fn new() -> Self { Self { - // Be careful to cast the entire slice to a raw pointer. - ctrl: unsafe { NonNull::new_unchecked(Group::static_empty() as *const _ as *mut u8) }, - bucket_mask: 0, - items: 0, - growth_left: 0, + table: RawTableInner::new_in(Global), + marker: PhantomData, + } + } + + /// Attempts to allocate a new hash table with at least enough capacity + /// for inserting the given number of elements without reallocating. + #[cfg(feature = "raw")] + pub fn try_with_capacity(capacity: usize) -> Result { + Self::try_with_capacity_in(capacity, Global) + } + + /// Allocates a new hash table with at least enough capacity for inserting + /// the given number of elements without reallocating. + pub fn with_capacity(capacity: usize) -> Self { + Self::with_capacity_in(capacity, Global) + } +} + +impl RawTable { + /// Creates a new empty hash table without allocating any memory, using the + /// given allocator. + /// + /// In effect this returns a table with exactly 1 bucket. 
However we can + /// leave the data pointer dangling since that bucket is never written to + /// due to our load factor forcing us to always have at least 1 free bucket. + #[cfg_attr(feature = "inline-more", inline)] + pub fn new_in(alloc: A) -> Self { + Self { + table: RawTableInner::new_in(alloc), marker: PhantomData, } } @@ -398,26 +440,19 @@ impl RawTable { /// The control bytes are left uninitialized. #[cfg_attr(feature = "inline-more", inline)] unsafe fn new_uninitialized( + alloc: A, buckets: usize, - fallability: Fallibility, + fallibility: Fallibility, ) -> Result { debug_assert!(buckets.is_power_of_two()); - // Avoid `Option::ok_or_else` because it bloats LLVM IR. - let (layout, ctrl_offset) = match calculate_layout::(buckets) { - Some(lco) => lco, - None => return Err(fallability.capacity_overflow()), - }; - let ptr = match NonNull::new(alloc(layout)) { - Some(ptr) => ptr, - None => return Err(fallability.alloc_err(layout)), - }; - let ctrl = NonNull::new_unchecked(ptr.as_ptr().add(ctrl_offset)); Ok(Self { - ctrl, - bucket_mask: buckets - 1, - items: 0, - growth_left: bucket_mask_to_capacity(buckets - 1), + table: RawTableInner::new_uninitialized( + alloc, + TableLayout::new::(), + buckets, + fallibility, + )?, marker: PhantomData, }) } @@ -425,38 +460,33 @@ impl RawTable { /// Attempts to allocate a new hash table with at least enough capacity /// for inserting the given number of elements without reallocating. fn fallible_with_capacity( + alloc: A, capacity: usize, - fallability: Fallibility, + fallibility: Fallibility, ) -> Result { - if capacity == 0 { - Ok(Self::new()) - } else { - unsafe { - // Avoid `Option::ok_or_else` because it bloats LLVM IR. - let buckets = match capacity_to_buckets(capacity) { - Some(buckets) => buckets, - None => return Err(fallability.capacity_overflow()), - }; - let result = Self::new_uninitialized(buckets, fallability)?; - result.ctrl(0).write_bytes(EMPTY, result.num_ctrl_bytes()); - - Ok(result) - } - } + Ok(Self { + table: RawTableInner::fallible_with_capacity( + alloc, + TableLayout::new::(), + capacity, + fallibility, + )?, + marker: PhantomData, + }) } - /// Attempts to allocate a new hash table with at least enough capacity - /// for inserting the given number of elements without reallocating. + /// Attempts to allocate a new hash table using the given allocator, with at least enough + /// capacity for inserting the given number of elements without reallocating. #[cfg(feature = "raw")] - pub fn try_with_capacity(capacity: usize) -> Result { - Self::fallible_with_capacity(capacity, Fallibility::Fallible) + pub fn try_with_capacity_in(capacity: usize, alloc: A) -> Result { + Self::fallible_with_capacity(alloc, capacity, Fallibility::Fallible) } - /// Allocates a new hash table with at least enough capacity for inserting - /// the given number of elements without reallocating. - pub fn with_capacity(capacity: usize) -> Self { + /// Allocates a new hash table using the given allocator, with at least enough capacity for + /// inserting the given number of elements without reallocating. + pub fn with_capacity_in(capacity: usize, alloc: A) -> Self { // Avoid `Result::unwrap_or_else` because it bloats LLVM IR. - match Self::fallible_with_capacity(capacity, Fallibility::Infallible) { + match Self::fallible_with_capacity(alloc, capacity, Fallibility::Infallible) { Ok(capacity) => capacity, Err(_) => unsafe { hint::unreachable_unchecked() }, } @@ -465,18 +495,13 @@ impl RawTable { /// Deallocates the table without dropping any entries. 
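The `RawTable`/`RawTableInner` split above exists so that the layout and control-byte bookkeeping is instantiated once rather than once per key-value type, as its doc comment says. An illustrative, self-contained sketch of the pattern; all names here are invented and the real fields differ:

```rust
use core::marker::PhantomData;

// The non-generic inner part owns the bookkeeping, so its methods are compiled once.
struct TableInner {
    items: usize,
}

// The generic wrapper only records the element type and forwards to the inner part.
struct Table<T> {
    inner: TableInner,
    marker: PhantomData<T>,
}

impl TableInner {
    fn len(&self) -> usize {
        self.items
    }
}

impl<T> Table<T> {
    fn len(&self) -> usize {
        self.inner.len()
    }
}

let table = Table::<u64> {
    inner: TableInner { items: 0 },
    marker: PhantomData,
};
assert_eq!(table.len(), 0);
```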
#[cfg_attr(feature = "inline-more", inline)] unsafe fn free_buckets(&mut self) { - // Avoid `Option::unwrap_or_else` because it bloats LLVM IR. - let (layout, ctrl_offset) = match calculate_layout::(self.buckets()) { - Some(lco) => lco, - None => hint::unreachable_unchecked(), - }; - dealloc(self.ctrl.as_ptr().sub(ctrl_offset), layout); + self.table.free_buckets(TableLayout::new::()) } /// Returns pointer to one past last element of data table. #[cfg_attr(feature = "inline-more", inline)] pub unsafe fn data_end(&self) -> NonNull { - NonNull::new_unchecked(self.ctrl.as_ptr() as *mut T) + NonNull::new_unchecked(self.table.ctrl.as_ptr().cast()) } /// Returns pointer to start of data table. @@ -492,17 +517,10 @@ impl RawTable { bucket.to_base_index(self.data_end()) } - /// Returns a pointer to a control byte. - #[cfg_attr(feature = "inline-more", inline)] - unsafe fn ctrl(&self, index: usize) -> *mut u8 { - debug_assert!(index < self.num_ctrl_bytes()); - self.ctrl.as_ptr().add(index) - } - /// Returns a pointer to an element in the table. #[cfg_attr(feature = "inline-more", inline)] pub unsafe fn bucket(&self, index: usize) -> Bucket { - debug_assert_ne!(self.bucket_mask, 0); + debug_assert_ne!(self.table.bucket_mask, 0); debug_assert!(index < self.buckets()); Bucket::from_base_index(self.data_end(), index) } @@ -512,27 +530,7 @@ impl RawTable { #[deprecated(since = "0.8.1", note = "use erase or remove instead")] pub unsafe fn erase_no_drop(&mut self, item: &Bucket) { let index = self.bucket_index(item); - debug_assert!(is_full(*self.ctrl(index))); - let index_before = index.wrapping_sub(Group::WIDTH) & self.bucket_mask; - let empty_before = Group::load(self.ctrl(index_before)).match_empty(); - let empty_after = Group::load(self.ctrl(index)).match_empty(); - - // If we are inside a continuous block of Group::WIDTH full or deleted - // cells then a probe window may have seen a full block when trying to - // insert. We therefore need to keep that block non-empty so that - // lookups will continue searching to the next probe window. - // - // Note that in this context `leading_zeros` refers to the bytes at the - // end of a group, while `trailing_zeros` refers to the bytes at the - // begining of a group. - let ctrl = if empty_before.leading_zeros() + empty_after.trailing_zeros() >= Group::WIDTH { - DELETED - } else { - self.growth_left += 1; - EMPTY - }; - self.set_ctrl(index, ctrl); - self.items -= 1; + self.table.erase(index) } /// Erases an element from the table, dropping it in place. @@ -578,109 +576,26 @@ impl RawTable { } } - /// Returns an iterator for a probe sequence on the table. - /// - /// This iterator never terminates, but is guaranteed to visit each bucket - /// group exactly once. The loop using `probe_seq` must terminate upon - /// reaching a group containing an empty bucket. - #[cfg_attr(feature = "inline-more", inline)] - fn probe_seq(&self, hash: u64) -> ProbeSeq { - ProbeSeq { - bucket_mask: self.bucket_mask, - pos: h1(hash) & self.bucket_mask, - stride: 0, - } - } - - /// Sets a control byte, and possibly also the replicated control byte at - /// the end of the array. - #[cfg_attr(feature = "inline-more", inline)] - unsafe fn set_ctrl(&self, index: usize, ctrl: u8) { - // Replicate the first Group::WIDTH control bytes at the end of - // the array without using a branch: - // - If index >= Group::WIDTH then index == index2. - // - Otherwise index2 == self.bucket_mask + 1 + index. 
- // - // The very last replicated control byte is never actually read because - // we mask the initial index for unaligned loads, but we write it - // anyways because it makes the set_ctrl implementation simpler. - // - // If there are fewer buckets than Group::WIDTH then this code will - // replicate the buckets at the end of the trailing group. For example - // with 2 buckets and a group size of 4, the control bytes will look - // like this: - // - // Real | Replicated - // --------------------------------------------- - // | [A] | [B] | [EMPTY] | [EMPTY] | [A] | [B] | - // --------------------------------------------- - let index2 = ((index.wrapping_sub(Group::WIDTH)) & self.bucket_mask) + Group::WIDTH; - - *self.ctrl(index) = ctrl; - *self.ctrl(index2) = ctrl; - } - - /// Searches for an empty or deleted bucket which is suitable for inserting - /// a new element. - /// - /// There must be at least 1 empty bucket in the table. - #[cfg_attr(feature = "inline-more", inline)] - fn find_insert_slot(&self, hash: u64) -> usize { - for pos in self.probe_seq(hash) { - unsafe { - let group = Group::load(self.ctrl(pos)); - if let Some(bit) = group.match_empty_or_deleted().lowest_set_bit() { - let result = (pos + bit) & self.bucket_mask; - - // In tables smaller than the group width, trailing control - // bytes outside the range of the table are filled with - // EMPTY entries. These will unfortunately trigger a - // match, but once masked may point to a full bucket that - // is already occupied. We detect this situation here and - // perform a second scan starting at the begining of the - // table. This second scan is guaranteed to find an empty - // slot (due to the load factor) before hitting the trailing - // control bytes (containing EMPTY). - if unlikely(is_full(*self.ctrl(result))) { - debug_assert!(self.bucket_mask < Group::WIDTH); - debug_assert_ne!(pos, 0); - return Group::load_aligned(self.ctrl(0)) - .match_empty_or_deleted() - .lowest_set_bit_nonzero(); - } else { - return result; - } - } - } - } - - // probe_seq never returns. - unreachable!(); - } - /// Marks all table buckets as empty without dropping their contents. #[cfg_attr(feature = "inline-more", inline)] pub fn clear_no_drop(&mut self) { - if !self.is_empty_singleton() { - unsafe { - self.ctrl(0).write_bytes(EMPTY, self.num_ctrl_bytes()); - } - } - self.items = 0; - self.growth_left = bucket_mask_to_capacity(self.bucket_mask); + self.table.clear_no_drop() } /// Removes all elements from the table without freeing the backing memory. #[cfg_attr(feature = "inline-more", inline)] pub fn clear(&mut self) { // Ensure that the table is reset even if one of the drops panic - let self_ = guard(self, |self_| self_.clear_no_drop()); + let mut self_ = guard(self, |self_| self_.clear_no_drop()); + unsafe { + self_.drop_elements(); + } + } - if mem::needs_drop::() && self_.len() != 0 { - unsafe { - for item in self_.iter() { - item.drop(); - } + unsafe fn drop_elements(&mut self) { + if mem::needs_drop::() && self.len() != 0 { + for item in self.iter() { + item.drop(); } } } @@ -690,9 +605,9 @@ impl RawTable { pub fn shrink_to(&mut self, min_size: usize, hasher: impl Fn(&T) -> u64) { // Calculate the minimal number of elements that we need to reserve // space for. 
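The probe sequence removed from `RawTable` above now lives in `ProbeSeq::move_next`, and it is quadratic with a group-sized stride. A standalone sketch with an illustrative group width, showing that every group of a power-of-two table is visited once before the sequence repeats:

```rust
// Group width chosen for illustration; the real value depends on the SIMD backend.
const GROUP_WIDTH: usize = 16;

fn probe_groups(start: usize, bucket_mask: usize) -> impl Iterator<Item = usize> {
    let groups = (bucket_mask + 1) / GROUP_WIDTH;
    let mut pos = start & bucket_mask;
    let mut stride = 0;
    core::iter::repeat_with(move || {
        let current = pos;
        // Triangular probing: the stride grows by one group width per step.
        stride += GROUP_WIDTH;
        pos = (pos + stride) & bucket_mask;
        current
    })
    .take(groups.max(1))
}

// For a 64-bucket table, the four group starts are each visited exactly once.
let mut seen: Vec<usize> = probe_groups(0, 63).collect();
seen.sort_unstable();
assert_eq!(seen, vec![0, 16, 32, 48]);
```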
- let min_size = usize::max(self.items, min_size); + let min_size = usize::max(self.table.items, min_size); if min_size == 0 { - *self = Self::new(); + *self = Self::new_in(self.table.alloc.clone()); return; } @@ -708,8 +623,8 @@ impl RawTable { // If we have more buckets than we need, shrink the table. if min_buckets < self.buckets() { // Fast path if the table is empty - if self.items == 0 { - *self = Self::with_capacity(min_size) + if self.table.items == 0 { + *self = Self::with_capacity_in(min_size, self.table.alloc.clone()) } else { // Avoid `Result::unwrap_or_else` because it bloats LLVM IR. if self @@ -726,7 +641,7 @@ impl RawTable { /// without reallocation. #[cfg_attr(feature = "inline-more", inline)] pub fn reserve(&mut self, additional: usize, hasher: impl Fn(&T) -> u64) { - if additional > self.growth_left { + if additional > self.table.growth_left { // Avoid `Result::unwrap_or_else` because it bloats LLVM IR. if self .reserve_rehash(additional, hasher, Fallibility::Infallible) @@ -745,7 +660,7 @@ impl RawTable { additional: usize, hasher: impl Fn(&T) -> u64, ) -> Result<(), TryReserveError> { - if additional > self.growth_left { + if additional > self.table.growth_left { self.reserve_rehash(additional, hasher, Fallibility::Fallible) } else { Ok(()) @@ -759,14 +674,14 @@ impl RawTable { &mut self, additional: usize, hasher: impl Fn(&T) -> u64, - fallability: Fallibility, + fallibility: Fallibility, ) -> Result<(), TryReserveError> { // Avoid `Option::ok_or_else` because it bloats LLVM IR. - let new_items = match self.items.checked_add(additional) { + let new_items = match self.table.items.checked_add(additional) { Some(new_items) => new_items, - None => return Err(fallability.capacity_overflow()), + None => return Err(fallibility.capacity_overflow()), }; - let full_capacity = bucket_mask_to_capacity(self.bucket_mask); + let full_capacity = bucket_mask_to_capacity(self.table.bucket_mask); if new_items <= full_capacity / 2 { // Rehash in-place without re-allocating if we have plenty of spare // capacity that is locked up due to DELETED entries. @@ -778,7 +693,7 @@ impl RawTable { self.resize( usize::max(new_items, full_capacity + 1), hasher, - fallability, + fallibility, ) } } @@ -789,35 +704,18 @@ impl RawTable { /// If `hasher` panics then some the table's contents may be lost. fn rehash_in_place(&mut self, hasher: impl Fn(&T) -> u64) { unsafe { - // Bulk convert all full control bytes to DELETED, and all DELETED - // control bytes to EMPTY. This effectively frees up all buckets - // containing a DELETED entry. - for i in (0..self.buckets()).step_by(Group::WIDTH) { - let group = Group::load_aligned(self.ctrl(i)); - let group = group.convert_special_to_empty_and_full_to_deleted(); - group.store_aligned(self.ctrl(i)); - } - - // Fix up the trailing control bytes. See the comments in set_ctrl - // for the handling of tables smaller than the group width. - if self.buckets() < Group::WIDTH { - self.ctrl(0) - .copy_to(self.ctrl(Group::WIDTH), self.buckets()); - } else { - self.ctrl(0) - .copy_to(self.ctrl(self.buckets()), Group::WIDTH); - } - // If the hash function panics then properly clean up any elements // that we haven't rehashed yet. We unfortunately can't preserve the // element since we lost their hash and have no way of recovering it // without risking another panic. 
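
The `reserve`/`try_reserve` pair above differs only in the `Fallibility` value passed to `reserve_rehash`: the infallible path aborts on allocation failure, while the fallible one surfaces it as a `TryReserveError`. A usage sketch of the fallible path through the public `HashMap` API:

```rust
use hashbrown::{HashMap, TryReserveError};

fn main() -> Result<(), TryReserveError> {
    let mut map: HashMap<u32, u32> = HashMap::new();

    // Ask for room for 100 more entries; allocation failure is reported, not fatal.
    map.try_reserve(100)?;
    assert!(map.capacity() >= 100);

    // An absurd request overflows the capacity calculation and comes back as Err.
    assert!(map.try_reserve(usize::MAX).is_err());
    Ok(())
}
```
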
- let mut guard = guard(self, |self_| { + self.table.prepare_rehash_in_place(); + + let mut guard = guard(&mut self.table, move |self_| { if mem::needs_drop::() { for i in 0..self_.buckets() { if *self_.ctrl(i) == DELETED { self_.set_ctrl(i, EMPTY); - self_.bucket(i).drop(); + self_.bucket::(i).drop(); self_.items -= 1; } } @@ -832,6 +730,7 @@ impl RawTable { if *guard.ctrl(i) != DELETED { continue; } + 'inner: loop { // Hash the current item let item = guard.bucket(i); @@ -845,25 +744,19 @@ impl RawTable { // size. If both the new and old position fall within the // same unaligned group, then there is no benefit in moving // it and we can just continue to the next item. - let probe_index = |pos: usize| { - (pos.wrapping_sub(guard.probe_seq(hash).pos) & guard.bucket_mask) - / Group::WIDTH - }; - if likely(probe_index(i) == probe_index(new_i)) { - guard.set_ctrl(i, h2(hash)); + if likely(guard.is_in_same_group(i, new_i, hash)) { + guard.set_ctrl_h2(i, hash); continue 'outer; } // We are moving the current item to a new position. Write // our H2 to the control byte of the new position. - let prev_ctrl = *guard.ctrl(new_i); - guard.set_ctrl(new_i, h2(hash)); - + let prev_ctrl = guard.replace_ctrl_h2(new_i, hash); if prev_ctrl == EMPTY { + guard.set_ctrl(i, EMPTY); // If the target slot is empty, simply move the current // element into the new slot and clear the old control // byte. - guard.set_ctrl(i, EMPTY); guard.bucket(new_i).copy_from_nonoverlapping(&item); continue 'outer; } else { @@ -888,27 +781,12 @@ impl RawTable { &mut self, capacity: usize, hasher: impl Fn(&T) -> u64, - fallability: Fallibility, + fallibility: Fallibility, ) -> Result<(), TryReserveError> { unsafe { - debug_assert!(self.items <= capacity); - - // Allocate and initialize the new table. - let mut new_table = Self::fallible_with_capacity(capacity, fallability)?; - new_table.growth_left -= self.items; - new_table.items = self.items; - - // The hash function may panic, in which case we simply free the new - // table without dropping any elements that may have been copied into - // it. - // - // This guard is also used to free the old table on success, see - // the comment at the bottom of this function. - let mut new_table = guard(ManuallyDrop::new(new_table), |new_table| { - if !new_table.is_empty_singleton() { - new_table.free_buckets(); - } - }); + let mut new_table = + self.table + .prepare_resize(TableLayout::new::(), capacity, fallibility)?; // Copy all elements to the new table. for item in self.iter() { @@ -919,8 +797,7 @@ impl RawTable { // - there are no DELETED entries. // - we know there is enough space in the table. // - all elements are unique. - let index = new_table.find_insert_slot(hash); - new_table.set_ctrl(index, h2(hash)); + let (index, _) = new_table.prepare_insert_slot(hash); new_table.bucket(index).copy_from_nonoverlapping(&item); } @@ -928,7 +805,7 @@ impl RawTable { // self with the new table. The old table will have its memory freed but // the items will not be dropped (since they have been moved into the // new table). - mem::swap(self, &mut new_table); + mem::swap(&mut self.table, &mut new_table); Ok(()) } @@ -940,26 +817,46 @@ impl RawTable { #[cfg_attr(feature = "inline-more", inline)] pub fn insert(&mut self, hash: u64, value: T, hasher: impl Fn(&T) -> u64) -> Bucket { unsafe { - let mut index = self.find_insert_slot(hash); + let mut index = self.table.find_insert_slot(hash); // We can avoid growing the table once we have reached our load // factor if we are replacing a tombstone. 
This works since the // number of EMPTY slots does not change in this case. - let old_ctrl = *self.ctrl(index); - if unlikely(self.growth_left == 0 && special_is_empty(old_ctrl)) { + let old_ctrl = *self.table.ctrl(index); + if unlikely(self.table.growth_left == 0 && special_is_empty(old_ctrl)) { self.reserve(1, hasher); - index = self.find_insert_slot(hash); + index = self.table.find_insert_slot(hash); } + self.table.record_item_insert_at(index, old_ctrl, hash); + let bucket = self.bucket(index); - self.growth_left -= special_is_empty(old_ctrl) as usize; - self.set_ctrl(index, h2(hash)); bucket.write(value); - self.items += 1; bucket } } + /// Attempts to insert a new element without growing the table and return its raw bucket. + /// + /// Returns an `Err` containing the given element if inserting it would require growing the + /// table. + /// + /// This does not check if the given element already exists in the table. + #[cfg(feature = "raw")] + #[cfg_attr(feature = "inline-more", inline)] + pub fn try_insert_no_grow(&mut self, hash: u64, value: T) -> Result, T> { + unsafe { + match self.table.prepare_insert_no_grow(hash) { + Ok(index) => { + let bucket = self.bucket(index); + bucket.write(value); + Ok(bucket) + } + Err(()) => Err(value), + } + } + } + /// Inserts a new element into the table, and returns a mutable reference to it. /// /// This does not check if the given element already exists in the table. @@ -977,17 +874,15 @@ impl RawTable { #[cfg(any(feature = "raw", feature = "rustc-internal-api"))] pub fn insert_no_grow(&mut self, hash: u64, value: T) -> Bucket { unsafe { - let index = self.find_insert_slot(hash); - let bucket = self.bucket(index); + let (index, old_ctrl) = self.table.prepare_insert_slot(hash); + let bucket = self.table.bucket(index); // If we are replacing a DELETED entry then we don't need to update // the load counter. - let old_ctrl = *self.ctrl(index); - self.growth_left -= special_is_empty(old_ctrl) as usize; + self.table.growth_left -= special_is_empty(old_ctrl) as usize; - self.set_ctrl(index, h2(hash)); bucket.write(value); - self.items += 1; + self.table.items += 1; bucket } } @@ -1004,14 +899,14 @@ impl RawTable { F: FnOnce(T) -> Option, { let index = self.bucket_index(&bucket); - let old_ctrl = *self.ctrl(index); + let old_ctrl = *self.table.ctrl(index); debug_assert!(is_full(old_ctrl)); - let old_growth_left = self.growth_left; + let old_growth_left = self.table.growth_left; let item = self.remove(bucket); if let Some(new_item) = f(item) { - self.growth_left = old_growth_left; - self.set_ctrl(index, old_ctrl); - self.items += 1; + self.table.growth_left = old_growth_left; + self.table.set_ctrl(index, old_ctrl); + self.table.items += 1; self.bucket(index).write(new_item); true } else { @@ -1053,38 +948,83 @@ impl RawTable { } } + /// Attempts to get mutable references to `N` entries in the table at once. + /// + /// Returns an array of length `N` with the results of each query. For soundness, + /// at most one mutable reference will be returned to any entry. An + /// `Err(UnavailableMutError::Duplicate(i))` in the returned array indicates that a suitable + /// entry exists, but a mutable reference to it already occurs at index `i` in the returned + /// array. + /// + /// The `eq` argument should be a closure such that `eq(i, k)` returns true if `k` is equal to + /// the `i`th key to be looked up. + /// + /// This method is available only if the `nightly` feature is enabled. 
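
`try_insert_no_grow` above is new in this release and is only compiled when the `raw` cargo feature is enabled; the sketch below assumes that feature, and `hash_one` is a local helper rather than a crate API.

```rust
use core::hash::{BuildHasher, Hash, Hasher};

use hashbrown::hash_map::DefaultHashBuilder;
use hashbrown::raw::RawTable;

// Helper: hash a value with a BuildHasher (not part of hashbrown's API).
fn hash_one<S: BuildHasher, T: Hash>(s: &S, x: &T) -> u64 {
    let mut h = s.build_hasher();
    x.hash(&mut h);
    h.finish()
}

fn main() {
    let hasher = DefaultHashBuilder::default();
    let mut table: RawTable<u64> = RawTable::with_capacity(4);

    for i in 0..16u64 {
        let h = hash_one(&hasher, &i);
        match table.try_insert_no_grow(h, i) {
            // The value fit in the existing allocation.
            Ok(_bucket) => {}
            // Inserting would require growing; the value is handed back untouched.
            Err(value) => {
                println!("no room for {} without growing", value);
                break;
            }
        }
    }
    assert!(table.len() <= table.capacity());
}
```
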
+ #[cfg(feature = "nightly")] + pub fn get_each_mut( + &mut self, + hashes: [u64; N], + mut eq: impl FnMut(usize, &T) -> bool, + ) -> [Result<&'_ mut T, UnavailableMutError>; N] { + // Collect the requested buckets. + // TODO use `MaybeUninit::uninit_array` here instead once that's stable. + let mut buckets: [MaybeUninit>>; N] = + unsafe { MaybeUninit::uninit().assume_init() }; + for i in 0..N { + buckets[i] = MaybeUninit::new(self.find(hashes[i], |k| eq(i, k))); + } + let buckets: [Option>; N] = unsafe { MaybeUninit::array_assume_init(buckets) }; + + // Walk through the buckets, checking for duplicates and building up the output array. + // TODO use `MaybeUninit::uninit_array` here instead once that's stable. + let mut out: [MaybeUninit>; N] = + unsafe { MaybeUninit::uninit().assume_init() }; + for i in 0..N { + out[i] = MaybeUninit::new( + #[allow(clippy::never_loop)] + 'outer: loop { + for j in 0..i { + match (&buckets[j], &buckets[i]) { + // These two buckets are the same, and we can't safely return a second + // mutable reference to the same entry. + (Some(prev), Some(cur)) if prev.as_ptr() == cur.as_ptr() => { + break 'outer Err(UnavailableMutError::Duplicate(j)); + } + _ => {} + } + } + // This bucket is distinct from all previous buckets (or it doesn't exist), so + // we're clear to return the result of the lookup. + break match &buckets[i] { + None => Err(UnavailableMutError::Absent), + Some(bkt) => unsafe { Ok(bkt.as_mut()) }, + }; + }, + ) + } + + unsafe { MaybeUninit::array_assume_init(out) } + } + /// Returns the number of elements the map can hold without reallocating. /// /// This number is a lower bound; the table might be able to hold /// more, but is guaranteed to be able to hold at least this many. #[cfg_attr(feature = "inline-more", inline)] pub fn capacity(&self) -> usize { - self.items + self.growth_left + self.table.items + self.table.growth_left } /// Returns the number of elements in the table. #[cfg_attr(feature = "inline-more", inline)] pub fn len(&self) -> usize { - self.items + self.table.items } /// Returns the number of buckets in the table. #[cfg_attr(feature = "inline-more", inline)] pub fn buckets(&self) -> usize { - self.bucket_mask + 1 - } - - /// Returns the number of control bytes in the table. - #[cfg_attr(feature = "inline-more", inline)] - fn num_ctrl_bytes(&self) -> usize { - self.bucket_mask + 1 + Group::WIDTH - } - - /// Returns whether this table points to the empty singleton with a capacity - /// of 0. - #[cfg_attr(feature = "inline-more", inline)] - fn is_empty_singleton(&self) -> bool { - self.bucket_mask == 0 + self.table.bucket_mask + 1 } /// Returns an iterator over every element in the table. It is up to @@ -1095,8 +1035,8 @@ impl RawTable { pub unsafe fn iter(&self) -> RawIter { let data = Bucket::from_base_index(self.data_end(), 0); RawIter { - iter: RawIterRange::new(self.ctrl.as_ptr(), data, self.buckets()), - items: self.items, + iter: RawIterRange::new(self.table.ctrl.as_ptr(), data, self.table.buckets()), + items: self.table.items, } } @@ -1108,14 +1048,14 @@ impl RawTable { /// `RawIterHash`. Because we cannot make the `next` method unsafe on the /// `RawIterHash` struct, we have to make the `iter_hash` method unsafe. #[cfg_attr(feature = "inline-more", inline)] - pub unsafe fn iter_hash(&self, hash: u64) -> RawIterHash<'_, T> { + pub unsafe fn iter_hash(&self, hash: u64) -> RawIterHash<'_, T, A> { RawIterHash::new(self, hash) } /// Returns an iterator which removes all elements from the table without /// freeing the memory. 
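
The duplicate check inside `get_each_mut` above compares the raw bucket pointer of each lookup against the ones already granted, and reports a duplicate by the index of the earlier occurrence instead of handing out a second `&mut` to the same entry. A safe, standalone sketch of just that classification step (the `Availability` type is hypothetical, for illustration only):

```rust
#[derive(Debug, PartialEq)]
enum Availability {
    Absent,           // no matching entry was found
    Duplicate(usize), // same entry already returned at this earlier index
    Unique,           // safe to hand out a mutable reference
}

fn classify(requests: &[Option<*mut u8>]) -> Vec<Availability> {
    let mut out = Vec::with_capacity(requests.len());
    for (i, cur) in requests.iter().enumerate() {
        let availability = match cur {
            None => Availability::Absent,
            Some(p) => match requests[..i].iter().position(|prev| *prev == Some(*p)) {
                Some(j) => Availability::Duplicate(j),
                None => Availability::Unique,
            },
        };
        out.push(availability);
    }
    out
}

fn main() {
    let a = 1u8;
    let b = 2u8;
    let pa = &a as *const u8 as *mut u8;
    let pb = &b as *const u8 as *mut u8;

    // The third request resolves to the same entry as the first one.
    let got = classify(&[Some(pa), Some(pb), Some(pa), None]);
    assert_eq!(
        got,
        vec![
            Availability::Unique,
            Availability::Unique,
            Availability::Duplicate(0),
            Availability::Absent,
        ]
    );
}
```
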
#[cfg_attr(feature = "inline-more", inline)] - pub fn drain(&mut self) -> RawDrain<'_, T> { + pub fn drain(&mut self) -> RawDrain<'_, T, A> { unsafe { let iter = self.iter(); self.drain_iter_from(iter) @@ -1130,11 +1070,11 @@ impl RawTable { /// It is up to the caller to ensure that the iterator is valid for this /// `RawTable` and covers all items that remain in the table. #[cfg_attr(feature = "inline-more", inline)] - pub unsafe fn drain_iter_from(&mut self, iter: RawIter) -> RawDrain<'_, T> { + pub unsafe fn drain_iter_from(&mut self, iter: RawIter) -> RawDrain<'_, T, A> { debug_assert_eq!(iter.len(), self.len()); RawDrain { iter, - table: ManuallyDrop::new(mem::replace(self, Self::new())), + table: ManuallyDrop::new(mem::replace(self, Self::new_in(self.table.alloc.clone()))), orig_table: NonNull::from(self), marker: PhantomData, } @@ -1146,31 +1086,33 @@ impl RawTable { /// /// It is up to the caller to ensure that the iterator is valid for this /// `RawTable` and covers all items that remain in the table. - pub unsafe fn into_iter_from(self, iter: RawIter) -> RawIntoIter { + pub unsafe fn into_iter_from(self, iter: RawIter) -> RawIntoIter { debug_assert_eq!(iter.len(), self.len()); - let alloc = self.into_alloc(); + let alloc = self.table.alloc.clone(); + let allocation = self.into_allocation(); RawIntoIter { iter, - alloc, + allocation, marker: PhantomData, + alloc, } } /// Converts the table into a raw allocation. The contents of the table /// should be dropped using a `RawIter` before freeing the allocation. #[cfg_attr(feature = "inline-more", inline)] - pub(crate) fn into_alloc(self) -> Option<(NonNull, Layout)> { - let alloc = if self.is_empty_singleton() { + pub(crate) fn into_allocation(self) -> Option<(NonNull, Layout)> { + let alloc = if self.table.is_empty_singleton() { None } else { // Avoid `Option::unwrap_or_else` because it bloats LLVM IR. - let (layout, ctrl_offset) = match calculate_layout::(self.buckets()) { + let (layout, ctrl_offset) = match calculate_layout::(self.table.buckets()) { Some(lco) => lco, None => unsafe { hint::unreachable_unchecked() }, }; Some(( - unsafe { NonNull::new_unchecked(self.ctrl.as_ptr().sub(ctrl_offset)) }, + unsafe { NonNull::new_unchecked(self.table.ctrl.as_ptr().sub(ctrl_offset)) }, layout, )) }; @@ -1179,18 +1121,364 @@ impl RawTable { } } -unsafe impl Send for RawTable where T: Send {} -unsafe impl Sync for RawTable where T: Sync {} +unsafe impl Send for RawTable where T: Send {} +unsafe impl Sync for RawTable where T: Sync {} + +impl RawTableInner { + #[cfg_attr(feature = "inline-more", inline)] + const fn new_in(alloc: A) -> Self { + Self { + // Be careful to cast the entire slice to a raw pointer. + ctrl: unsafe { NonNull::new_unchecked(Group::static_empty() as *const _ as *mut u8) }, + bucket_mask: 0, + items: 0, + growth_left: 0, + alloc, + } + } +} + +impl RawTableInner { + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn new_uninitialized( + alloc: A, + table_layout: TableLayout, + buckets: usize, + fallibility: Fallibility, + ) -> Result { + debug_assert!(buckets.is_power_of_two()); + + // Avoid `Option::ok_or_else` because it bloats LLVM IR. 
+ let (layout, ctrl_offset) = match table_layout.calculate_layout_for(buckets) { + Some(lco) => lco, + None => return Err(fallibility.capacity_overflow()), + }; + + let ptr: NonNull = match do_alloc(&alloc, layout) { + Ok(block) => block.cast(), + Err(_) => return Err(fallibility.alloc_err(layout)), + }; + + let ctrl = NonNull::new_unchecked(ptr.as_ptr().add(ctrl_offset)); + Ok(Self { + ctrl, + bucket_mask: buckets - 1, + items: 0, + growth_left: bucket_mask_to_capacity(buckets - 1), + alloc, + }) + } + + #[inline] + fn fallible_with_capacity( + alloc: A, + table_layout: TableLayout, + capacity: usize, + fallibility: Fallibility, + ) -> Result { + if capacity == 0 { + Ok(Self::new_in(alloc)) + } else { + unsafe { + let buckets = + capacity_to_buckets(capacity).ok_or_else(|| fallibility.capacity_overflow())?; + + let result = Self::new_uninitialized(alloc, table_layout, buckets, fallibility)?; + result.ctrl(0).write_bytes(EMPTY, result.num_ctrl_bytes()); + + Ok(result) + } + } + } + + /// Searches for an empty or deleted bucket which is suitable for inserting + /// a new element and sets the hash for that slot. + /// + /// There must be at least 1 empty bucket in the table. + #[inline] + unsafe fn prepare_insert_slot(&self, hash: u64) -> (usize, u8) { + let index = self.find_insert_slot(hash); + let old_ctrl = *self.ctrl(index); + self.set_ctrl_h2(index, hash); + (index, old_ctrl) + } + + /// Searches for an empty or deleted bucket which is suitable for inserting + /// a new element. + /// + /// There must be at least 1 empty bucket in the table. + #[inline] + fn find_insert_slot(&self, hash: u64) -> usize { + let mut probe_seq = self.probe_seq(hash); + loop { + unsafe { + let group = Group::load(self.ctrl(probe_seq.pos)); + if let Some(bit) = group.match_empty_or_deleted().lowest_set_bit() { + let result = (probe_seq.pos + bit) & self.bucket_mask; + + // In tables smaller than the group width, trailing control + // bytes outside the range of the table are filled with + // EMPTY entries. These will unfortunately trigger a + // match, but once masked may point to a full bucket that + // is already occupied. We detect this situation here and + // perform a second scan starting at the begining of the + // table. This second scan is guaranteed to find an empty + // slot (due to the load factor) before hitting the trailing + // control bytes (containing EMPTY). + if unlikely(is_full(*self.ctrl(result))) { + debug_assert!(self.bucket_mask < Group::WIDTH); + debug_assert_ne!(probe_seq.pos, 0); + return Group::load_aligned(self.ctrl(0)) + .match_empty_or_deleted() + .lowest_set_bit_nonzero(); + } + + return result; + } + } + probe_seq.move_next(self.bucket_mask); + } + } + + #[allow(clippy::mut_mut)] + #[inline] + unsafe fn prepare_rehash_in_place(&mut self) { + // Bulk convert all full control bytes to DELETED, and all DELETED + // control bytes to EMPTY. This effectively frees up all buckets + // containing a DELETED entry. + for i in (0..self.buckets()).step_by(Group::WIDTH) { + let group = Group::load_aligned(self.ctrl(i)); + let group = group.convert_special_to_empty_and_full_to_deleted(); + group.store_aligned(self.ctrl(i)); + } + + // Fix up the trailing control bytes. See the comments in set_ctrl + // for the handling of tables smaller than the group width. 
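
`find_insert_slot` above advances through groups with `ProbeSeq::move_next`, i.e. triangular probing: the stride grows by one group width on every step, which visits each group exactly once when the number of buckets is a power of two. A standalone sketch of the sequence, assuming a group width of 16:

```rust
const GROUP_WIDTH: usize = 16;

struct ProbeSeq {
    pos: usize,
    stride: usize,
}

impl ProbeSeq {
    fn move_next(&mut self, bucket_mask: usize) {
        // Triangular stride: 1, 3, 6, ... group widths from the starting position.
        self.stride += GROUP_WIDTH;
        self.pos += self.stride;
        self.pos &= bucket_mask;
    }
}

fn main() {
    let bucket_mask = 63; // 64 buckets, i.e. 4 groups of 16
    let mut seq = ProbeSeq { pos: 32, stride: 0 };

    let mut visited = vec![seq.pos / GROUP_WIDTH];
    for _ in 0..3 {
        seq.move_next(bucket_mask);
        visited.push(seq.pos / GROUP_WIDTH);
    }
    visited.sort_unstable();

    // All four groups are reached exactly once before the sequence wraps around.
    assert_eq!(visited, vec![0, 1, 2, 3]);
}
```
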
+ if self.buckets() < Group::WIDTH { + self.ctrl(0) + .copy_to(self.ctrl(Group::WIDTH), self.buckets()); + } else { + self.ctrl(0) + .copy_to(self.ctrl(self.buckets()), Group::WIDTH); + } + } + + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn bucket(&self, index: usize) -> Bucket { + debug_assert_ne!(self.bucket_mask, 0); + debug_assert!(index < self.buckets()); + Bucket::from_base_index(self.data_end(), index) + } + + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn data_end(&self) -> NonNull { + NonNull::new_unchecked(self.ctrl.as_ptr().cast()) + } + + /// Returns an iterator-like object for a probe sequence on the table. + /// + /// This iterator never terminates, but is guaranteed to visit each bucket + /// group exactly once. The loop using `probe_seq` must terminate upon + /// reaching a group containing an empty bucket. + #[inline] + fn probe_seq(&self, hash: u64) -> ProbeSeq { + ProbeSeq { + pos: h1(hash) & self.bucket_mask, + stride: 0, + } + } + + /// Returns the index of a bucket for which a value must be inserted if there is enough rooom + /// in the table, otherwise returns error + #[cfg(feature = "raw")] + #[inline] + unsafe fn prepare_insert_no_grow(&mut self, hash: u64) -> Result { + let index = self.find_insert_slot(hash); + let old_ctrl = *self.ctrl(index); + if unlikely(self.growth_left == 0 && special_is_empty(old_ctrl)) { + Err(()) + } else { + self.record_item_insert_at(index, old_ctrl, hash); + Ok(index) + } + } + + #[inline] + unsafe fn record_item_insert_at(&mut self, index: usize, old_ctrl: u8, hash: u64) { + self.growth_left -= special_is_empty(old_ctrl) as usize; + self.set_ctrl_h2(index, hash); + self.items += 1; + } + + #[inline] + fn is_in_same_group(&self, i: usize, new_i: usize, hash: u64) -> bool { + let probe_seq_pos = self.probe_seq(hash).pos; + let probe_index = + |pos: usize| (pos.wrapping_sub(probe_seq_pos) & self.bucket_mask) / Group::WIDTH; + probe_index(i) == probe_index(new_i) + } + + /// Sets a control byte to the hash, and possibly also the replicated control byte at + /// the end of the array. + #[inline] + unsafe fn set_ctrl_h2(&self, index: usize, hash: u64) { + self.set_ctrl(index, h2(hash)) + } + + #[inline] + unsafe fn replace_ctrl_h2(&self, index: usize, hash: u64) -> u8 { + let prev_ctrl = *self.ctrl(index); + self.set_ctrl_h2(index, hash); + prev_ctrl + } + + /// Sets a control byte, and possibly also the replicated control byte at + /// the end of the array. + #[inline] + unsafe fn set_ctrl(&self, index: usize, ctrl: u8) { + // Replicate the first Group::WIDTH control bytes at the end of + // the array without using a branch: + // - If index >= Group::WIDTH then index == index2. + // - Otherwise index2 == self.bucket_mask + 1 + index. + // + // The very last replicated control byte is never actually read because + // we mask the initial index for unaligned loads, but we write it + // anyways because it makes the set_ctrl implementation simpler. + // + // If there are fewer buckets than Group::WIDTH then this code will + // replicate the buckets at the end of the trailing group. 
For example + // with 2 buckets and a group size of 4, the control bytes will look + // like this: + // + // Real | Replicated + // --------------------------------------------- + // | [A] | [B] | [EMPTY] | [EMPTY] | [A] | [B] | + // --------------------------------------------- + let index2 = ((index.wrapping_sub(Group::WIDTH)) & self.bucket_mask) + Group::WIDTH; + + *self.ctrl(index) = ctrl; + *self.ctrl(index2) = ctrl; + } -impl Clone for RawTable { + /// Returns a pointer to a control byte. + #[inline] + unsafe fn ctrl(&self, index: usize) -> *mut u8 { + debug_assert!(index < self.num_ctrl_bytes()); + self.ctrl.as_ptr().add(index) + } + + #[inline] + fn buckets(&self) -> usize { + self.bucket_mask + 1 + } + + #[inline] + fn num_ctrl_bytes(&self) -> usize { + self.bucket_mask + 1 + Group::WIDTH + } + + #[inline] + fn is_empty_singleton(&self) -> bool { + self.bucket_mask == 0 + } + + #[allow(clippy::mut_mut)] + #[inline] + unsafe fn prepare_resize( + &self, + table_layout: TableLayout, + capacity: usize, + fallibility: Fallibility, + ) -> Result, TryReserveError> { + debug_assert!(self.items <= capacity); + + // Allocate and initialize the new table. + let mut new_table = RawTableInner::fallible_with_capacity( + self.alloc.clone(), + table_layout, + capacity, + fallibility, + )?; + new_table.growth_left -= self.items; + new_table.items = self.items; + + // The hash function may panic, in which case we simply free the new + // table without dropping any elements that may have been copied into + // it. + // + // This guard is also used to free the old table on success, see + // the comment at the bottom of this function. + Ok(guard(new_table, move |self_| { + if !self_.is_empty_singleton() { + self_.free_buckets(table_layout); + } + })) + } + + #[inline] + unsafe fn free_buckets(&mut self, table_layout: TableLayout) { + // Avoid `Option::unwrap_or_else` because it bloats LLVM IR. + let (layout, ctrl_offset) = match table_layout.calculate_layout_for(self.buckets()) { + Some(lco) => lco, + None => hint::unreachable_unchecked(), + }; + self.alloc.deallocate( + NonNull::new_unchecked(self.ctrl.as_ptr().sub(ctrl_offset)), + layout, + ); + } + + /// Marks all table buckets as empty without dropping their contents. + #[inline] + fn clear_no_drop(&mut self) { + if !self.is_empty_singleton() { + unsafe { + self.ctrl(0).write_bytes(EMPTY, self.num_ctrl_bytes()); + } + } + self.items = 0; + self.growth_left = bucket_mask_to_capacity(self.bucket_mask); + } + + #[inline] + unsafe fn erase(&mut self, index: usize) { + debug_assert!(is_full(*self.ctrl(index))); + let index_before = index.wrapping_sub(Group::WIDTH) & self.bucket_mask; + let empty_before = Group::load(self.ctrl(index_before)).match_empty(); + let empty_after = Group::load(self.ctrl(index)).match_empty(); + + // If we are inside a continuous block of Group::WIDTH full or deleted + // cells then a probe window may have seen a full block when trying to + // insert. We therefore need to keep that block non-empty so that + // lookups will continue searching to the next probe window. + // + // Note that in this context `leading_zeros` refers to the bytes at the + // end of a group, while `trailing_zeros` refers to the bytes at the + // begining of a group. 
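
The `index2` expression in `set_ctrl` above is the branch-free mirror index for the replicated control bytes at the end of the array. A standalone sketch of just that arithmetic, assuming a group width of 4:

```rust
const GROUP_WIDTH: usize = 4;

fn mirror_index(index: usize, bucket_mask: usize) -> usize {
    // - If index >= GROUP_WIDTH then the mirror is the index itself.
    // - Otherwise the mirror is bucket_mask + 1 + index (past the end of the table).
    (index.wrapping_sub(GROUP_WIDTH) & bucket_mask) + GROUP_WIDTH
}

fn main() {
    let bucket_mask = 3; // 4 buckets

    // The first GROUP_WIDTH control bytes are replicated past the end of the table.
    assert_eq!(mirror_index(0, bucket_mask), 4);
    assert_eq!(mirror_index(3, bucket_mask), 7);

    // Indices at or beyond GROUP_WIDTH (in a larger table) map to themselves.
    assert_eq!(mirror_index(5, 7), 5);
}
```
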
+ let ctrl = if empty_before.leading_zeros() + empty_after.trailing_zeros() >= Group::WIDTH { + DELETED + } else { + self.growth_left += 1; + EMPTY + }; + self.set_ctrl(index, ctrl); + self.items -= 1; + } +} + +impl Clone for RawTable { fn clone(&self) -> Self { - if self.is_empty_singleton() { - Self::new() + if self.table.is_empty_singleton() { + Self::new_in(self.table.alloc.clone()) } else { unsafe { let mut new_table = ManuallyDrop::new( // Avoid `Result::ok_or_else` because it bloats LLVM IR. - match Self::new_uninitialized(self.buckets(), Fallibility::Infallible) { + match Self::new_uninitialized( + self.table.alloc.clone(), + self.table.buckets(), + Fallibility::Infallible, + ) { Ok(table) => table, Err(_) => hint::unreachable_unchecked(), }, @@ -1208,26 +1496,26 @@ impl Clone for RawTable { } fn clone_from(&mut self, source: &Self) { - if source.is_empty_singleton() { - *self = Self::new(); + if source.table.is_empty_singleton() { + *self = Self::new_in(self.table.alloc.clone()); } else { unsafe { // First, drop all our elements without clearing the control bytes. - if mem::needs_drop::() && self.len() != 0 { - for item in self.iter() { - item.drop(); - } - } + self.drop_elements(); // If necessary, resize our table to match the source. if self.buckets() != source.buckets() { // Skip our drop by using ptr::write. - if !self.is_empty_singleton() { + if !self.table.is_empty_singleton() { self.free_buckets(); } (self as *mut Self).write( // Avoid `Result::unwrap_or_else` because it bloats LLVM IR. - match Self::new_uninitialized(source.buckets(), Fallibility::Infallible) { + match Self::new_uninitialized( + self.table.alloc.clone(), + source.buckets(), + Fallibility::Infallible, + ) { Ok(table) => table, Err(_) => hint::unreachable_unchecked(), }, @@ -1247,7 +1535,7 @@ impl Clone for RawTable { trait RawTableClone { unsafe fn clone_from_spec(&mut self, source: &Self, on_panic: impl FnMut(&mut Self)); } -impl RawTableClone for RawTable { +impl RawTableClone for RawTable { #[cfg_attr(feature = "inline-more", inline)] default_fn! { unsafe fn clone_from_spec(&mut self, source: &Self, on_panic: impl FnMut(&mut Self)) { @@ -1256,29 +1544,31 @@ impl RawTableClone for RawTable { } } #[cfg(feature = "nightly")] -impl RawTableClone for RawTable { +impl RawTableClone for RawTable { #[cfg_attr(feature = "inline-more", inline)] unsafe fn clone_from_spec(&mut self, source: &Self, _on_panic: impl FnMut(&mut Self)) { source + .table .ctrl(0) - .copy_to_nonoverlapping(self.ctrl(0), self.num_ctrl_bytes()); + .copy_to_nonoverlapping(self.table.ctrl(0), self.table.num_ctrl_bytes()); source .data_start() - .copy_to_nonoverlapping(self.data_start(), self.buckets()); + .copy_to_nonoverlapping(self.data_start(), self.table.buckets()); - self.items = source.items; - self.growth_left = source.growth_left; + self.table.items = source.table.items; + self.table.growth_left = source.table.growth_left; } } -impl RawTable { +impl RawTable { /// Common code for clone and clone_from. Assumes `self.buckets() == source.buckets()`. #[cfg_attr(feature = "inline-more", inline)] unsafe fn clone_from_impl(&mut self, source: &Self, mut on_panic: impl FnMut(&mut Self)) { // Copy the control bytes unchanged. 
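
The erase path above chooses between writing `EMPTY` and leaving a `DELETED` tombstone by checking whether the erased slot sits inside a run of at least one group width of occupied slots, since a probe that saw such a full window must keep searching past it. A simplified sketch with the empty-masks modeled as `u8` bitmaps (bit i set means slot i of the group is empty; the real code uses SIMD group masks):

```rust
const GROUP_WIDTH: u32 = 8;

#[derive(Debug, PartialEq)]
enum Ctrl {
    Empty,
    Deleted,
}

fn ctrl_after_erase(empty_before: u8, empty_after: u8) -> Ctrl {
    // leading_zeros: occupied slots at the end of the preceding group;
    // trailing_zeros: occupied slots at the start of the group holding the erased slot.
    if empty_before.leading_zeros() + empty_after.trailing_zeros() >= GROUP_WIDTH {
        // A probe window may have seen a full block here, so keep it non-empty.
        Ctrl::Deleted
    } else {
        Ctrl::Empty
    }
}

fn main() {
    // No empty slot anywhere nearby: a tombstone is required.
    assert_eq!(ctrl_after_erase(0b0000_0000, 0b0000_0000), Ctrl::Deleted);

    // Empty slots nearby break the full run: safe to mark the slot EMPTY again.
    assert_eq!(ctrl_after_erase(0b1000_0000, 0b0000_0001), Ctrl::Empty);
}
```
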
We do this in a single pass source + .table .ctrl(0) - .copy_to_nonoverlapping(self.ctrl(0), self.num_ctrl_bytes()); + .copy_to_nonoverlapping(self.table.ctrl(0), self.table.num_ctrl_bytes()); // The cloning of elements may panic, in which case we need // to make sure we drop only the elements that have been @@ -1286,7 +1576,7 @@ impl RawTable { let mut guard = guard((0, &mut *self), |(index, self_)| { if mem::needs_drop::() && self_.len() != 0 { for i in 0..=*index { - if is_full(*self_.ctrl(i)) { + if is_full(*self_.table.ctrl(i)) { self_.bucket(i).drop(); } } @@ -1310,8 +1600,8 @@ impl RawTable { // Successfully cloned all items, no need to clean up. mem::forget(guard); - self.items = source.items; - self.growth_left = source.growth_left; + self.table.items = source.table.items; + self.table.growth_left = source.table.growth_left; } /// Variant of `clone_from` to use when a hasher is available. @@ -1321,8 +1611,8 @@ impl RawTable { // elements one by one. We don't do this if we have the same number of // buckets as the source since we can just copy the contents directly // in that case. - if self.buckets() != source.buckets() - && bucket_mask_to_capacity(self.bucket_mask) >= source.len() + if self.table.buckets() != source.table.buckets() + && bucket_mask_to_capacity(self.table.bucket_mask) >= source.len() { self.clear(); @@ -1343,8 +1633,7 @@ impl RawTable { // - there are no DELETED entries. // - we know there is enough space in the table. // - all elements are unique. - let index = guard_self.find_insert_slot(hash); - guard_self.set_ctrl(index, h2(hash)); + let (index, _) = guard_self.table.prepare_insert_slot(hash); guard_self.bucket(index).write(item); } } @@ -1352,53 +1641,52 @@ impl RawTable { // Successfully cloned all items, no need to clean up. 
mem::forget(guard_self); - self.items = source.items; - self.growth_left -= source.items; + self.table.items = source.table.items; + self.table.growth_left -= source.table.items; } else { self.clone_from(source); } } } +impl Default for RawTable { + #[cfg_attr(feature = "inline-more", inline)] + fn default() -> Self { + Self::new_in(Default::default()) + } +} + #[cfg(feature = "nightly")] -unsafe impl<#[may_dangle] T> Drop for RawTable { +unsafe impl<#[may_dangle] T, A: Allocator + Clone> Drop for RawTable { #[cfg_attr(feature = "inline-more", inline)] fn drop(&mut self) { - if !self.is_empty_singleton() { + if !self.table.is_empty_singleton() { unsafe { - if mem::needs_drop::() && self.len() != 0 { - for item in self.iter() { - item.drop(); - } - } + self.drop_elements(); self.free_buckets(); } } } } #[cfg(not(feature = "nightly"))] -impl Drop for RawTable { +impl Drop for RawTable { #[cfg_attr(feature = "inline-more", inline)] fn drop(&mut self) { - if !self.is_empty_singleton() { + if !self.table.is_empty_singleton() { unsafe { - if mem::needs_drop::() && self.len() != 0 { - for item in self.iter() { - item.drop(); - } - } + self.drop_elements(); self.free_buckets(); } } } } -impl IntoIterator for RawTable { +impl IntoIterator for RawTable { type Item = T; - type IntoIter = RawIntoIter; + type IntoIter = RawIntoIter; #[cfg_attr(feature = "inline-more", inline)] - fn into_iter(self) -> RawIntoIter { + fn into_iter(self) -> RawIntoIter { unsafe { let iter = self.iter(); self.into_iter_from(iter) @@ -1681,6 +1969,14 @@ impl RawIter { } } } + + unsafe fn drop_elements(&mut self) { + if mem::needs_drop::() && self.len() != 0 { + for item in self { + item.drop(); + } + } + } } impl Clone for RawIter { @@ -1720,62 +2016,55 @@ impl ExactSizeIterator for RawIter {} impl FusedIterator for RawIter {} /// Iterator which consumes a table and returns elements. 
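
`clone_from_with_hasher` above re-clones the source's elements into the destination's existing buckets when they already provide enough capacity, instead of allocating a fresh table. A usage sketch through the public `HashMap::clone_from`:

```rust
use hashbrown::HashMap;

fn main() {
    let source: HashMap<u32, String> = (0..4).map(|i| (i, i.to_string())).collect();

    let mut target: HashMap<u32, String> = HashMap::with_capacity(16);
    target.insert(99, "old".to_string());

    // clone_from drops the old contents and re-clones from `source`; when the
    // existing table is large enough it can be reused rather than reallocated.
    target.clone_from(&source);

    assert_eq!(target.len(), 4);
    assert_eq!(target.get(&3).map(String::as_str), Some("3"));
    assert!(!target.contains_key(&99));
}
```
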
-pub struct RawIntoIter { +pub struct RawIntoIter { iter: RawIter, - alloc: Option<(NonNull, Layout)>, + allocation: Option<(NonNull, Layout)>, marker: PhantomData, + alloc: A, } -impl RawIntoIter { +impl RawIntoIter { #[cfg_attr(feature = "inline-more", inline)] pub fn iter(&self) -> RawIter { self.iter.clone() } } -unsafe impl Send for RawIntoIter where T: Send {} -unsafe impl Sync for RawIntoIter where T: Sync {} +unsafe impl Send for RawIntoIter where T: Send {} +unsafe impl Sync for RawIntoIter where T: Sync {} #[cfg(feature = "nightly")] -unsafe impl<#[may_dangle] T> Drop for RawIntoIter { +unsafe impl<#[may_dangle] T, A: Allocator + Clone> Drop for RawIntoIter { #[cfg_attr(feature = "inline-more", inline)] fn drop(&mut self) { unsafe { // Drop all remaining elements - if mem::needs_drop::() && self.iter.len() != 0 { - while let Some(item) = self.iter.next() { - item.drop(); - } - } + self.iter.drop_elements(); // Free the table - if let Some((ptr, layout)) = self.alloc { - dealloc(ptr.as_ptr(), layout); + if let Some((ptr, layout)) = self.allocation { + self.alloc.deallocate(ptr, layout); } } } } #[cfg(not(feature = "nightly"))] -impl Drop for RawIntoIter { +impl Drop for RawIntoIter { #[cfg_attr(feature = "inline-more", inline)] fn drop(&mut self) { unsafe { // Drop all remaining elements - if mem::needs_drop::() && self.iter.len() != 0 { - while let Some(item) = self.iter.next() { - item.drop(); - } - } + self.iter.drop_elements(); // Free the table - if let Some((ptr, layout)) = self.alloc { - dealloc(ptr.as_ptr(), layout); + if let Some((ptr, layout)) = self.allocation { + self.alloc.deallocate(ptr, layout); } } } } -impl Iterator for RawIntoIter { +impl Iterator for RawIntoIter { type Item = T; #[cfg_attr(feature = "inline-more", inline)] @@ -1789,44 +2078,40 @@ impl Iterator for RawIntoIter { } } -impl ExactSizeIterator for RawIntoIter {} -impl FusedIterator for RawIntoIter {} +impl ExactSizeIterator for RawIntoIter {} +impl FusedIterator for RawIntoIter {} /// Iterator which consumes elements without freeing the table storage. -pub struct RawDrain<'a, T> { +pub struct RawDrain<'a, T, A: Allocator + Clone = Global> { iter: RawIter, // The table is moved into the iterator for the duration of the drain. This // ensures that an empty table is left if the drain iterator is leaked // without dropping. - table: ManuallyDrop>, - orig_table: NonNull>, + table: ManuallyDrop>, + orig_table: NonNull>, // We don't use a &'a mut RawTable because we want RawDrain to be // covariant over T. - marker: PhantomData<&'a RawTable>, + marker: PhantomData<&'a RawTable>, } -impl RawDrain<'_, T> { +impl RawDrain<'_, T, A> { #[cfg_attr(feature = "inline-more", inline)] pub fn iter(&self) -> RawIter { self.iter.clone() } } -unsafe impl Send for RawDrain<'_, T> where T: Send {} -unsafe impl Sync for RawDrain<'_, T> where T: Sync {} +unsafe impl Send for RawDrain<'_, T, A> where T: Send {} +unsafe impl Sync for RawDrain<'_, T, A> where T: Sync {} -impl Drop for RawDrain<'_, T> { +impl Drop for RawDrain<'_, T, A> { #[cfg_attr(feature = "inline-more", inline)] fn drop(&mut self) { unsafe { // Drop all remaining elements. Note that this may panic. - if mem::needs_drop::() && self.iter.len() != 0 { - while let Some(item) = self.iter.next() { - item.drop(); - } - } + self.iter.drop_elements(); // Reset the contents of the table now that all elements have been // dropped. 
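
`RawDrain` above moves the table into the iterator for the duration of the drain, so a leaked drain leaves behind an empty table rather than a dangling one, and the backing storage is kept for reuse. A usage sketch through the public `HashMap::drain`:

```rust
use hashbrown::HashMap;

fn main() {
    let mut map: HashMap<u32, &str> = HashMap::new();
    map.insert(1, "one");
    map.insert(2, "two");

    // Draining yields every (key, value) pair and empties the map in place.
    let drained: Vec<(u32, &str)> = map.drain().collect();
    assert_eq!(drained.len(), 2);
    assert!(map.is_empty());

    // The allocation is retained, so refilling the map does not reallocate.
    assert!(map.capacity() >= 2);
}
```
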
@@ -1840,7 +2125,7 @@ impl Drop for RawDrain<'_, T> { } } -impl Iterator for RawDrain<'_, T> { +impl Iterator for RawDrain<'_, T, A> { type Item = T; #[cfg_attr(feature = "inline-more", inline)] @@ -1857,14 +2142,19 @@ impl Iterator for RawDrain<'_, T> { } } -impl ExactSizeIterator for RawDrain<'_, T> {} -impl FusedIterator for RawDrain<'_, T> {} +impl ExactSizeIterator for RawDrain<'_, T, A> {} +impl FusedIterator for RawDrain<'_, T, A> {} /// Iterator over occupied buckets that could match a given hash. /// /// In rare cases, the iterator may return a bucket with a different hash. -pub struct RawIterHash<'a, T> { - table: &'a RawTable, +pub struct RawIterHash<'a, T, A: Allocator + Clone = Global> { + inner: RawIterHashInner<'a, A>, + _marker: PhantomData, +} + +struct RawIterHashInner<'a, A: Allocator + Clone> { + table: &'a RawTableInner, // The top 7 bits of the hash. h2_hash: u8, @@ -1872,28 +2162,34 @@ pub struct RawIterHash<'a, T> { // The sequence of groups to probe in the search. probe_seq: ProbeSeq, - // The current group and its position. - pos: usize, group: Group, // The elements within the group with a matching h2-hash. bitmask: BitMaskIter, } -impl<'a, T> RawIterHash<'a, T> { - fn new(table: &'a RawTable, hash: u64) -> Self { +impl<'a, T, A: Allocator + Clone> RawIterHash<'a, T, A> { + #[cfg_attr(feature = "inline-more", inline)] + fn new(table: &'a RawTable, hash: u64) -> Self { + RawIterHash { + inner: RawIterHashInner::new(&table.table, hash), + _marker: PhantomData, + } + } +} +impl<'a, A: Allocator + Clone> RawIterHashInner<'a, A> { + #[cfg_attr(feature = "inline-more", inline)] + fn new(table: &'a RawTableInner, hash: u64) -> Self { unsafe { let h2_hash = h2(hash); - let mut probe_seq = table.probe_seq(hash); - let pos = probe_seq.next().unwrap(); - let group = Group::load(table.ctrl(pos)); + let probe_seq = table.probe_seq(hash); + let group = Group::load(table.ctrl(probe_seq.pos)); let bitmask = group.match_byte(h2_hash).into_iter(); - RawIterHash { + RawIterHashInner { table, h2_hash, probe_seq, - pos, group, bitmask, } @@ -1901,24 +2197,66 @@ impl<'a, T> RawIterHash<'a, T> { } } -impl<'a, T> Iterator for RawIterHash<'a, T> { +impl<'a, T, A: Allocator + Clone> Iterator for RawIterHash<'a, T, A> { type Item = Bucket; fn next(&mut self) -> Option> { + unsafe { + match self.inner.next() { + Some(index) => Some(self.inner.table.bucket(index)), + None => None, + } + } + } +} + +impl<'a, A: Allocator + Clone> Iterator for RawIterHashInner<'a, A> { + type Item = usize; + + fn next(&mut self) -> Option { unsafe { loop { if let Some(bit) = self.bitmask.next() { - let index = (self.pos + bit) & self.table.bucket_mask; - let bucket = self.table.bucket(index); - return Some(bucket); + let index = (self.probe_seq.pos + bit) & self.table.bucket_mask; + return Some(index); } if likely(self.group.match_empty().any_bit_set()) { return None; } - self.pos = self.probe_seq.next().unwrap(); - self.group = Group::load(self.table.ctrl(self.pos)); + self.probe_seq.move_next(self.table.bucket_mask); + self.group = Group::load(self.table.ctrl(self.probe_seq.pos)); self.bitmask = self.group.match_byte(self.h2_hash).into_iter(); } } } } + +#[cfg(test)] +mod test_map { + use super::*; + + #[test] + fn rehash() { + let mut table = RawTable::new(); + let hasher = |i: &u64| *i; + for i in 0..100 { + table.insert(i, i, hasher); + } + + for i in 0..100 { + unsafe { + assert_eq!(table.find(i, |x| *x == i).map(|b| b.read()), Some(i)); + } + assert!(table.find(i + 100, |x| *x == i + 100).is_none()); + 
} + + table.rehash_in_place(hasher); + + for i in 0..100 { + unsafe { + assert_eq!(table.find(i, |x| *x == i).map(|b| b.read()), Some(i)); + } + assert!(table.find(i + 100, |x| *x == i + 100).is_none()); + } + } +} diff --git a/src/raw/sse2.rs b/src/raw/sse2.rs index a27bc09..eed9684 100644 --- a/src/raw/sse2.rs +++ b/src/raw/sse2.rs @@ -28,12 +28,13 @@ impl Group { /// value for an empty hash table. /// /// This is guaranteed to be aligned to the group size. + #[allow(clippy::items_after_statements)] pub const fn static_empty() -> &'static [u8; Group::WIDTH] { #[repr(C)] struct AlignedBytes { _align: [Group; 0], bytes: [u8; Group::WIDTH], - }; + } const ALIGNED_BYTES: AlignedBytes = AlignedBytes { _align: [], bytes: [EMPTY; Group::WIDTH], @@ -45,7 +46,7 @@ impl Group { #[inline] #[allow(clippy::cast_ptr_alignment)] // unaligned load pub unsafe fn load(ptr: *const u8) -> Self { - Group(x86::_mm_loadu_si128(ptr as *const _)) + Group(x86::_mm_loadu_si128(ptr.cast())) } /// Loads a group of bytes starting at the given address, which must be @@ -55,7 +56,7 @@ impl Group { pub unsafe fn load_aligned(ptr: *const u8) -> Self { // FIXME: use align_offset once it stabilizes debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); - Group(x86::_mm_load_si128(ptr as *const _)) + Group(x86::_mm_load_si128(ptr.cast())) } /// Stores the group of bytes to the given address, which must be @@ -65,7 +66,7 @@ impl Group { pub unsafe fn store_aligned(self, ptr: *mut u8) { // FIXME: use align_offset once it stabilizes debug_assert_eq!(ptr as usize & (mem::align_of::() - 1), 0); - x86::_mm_store_si128(ptr as *mut _, self.0); + x86::_mm_store_si128(ptr.cast(), self.0); } /// Returns a `BitMask` indicating all bytes in the group which have diff --git a/src/rustc_entry.rs b/src/rustc_entry.rs index b6ea7bc..1793c4a 100644 --- a/src/rustc_entry.rs +++ b/src/rustc_entry.rs @@ -1,14 +1,15 @@ use self::RustcEntry::*; -use crate::map::{make_hash, Drain, HashMap, IntoIter, Iter, IterMut}; -use crate::raw::{Bucket, RawTable}; +use crate::map::{make_insert_hash, Drain, HashMap, IntoIter, Iter, IterMut}; +use crate::raw::{Allocator, Bucket, Global, RawTable}; use core::fmt::{self, Debug}; use core::hash::{BuildHasher, Hash}; use core::mem; -impl HashMap +impl HashMap where K: Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { /// Gets the given key's corresponding entry in the map for in-place manipulation. /// @@ -30,8 +31,8 @@ where /// assert_eq!(letters.get(&'y'), None); /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn rustc_entry(&mut self, key: K) -> RustcEntry<'_, K, V> { - let hash = make_hash(&self.hash_builder, &key); + pub fn rustc_entry(&mut self, key: K) -> RustcEntry<'_, K, V, A> { + let hash = make_insert_hash(&self.hash_builder, &key); if let Some(elem) = self.table.find(hash, |q| q.0.eq(&key)) { RustcEntry::Occupied(RustcOccupiedEntry { key: Some(key), @@ -59,15 +60,18 @@ where /// /// [`HashMap`]: struct.HashMap.html /// [`entry`]: struct.HashMap.html#method.rustc_entry -pub enum RustcEntry<'a, K, V> { +pub enum RustcEntry<'a, K, V, A = Global> +where + A: Allocator + Clone, +{ /// An occupied entry. - Occupied(RustcOccupiedEntry<'a, K, V>), + Occupied(RustcOccupiedEntry<'a, K, V, A>), /// A vacant entry. 
- Vacant(RustcVacantEntry<'a, K, V>), + Vacant(RustcVacantEntry<'a, K, V, A>), } -impl Debug for RustcEntry<'_, K, V> { +impl Debug for RustcEntry<'_, K, V, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match *self { Vacant(ref v) => f.debug_tuple("Entry").field(v).finish(), @@ -80,26 +84,31 @@ impl Debug for RustcEntry<'_, K, V> { /// It is part of the [`RustcEntry`] enum. /// /// [`RustcEntry`]: enum.RustcEntry.html -pub struct RustcOccupiedEntry<'a, K, V> { +pub struct RustcOccupiedEntry<'a, K, V, A = Global> +where + A: Allocator + Clone, +{ key: Option, elem: Bucket<(K, V)>, - table: &'a mut RawTable<(K, V)>, + table: &'a mut RawTable<(K, V), A>, } -unsafe impl Send for RustcOccupiedEntry<'_, K, V> +unsafe impl Send for RustcOccupiedEntry<'_, K, V, A> where K: Send, V: Send, + A: Allocator + Clone + Send, { } -unsafe impl Sync for RustcOccupiedEntry<'_, K, V> +unsafe impl Sync for RustcOccupiedEntry<'_, K, V, A> where K: Sync, V: Sync, + A: Allocator + Clone + Sync, { } -impl Debug for RustcOccupiedEntry<'_, K, V> { +impl Debug for RustcOccupiedEntry<'_, K, V, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("OccupiedEntry") .field("key", self.key()) @@ -112,19 +121,22 @@ impl Debug for RustcOccupiedEntry<'_, K, V> { /// It is part of the [`RustcEntry`] enum. /// /// [`RustcEntry`]: enum.RustcEntry.html -pub struct RustcVacantEntry<'a, K, V> { +pub struct RustcVacantEntry<'a, K, V, A = Global> +where + A: Allocator + Clone, +{ hash: u64, key: K, - table: &'a mut RawTable<(K, V)>, + table: &'a mut RawTable<(K, V), A>, } -impl Debug for RustcVacantEntry<'_, K, V> { +impl Debug for RustcVacantEntry<'_, K, V, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_tuple("VacantEntry").field(self.key()).finish() } } -impl<'a, K, V> RustcEntry<'a, K, V> { +impl<'a, K, V, A: Allocator + Clone> RustcEntry<'a, K, V, A> { /// Sets the value of the entry, and returns a RustcOccupiedEntry. /// /// # Examples @@ -137,7 +149,7 @@ impl<'a, K, V> RustcEntry<'a, K, V> { /// /// assert_eq!(entry.key(), &"horseyland"); /// ``` - pub fn insert(self, value: V) -> RustcOccupiedEntry<'a, K, V> { + pub fn insert(self, value: V) -> RustcOccupiedEntry<'a, K, V, A> { match self { Vacant(entry) => entry.insert_entry(value), Occupied(mut entry) => { @@ -253,7 +265,7 @@ impl<'a, K, V> RustcEntry<'a, K, V> { } } -impl<'a, K, V: Default> RustcEntry<'a, K, V> { +impl<'a, K, V: Default, A: Allocator + Clone> RustcEntry<'a, K, V, A> { /// Ensures a value is in the entry by inserting the default value if empty, /// and returns a mutable reference to the value in the entry. /// @@ -281,7 +293,7 @@ impl<'a, K, V: Default> RustcEntry<'a, K, V> { } } -impl<'a, K, V> RustcOccupiedEntry<'a, K, V> { +impl<'a, K, V, A: Allocator + Clone> RustcOccupiedEntry<'a, K, V, A> { /// Gets a reference to the key in the entry. /// /// # Examples @@ -508,7 +520,7 @@ impl<'a, K, V> RustcOccupiedEntry<'a, K, V> { } } -impl<'a, K, V> RustcVacantEntry<'a, K, V> { +impl<'a, K, V, A: Allocator + Clone> RustcVacantEntry<'a, K, V, A> { /// Gets a reference to the key that would be used when inserting a value /// through the `RustcVacantEntry`. 
/// @@ -583,7 +595,7 @@ impl<'a, K, V> RustcVacantEntry<'a, K, V> { /// } /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn insert_entry(self, value: V) -> RustcOccupiedEntry<'a, K, V> { + pub fn insert_entry(self, value: V) -> RustcOccupiedEntry<'a, K, V, A> { let bucket = self.table.insert_no_grow(self.hash, (self.key, value)); RustcOccupiedEntry { key: None, diff --git a/src/scopeguard.rs b/src/scopeguard.rs index 32c9694..4e9bf04 100644 --- a/src/scopeguard.rs +++ b/src/scopeguard.rs @@ -9,7 +9,7 @@ where value: T, } -#[cfg_attr(feature = "inline-more", inline)] +#[inline] pub fn guard(value: T, dropfn: F) -> ScopeGuard where F: FnMut(&mut T), @@ -22,7 +22,7 @@ where F: FnMut(&mut T), { type Target = T; - #[cfg_attr(feature = "inline-more", inline)] + #[inline] fn deref(&self) -> &T { &self.value } @@ -32,7 +32,7 @@ impl DerefMut for ScopeGuard where F: FnMut(&mut T), { - #[cfg_attr(feature = "inline-more", inline)] + #[inline] fn deref_mut(&mut self) -> &mut T { &mut self.value } @@ -42,7 +42,7 @@ impl Drop for ScopeGuard where F: FnMut(&mut T), { - #[cfg_attr(feature = "inline-more", inline)] + #[inline] fn drop(&mut self) { (self.dropfn)(&mut self.value) } diff --git a/src/set.rs b/src/set.rs index b8460fd..d59183b 100644 --- a/src/set.rs +++ b/src/set.rs @@ -8,6 +8,7 @@ use core::mem; use core::ops::{BitAnd, BitOr, BitXor, Sub}; use super::map::{self, ConsumeAllOnDrop, DefaultHashBuilder, DrainFilterInner, HashMap, Keys}; +use crate::raw::{Allocator, Global}; // Future Optimization (FIXME!) // ============================= @@ -71,7 +72,7 @@ use super::map::{self, ConsumeAllOnDrop, DefaultHashBuilder, DrainFilterInner, H /// ``` /// /// The easiest way to use `HashSet` with a custom type is to derive -/// [`Eq`] and [`Hash`]. We must also derive [`PartialEq`], this will in the +/// [`Eq`] and [`Hash`]. We must also derive [`PartialEq`]. This will in the /// future be implied by [`Eq`]. /// /// ``` @@ -111,11 +112,11 @@ use super::map::{self, ConsumeAllOnDrop, DefaultHashBuilder, DrainFilterInner, H /// [`HashMap`]: struct.HashMap.html /// [`PartialEq`]: https://doc.rust-lang.org/std/cmp/trait.PartialEq.html /// [`RefCell`]: https://doc.rust-lang.org/std/cell/struct.RefCell.html -pub struct HashSet { - pub(crate) map: HashMap, +pub struct HashSet { + pub(crate) map: HashMap, } -impl Clone for HashSet { +impl Clone for HashSet { fn clone(&self) -> Self { HashSet { map: self.map.clone(), @@ -167,73 +168,47 @@ impl HashSet { } } -impl HashSet { - /// Creates a new empty hash set which will use the given hasher to hash - /// keys. - /// - /// The hash set is also created with the default initial capacity. - /// - /// Warning: `hasher` is normally randomly generated, and - /// is designed to allow `HashSet`s to be resistant to attacks that - /// cause many collisions and very poor performance. Setting it - /// manually using this function can expose a DoS attack vector. - /// - /// The `hash_builder` passed should implement the [`BuildHasher`] trait for - /// the HashMap to be useful, see its documentation for details. +#[cfg(feature = "ahash")] +impl HashSet { + /// Creates an empty `HashSet`. /// + /// The hash set is initially created with a capacity of 0, so it will not allocate until it + /// is first inserted into. 
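
The `guard` helper touched in src/scopeguard.rs above is a small scope guard: it runs a closure over the wrapped value when dropped (including during unwinding), unless the caller defuses it with `mem::forget`. A standalone sketch of the same shape:

```rust
use core::ops::{Deref, DerefMut};

struct ScopeGuard<T, F: FnMut(&mut T)> {
    value: T,
    dropfn: F,
}

fn guard<T, F: FnMut(&mut T)>(value: T, dropfn: F) -> ScopeGuard<T, F> {
    ScopeGuard { value, dropfn }
}

impl<T, F: FnMut(&mut T)> Deref for ScopeGuard<T, F> {
    type Target = T;
    fn deref(&self) -> &T {
        &self.value
    }
}

impl<T, F: FnMut(&mut T)> DerefMut for ScopeGuard<T, F> {
    fn deref_mut(&mut self) -> &mut T {
        &mut self.value
    }
}

impl<T, F: FnMut(&mut T)> Drop for ScopeGuard<T, F> {
    fn drop(&mut self) {
        (self.dropfn)(&mut self.value)
    }
}

fn main() {
    let mut cleaned_up = false;
    {
        let g = guard(&mut cleaned_up, |flag| **flag = true);
        // The cleanup closure has not run while the guard is alive; on success a
        // caller could skip it entirely with core::mem::forget(g).
        assert!(!**g);
    }
    // Dropping the guard ran the closure.
    assert!(cleaned_up);
}
```
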
/// /// # Examples /// /// ``` /// use hashbrown::HashSet; - /// use hashbrown::hash_map::DefaultHashBuilder; - /// - /// let s = DefaultHashBuilder::default(); - /// let mut set = HashSet::with_hasher(s); - /// set.insert(2); + /// let set: HashSet = HashSet::new(); /// ``` - /// - /// [`BuildHasher`]: ../../std/hash/trait.BuildHasher.html #[cfg_attr(feature = "inline-more", inline)] - pub const fn with_hasher(hasher: S) -> Self { + pub fn new_in(alloc: A) -> Self { Self { - map: HashMap::with_hasher(hasher), + map: HashMap::new_in(alloc), } } - /// Creates an empty `HashSet` with the specified capacity, using - /// `hasher` to hash the keys. + /// Creates an empty `HashSet` with the specified capacity. /// /// The hash set will be able to hold at least `capacity` elements without /// reallocating. If `capacity` is 0, the hash set will not allocate. /// - /// Warning: `hasher` is normally randomly generated, and - /// is designed to allow `HashSet`s to be resistant to attacks that - /// cause many collisions and very poor performance. Setting it - /// manually using this function can expose a DoS attack vector. - /// - /// The `hash_builder` passed should implement the [`BuildHasher`] trait for - /// the HashMap to be useful, see its documentation for details. - /// /// # Examples /// /// ``` /// use hashbrown::HashSet; - /// use hashbrown::hash_map::DefaultHashBuilder; - /// - /// let s = DefaultHashBuilder::default(); - /// let mut set = HashSet::with_capacity_and_hasher(10, s); - /// set.insert(1); + /// let set: HashSet = HashSet::with_capacity(10); + /// assert!(set.capacity() >= 10); /// ``` - /// - /// [`BuildHasher`]: ../../std/hash/trait.BuildHasher.html #[cfg_attr(feature = "inline-more", inline)] - pub fn with_capacity_and_hasher(capacity: usize, hasher: S) -> Self { + pub fn with_capacity_in(capacity: usize, alloc: A) -> Self { Self { - map: HashMap::with_capacity_and_hasher(capacity, hasher), + map: HashMap::with_capacity_in(capacity, alloc), } } +} +impl HashSet { /// Returns the number of elements the set can hold without reallocating. /// /// # Examples @@ -323,7 +298,7 @@ impl HashSet { /// assert!(set.is_empty()); /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn drain(&mut self) -> Drain<'_, T> { + pub fn drain(&mut self) -> Drain<'_, T, A> { Drain { iter: self.map.drain(), } @@ -376,7 +351,7 @@ impl HashSet { /// assert_eq!(odds, vec![1, 3, 5, 7]); /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn drain_filter(&mut self, f: F) -> DrainFilter<'_, T, F> + pub fn drain_filter(&mut self, f: F) -> DrainFilter<'_, T, F, A> where F: FnMut(&T) -> bool, { @@ -405,6 +380,134 @@ impl HashSet { pub fn clear(&mut self) { self.map.clear() } +} + +impl HashSet { + /// Creates a new empty hash set which will use the given hasher to hash + /// keys. + /// + /// The hash set is also created with the default initial capacity. + /// + /// Warning: `hasher` is normally randomly generated, and + /// is designed to allow `HashSet`s to be resistant to attacks that + /// cause many collisions and very poor performance. Setting it + /// manually using this function can expose a DoS attack vector. + /// + /// The `hash_builder` passed should implement the [`BuildHasher`] trait for + /// the HashMap to be useful, see its documentation for details. 
+ /// + /// + /// # Examples + /// + /// ``` + /// use hashbrown::HashSet; + /// use hashbrown::hash_map::DefaultHashBuilder; + /// + /// let s = DefaultHashBuilder::default(); + /// let mut set = HashSet::with_hasher(s); + /// set.insert(2); + /// ``` + /// + /// [`BuildHasher`]: ../../std/hash/trait.BuildHasher.html + #[cfg_attr(feature = "inline-more", inline)] + pub const fn with_hasher(hasher: S) -> Self { + Self { + map: HashMap::with_hasher(hasher), + } + } + + /// Creates an empty `HashSet` with the specified capacity, using + /// `hasher` to hash the keys. + /// + /// The hash set will be able to hold at least `capacity` elements without + /// reallocating. If `capacity` is 0, the hash set will not allocate. + /// + /// Warning: `hasher` is normally randomly generated, and + /// is designed to allow `HashSet`s to be resistant to attacks that + /// cause many collisions and very poor performance. Setting it + /// manually using this function can expose a DoS attack vector. + /// + /// The `hash_builder` passed should implement the [`BuildHasher`] trait for + /// the HashMap to be useful, see its documentation for details. + /// + /// # Examples + /// + /// ``` + /// use hashbrown::HashSet; + /// use hashbrown::hash_map::DefaultHashBuilder; + /// + /// let s = DefaultHashBuilder::default(); + /// let mut set = HashSet::with_capacity_and_hasher(10, s); + /// set.insert(1); + /// ``` + /// + /// [`BuildHasher`]: ../../std/hash/trait.BuildHasher.html + #[cfg_attr(feature = "inline-more", inline)] + pub fn with_capacity_and_hasher(capacity: usize, hasher: S) -> Self { + Self { + map: HashMap::with_capacity_and_hasher(capacity, hasher), + } + } +} + +impl HashSet +where + A: Allocator + Clone, +{ + /// Creates a new empty hash set which will use the given hasher to hash + /// keys. + /// + /// The hash set is also created with the default initial capacity. + /// + /// Warning: `hasher` is normally randomly generated, and + /// is designed to allow `HashSet`s to be resistant to attacks that + /// cause many collisions and very poor performance. Setting it + /// manually using this function can expose a DoS attack vector. + /// + /// # Examples + /// + /// ``` + /// use hashbrown::HashSet; + /// use hashbrown::hash_map::DefaultHashBuilder; + /// + /// let s = DefaultHashBuilder::default(); + /// let mut set = HashSet::with_hasher(s); + /// set.insert(2); + /// ``` + #[cfg_attr(feature = "inline-more", inline)] + pub fn with_hasher_in(hasher: S, alloc: A) -> Self { + Self { + map: HashMap::with_hasher_in(hasher, alloc), + } + } + + /// Creates an empty `HashSet` with the specified capacity, using + /// `hasher` to hash the keys. + /// + /// The hash set will be able to hold at least `capacity` elements without + /// reallocating. If `capacity` is 0, the hash set will not allocate. + /// + /// Warning: `hasher` is normally randomly generated, and + /// is designed to allow `HashSet`s to be resistant to attacks that + /// cause many collisions and very poor performance. Setting it + /// manually using this function can expose a DoS attack vector. 
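
The `with_hasher` and `with_capacity_and_hasher` constructors above accept any `BuildHasher`. A usage sketch with the external `fnv` crate standing in for a non-default hasher (purely illustrative; any `BuildHasher` works, and the DoS caveat from the docs above applies):

```rust
use fnv::FnvBuildHasher;
use hashbrown::HashSet;

fn main() {
    let mut set: HashSet<i32, FnvBuildHasher> =
        HashSet::with_capacity_and_hasher(10, FnvBuildHasher::default());

    set.insert(1);
    assert!(set.capacity() >= 10);
    assert!(set.contains(&1));
}
```
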
+ /// + /// # Examples + /// + /// ``` + /// use hashbrown::HashSet; + /// use hashbrown::hash_map::DefaultHashBuilder; + /// + /// let s = DefaultHashBuilder::default(); + /// let mut set = HashSet::with_capacity_and_hasher(10, s); + /// set.insert(1); + /// ``` + #[cfg_attr(feature = "inline-more", inline)] + pub fn with_capacity_and_hasher_in(capacity: usize, hasher: S, alloc: A) -> Self { + Self { + map: HashMap::with_capacity_and_hasher_in(capacity, hasher, alloc), + } + } /// Returns a reference to the set's [`BuildHasher`]. /// @@ -426,10 +529,11 @@ impl HashSet { } } -impl HashSet +impl HashSet where T: Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { /// Reserves capacity for at least `additional` more elements to be inserted /// in the `HashSet`. The collection may reserve more space to avoid @@ -544,7 +648,7 @@ where /// assert_eq!(diff, [4].iter().collect()); /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn difference<'a>(&'a self, other: &'a Self) -> Difference<'a, T, S> { + pub fn difference<'a>(&'a self, other: &'a Self) -> Difference<'a, T, S, A> { Difference { iter: self.iter(), other, @@ -573,7 +677,7 @@ where /// assert_eq!(diff1, [1, 4].iter().collect()); /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn symmetric_difference<'a>(&'a self, other: &'a Self) -> SymmetricDifference<'a, T, S> { + pub fn symmetric_difference<'a>(&'a self, other: &'a Self) -> SymmetricDifference<'a, T, S, A> { SymmetricDifference { iter: self.difference(other).chain(other.difference(self)), } @@ -598,7 +702,7 @@ where /// assert_eq!(intersection, [2, 3].iter().collect()); /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn intersection<'a>(&'a self, other: &'a Self) -> Intersection<'a, T, S> { + pub fn intersection<'a>(&'a self, other: &'a Self) -> Intersection<'a, T, S, A> { let (smaller, larger) = if self.len() <= other.len() { (self, other) } else { @@ -629,8 +733,10 @@ where /// assert_eq!(union, [1, 2, 3, 4].iter().collect()); /// ``` #[cfg_attr(feature = "inline-more", inline)] - pub fn union<'a>(&'a self, other: &'a Self) -> Union<'a, T, S> { - let (smaller, larger) = if self.len() >= other.len() { + pub fn union<'a>(&'a self, other: &'a Self) -> Union<'a, T, S, A> { + // We'll iterate one set in full, and only the remaining difference from the other. + // Use the smaller set for the difference in order to reduce hash lookups. 
+ let (smaller, larger) = if self.len() <= other.len() { (self, other) } else { (other, self) @@ -967,10 +1073,11 @@ where } } -impl PartialEq for HashSet +impl PartialEq for HashSet where T: Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { fn eq(&self, other: &Self) -> bool { if self.len() != other.len() { @@ -981,40 +1088,53 @@ where } } -impl Eq for HashSet +impl Eq for HashSet where T: Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { } -impl fmt::Debug for HashSet +impl fmt::Debug for HashSet where T: Eq + Hash + fmt::Debug, S: BuildHasher, + A: Allocator + Clone, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_set().entries(self.iter()).finish() } } -impl FromIterator for HashSet +impl From> for HashSet +where + A: Allocator + Clone, +{ + fn from(map: HashMap) -> Self { + Self { map } + } +} + +impl FromIterator for HashSet where T: Eq + Hash, S: BuildHasher + Default, + A: Default + Allocator + Clone, { #[cfg_attr(feature = "inline-more", inline)] fn from_iter>(iter: I) -> Self { - let mut set = Self::with_hasher(Default::default()); + let mut set = Self::with_hasher_in(Default::default(), Default::default()); set.extend(iter); set } } -impl Extend for HashSet +impl Extend for HashSet where T: Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { #[cfg_attr(feature = "inline-more", inline)] fn extend>(&mut self, iter: I) { @@ -1034,10 +1154,11 @@ where } } -impl<'a, T, S> Extend<&'a T> for HashSet +impl<'a, T, S, A> Extend<&'a T> for HashSet where T: 'a + Eq + Hash + Copy, S: BuildHasher, + A: Allocator + Clone, { #[cfg_attr(feature = "inline-more", inline)] fn extend>(&mut self, iter: I) { @@ -1057,9 +1178,10 @@ where } } -impl Default for HashSet +impl Default for HashSet where S: Default, + A: Default + Allocator + Clone, { /// Creates an empty `HashSet` with the `Default` value for the hasher. #[cfg_attr(feature = "inline-more", inline)] @@ -1070,10 +1192,11 @@ where } } -impl BitOr<&HashSet> for &HashSet +impl BitOr<&HashSet> for &HashSet where T: Eq + Hash + Clone, S: BuildHasher + Default, + A: Allocator + Clone, { type Output = HashSet; @@ -1097,15 +1220,16 @@ where /// } /// assert_eq!(i, expected.len()); /// ``` - fn bitor(self, rhs: &HashSet) -> HashSet { + fn bitor(self, rhs: &HashSet) -> HashSet { self.union(rhs).cloned().collect() } } -impl BitAnd<&HashSet> for &HashSet +impl BitAnd<&HashSet> for &HashSet where T: Eq + Hash + Clone, S: BuildHasher + Default, + A: Allocator + Clone, { type Output = HashSet; @@ -1129,7 +1253,7 @@ where /// } /// assert_eq!(i, expected.len()); /// ``` - fn bitand(self, rhs: &HashSet) -> HashSet { + fn bitand(self, rhs: &HashSet) -> HashSet { self.intersection(rhs).cloned().collect() } } @@ -1216,8 +1340,8 @@ pub struct Iter<'a, K> { /// /// [`HashSet`]: struct.HashSet.html /// [`into_iter`]: struct.HashSet.html#method.into_iter -pub struct IntoIter { - iter: map::IntoIter, +pub struct IntoIter { + iter: map::IntoIter, } /// A draining iterator over the items of a `HashSet`. @@ -1227,8 +1351,8 @@ pub struct IntoIter { /// /// [`HashSet`]: struct.HashSet.html /// [`drain`]: struct.HashSet.html#method.drain -pub struct Drain<'a, K> { - iter: map::Drain<'a, K, ()>, +pub struct Drain<'a, K, A: Allocator + Clone = Global> { + iter: map::Drain<'a, K, (), A>, } /// A draining iterator over entries of a `HashSet` which don't satisfy the predicate `f`. 
@@ -1238,12 +1362,12 @@ pub struct Drain<'a, K> { /// /// [`drain_filter`]: struct.HashSet.html#method.drain_filter /// [`HashSet`]: struct.HashSet.html -pub struct DrainFilter<'a, K, F> +pub struct DrainFilter<'a, K, F, A: Allocator + Clone = Global> where F: FnMut(&K) -> bool, { f: F, - inner: DrainFilterInner<'a, K, ()>, + inner: DrainFilterInner<'a, K, (), A>, } /// A lazy iterator producing elements in the intersection of `HashSet`s. @@ -1253,11 +1377,11 @@ where /// /// [`HashSet`]: struct.HashSet.html /// [`intersection`]: struct.HashSet.html#method.intersection -pub struct Intersection<'a, T, S> { +pub struct Intersection<'a, T, S, A: Allocator + Clone = Global> { // iterator of the first set iter: Iter<'a, T>, // the second set - other: &'a HashSet, + other: &'a HashSet, } /// A lazy iterator producing elements in the difference of `HashSet`s. @@ -1267,11 +1391,11 @@ pub struct Intersection<'a, T, S> { /// /// [`HashSet`]: struct.HashSet.html /// [`difference`]: struct.HashSet.html#method.difference -pub struct Difference<'a, T, S> { +pub struct Difference<'a, T, S, A: Allocator + Clone = Global> { // iterator of the first set iter: Iter<'a, T>, // the second set - other: &'a HashSet, + other: &'a HashSet, } /// A lazy iterator producing elements in the symmetric difference of `HashSet`s. @@ -1281,8 +1405,8 @@ pub struct Difference<'a, T, S> { /// /// [`HashSet`]: struct.HashSet.html /// [`symmetric_difference`]: struct.HashSet.html#method.symmetric_difference -pub struct SymmetricDifference<'a, T, S> { - iter: Chain, Difference<'a, T, S>>, +pub struct SymmetricDifference<'a, T, S, A: Allocator + Clone = Global> { + iter: Chain, Difference<'a, T, S, A>>, } /// A lazy iterator producing elements in the union of `HashSet`s. @@ -1292,11 +1416,11 @@ pub struct SymmetricDifference<'a, T, S> { /// /// [`HashSet`]: struct.HashSet.html /// [`union`]: struct.HashSet.html#method.union -pub struct Union<'a, T, S> { - iter: Chain, Difference<'a, T, S>>, +pub struct Union<'a, T, S, A: Allocator + Clone = Global> { + iter: Chain, Difference<'a, T, S, A>>, } -impl<'a, T, S> IntoIterator for &'a HashSet { +impl<'a, T, S, A: Allocator + Clone> IntoIterator for &'a HashSet { type Item = &'a T; type IntoIter = Iter<'a, T>; @@ -1306,9 +1430,9 @@ impl<'a, T, S> IntoIterator for &'a HashSet { } } -impl IntoIterator for HashSet { +impl IntoIterator for HashSet { type Item = T; - type IntoIter = IntoIter; + type IntoIter = IntoIter; /// Creates a consuming iterator, that is, one that moves each value out /// of the set in arbitrary order. 
The set cannot be used after calling @@ -1331,7 +1455,7 @@ impl IntoIterator for HashSet { /// } /// ``` #[cfg_attr(feature = "inline-more", inline)] - fn into_iter(self) -> IntoIter { + fn into_iter(self) -> IntoIter { IntoIter { iter: self.map.into_iter(), } @@ -1372,7 +1496,7 @@ impl fmt::Debug for Iter<'_, K> { } } -impl Iterator for IntoIter { +impl Iterator for IntoIter { type Item = K; #[cfg_attr(feature = "inline-more", inline)] @@ -1388,22 +1512,22 @@ impl Iterator for IntoIter { self.iter.size_hint() } } -impl ExactSizeIterator for IntoIter { +impl ExactSizeIterator for IntoIter { #[cfg_attr(feature = "inline-more", inline)] fn len(&self) -> usize { self.iter.len() } } -impl FusedIterator for IntoIter {} +impl FusedIterator for IntoIter {} -impl fmt::Debug for IntoIter { +impl fmt::Debug for IntoIter { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let entries_iter = self.iter.iter().map(|(k, _)| k); f.debug_list().entries(entries_iter).finish() } } -impl Iterator for Drain<'_, K> { +impl Iterator for Drain<'_, K, A> { type Item = K; #[cfg_attr(feature = "inline-more", inline)] @@ -1419,22 +1543,22 @@ impl Iterator for Drain<'_, K> { self.iter.size_hint() } } -impl ExactSizeIterator for Drain<'_, K> { +impl ExactSizeIterator for Drain<'_, K, A> { #[cfg_attr(feature = "inline-more", inline)] fn len(&self) -> usize { self.iter.len() } } -impl FusedIterator for Drain<'_, K> {} +impl FusedIterator for Drain<'_, K, A> {} -impl fmt::Debug for Drain<'_, K> { +impl fmt::Debug for Drain<'_, K, A> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let entries_iter = self.iter.iter().map(|(k, _)| k); f.debug_list().entries(entries_iter).finish() } } -impl<'a, K, F> Drop for DrainFilter<'a, K, F> +impl<'a, K, F, A: Allocator + Clone> Drop for DrainFilter<'a, K, F, A> where F: FnMut(&K) -> bool, { @@ -1448,7 +1572,7 @@ where } } -impl Iterator for DrainFilter<'_, K, F> +impl Iterator for DrainFilter<'_, K, F, A> where F: FnMut(&K) -> bool, { @@ -1467,9 +1591,12 @@ where } } -impl FusedIterator for DrainFilter<'_, K, F> where F: FnMut(&K) -> bool {} +impl FusedIterator for DrainFilter<'_, K, F, A> where + F: FnMut(&K) -> bool +{ +} -impl Clone for Intersection<'_, T, S> { +impl Clone for Intersection<'_, T, S, A> { #[cfg_attr(feature = "inline-more", inline)] fn clone(&self) -> Self { Intersection { @@ -1479,10 +1606,11 @@ impl Clone for Intersection<'_, T, S> { } } -impl<'a, T, S> Iterator for Intersection<'a, T, S> +impl<'a, T, S, A> Iterator for Intersection<'a, T, S, A> where T: Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { type Item = &'a T; @@ -1503,24 +1631,26 @@ where } } -impl fmt::Debug for Intersection<'_, T, S> +impl fmt::Debug for Intersection<'_, T, S, A> where T: fmt::Debug + Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.clone()).finish() } } -impl FusedIterator for Intersection<'_, T, S> +impl FusedIterator for Intersection<'_, T, S, A> where T: Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { } -impl Clone for Difference<'_, T, S> { +impl Clone for Difference<'_, T, S, A> { #[cfg_attr(feature = "inline-more", inline)] fn clone(&self) -> Self { Difference { @@ -1530,10 +1660,11 @@ impl Clone for Difference<'_, T, S> { } } -impl<'a, T, S> Iterator for Difference<'a, T, S> +impl<'a, T, S, A> Iterator for Difference<'a, T, S, A> where T: Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { type Item = &'a T; @@ -1554,24 +1685,26 @@ where } } -impl 
FusedIterator for Difference<'_, T, S> +impl FusedIterator for Difference<'_, T, S, A> where T: Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { } -impl fmt::Debug for Difference<'_, T, S> +impl fmt::Debug for Difference<'_, T, S, A> where T: fmt::Debug + Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.clone()).finish() } } -impl Clone for SymmetricDifference<'_, T, S> { +impl Clone for SymmetricDifference<'_, T, S, A> { #[cfg_attr(feature = "inline-more", inline)] fn clone(&self) -> Self { SymmetricDifference { @@ -1580,10 +1713,11 @@ impl Clone for SymmetricDifference<'_, T, S> { } } -impl<'a, T, S> Iterator for SymmetricDifference<'a, T, S> +impl<'a, T, S, A> Iterator for SymmetricDifference<'a, T, S, A> where T: Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { type Item = &'a T; @@ -1597,24 +1731,26 @@ where } } -impl FusedIterator for SymmetricDifference<'_, T, S> +impl FusedIterator for SymmetricDifference<'_, T, S, A> where T: Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { } -impl fmt::Debug for SymmetricDifference<'_, T, S> +impl fmt::Debug for SymmetricDifference<'_, T, S, A> where T: fmt::Debug + Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.clone()).finish() } } -impl Clone for Union<'_, T, S> { +impl Clone for Union<'_, T, S, A> { #[cfg_attr(feature = "inline-more", inline)] fn clone(&self) -> Self { Union { @@ -1623,27 +1759,30 @@ impl Clone for Union<'_, T, S> { } } -impl FusedIterator for Union<'_, T, S> +impl FusedIterator for Union<'_, T, S, A> where T: Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { } -impl fmt::Debug for Union<'_, T, S> +impl fmt::Debug for Union<'_, T, S, A> where T: fmt::Debug + Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.clone()).finish() } } -impl<'a, T, S> Iterator for Union<'a, T, S> +impl<'a, T, S, A> Iterator for Union<'a, T, S, A> where T: Eq + Hash, S: BuildHasher, + A: Allocator + Clone, { type Item = &'a T; @@ -1665,30 +1804,34 @@ fn assert_covariance() { fn iter<'a, 'new>(v: Iter<'a, &'static str>) -> Iter<'a, &'new str> { v } - fn into_iter<'new>(v: IntoIter<&'static str>) -> IntoIter<&'new str> { + fn into_iter<'new, A: Allocator + Clone>( + v: IntoIter<&'static str, A>, + ) -> IntoIter<&'new str, A> { v } - fn difference<'a, 'new>( - v: Difference<'a, &'static str, DefaultHashBuilder>, - ) -> Difference<'a, &'new str, DefaultHashBuilder> { + fn difference<'a, 'new, A: Allocator + Clone>( + v: Difference<'a, &'static str, DefaultHashBuilder, A>, + ) -> Difference<'a, &'new str, DefaultHashBuilder, A> { v } - fn symmetric_difference<'a, 'new>( - v: SymmetricDifference<'a, &'static str, DefaultHashBuilder>, - ) -> SymmetricDifference<'a, &'new str, DefaultHashBuilder> { + fn symmetric_difference<'a, 'new, A: Allocator + Clone>( + v: SymmetricDifference<'a, &'static str, DefaultHashBuilder, A>, + ) -> SymmetricDifference<'a, &'new str, DefaultHashBuilder, A> { v } - fn intersection<'a, 'new>( - v: Intersection<'a, &'static str, DefaultHashBuilder>, - ) -> Intersection<'a, &'new str, DefaultHashBuilder> { + fn intersection<'a, 'new, A: Allocator + Clone>( + v: Intersection<'a, &'static str, DefaultHashBuilder, A>, + ) -> Intersection<'a, &'new str, DefaultHashBuilder, A> { v } - fn union<'a, 'new>( - v: Union<'a, &'static str, DefaultHashBuilder>, 
- ) -> Union<'a, &'new str, DefaultHashBuilder> { + fn union<'a, 'new, A: Allocator + Clone>( + v: Union<'a, &'static str, DefaultHashBuilder, A>, + ) -> Union<'a, &'new str, DefaultHashBuilder, A> { v } - fn drain<'new>(d: Drain<'static, &'static str>) -> Drain<'new, &'new str> { + fn drain<'new, A: Allocator + Clone>( + d: Drain<'static, &'static str, A>, + ) -> Drain<'new, &'new str, A> { d } } @@ -1904,6 +2047,23 @@ mod test_set { assert_eq!(i, expected.len()); } + #[test] + fn test_from_map() { + let mut a = crate::HashMap::new(); + a.insert(1, ()); + a.insert(2, ()); + a.insert(3, ()); + a.insert(4, ()); + + let a: HashSet<_> = a.into(); + + assert_eq!(a.len(), 4); + assert!(a.contains(&1)); + assert!(a.contains(&2)); + assert!(a.contains(&3)); + assert!(a.contains(&4)); + } + #[test] fn test_from_iter() { let xs = [1, 2, 2, 3, 4, 5, 6, 7, 8, 9]; @@ -2116,4 +2276,24 @@ mod test_set { set.insert(19); assert!(set.contains(&19)); } + + #[test] + fn rehash_in_place() { + let mut set = HashSet::new(); + + for i in 0..224 { + set.insert(i); + } + + assert_eq!( + set.capacity(), + 224, + "The set must be at or close to capacity to trigger a re hashing" + ); + + for i in 100..1400 { + set.remove(&(i - 100)); + set.insert(i); + } + } } diff --git a/tests/serde.rs b/tests/serde.rs index 570bf70..a642348 100644 --- a/tests/serde.rs +++ b/tests/serde.rs @@ -1,24 +1,24 @@ #![cfg(feature = "serde")] use core::hash::BuildHasherDefault; +use fnv::FnvHasher; use hashbrown::{HashMap, HashSet}; -use rustc_hash::FxHasher; use serde_test::{assert_tokens, Token}; -// We use FxHash for this test because we rely on the ordering -type FxHashMap = HashMap>; -type FxHashSet = HashSet>; +// We use FnvHash for this test because we rely on the ordering +type FnvHashMap = HashMap>; +type FnvHashSet = HashSet>; #[test] fn map_serde_tokens_empty() { - let map = FxHashMap::::default(); + let map = FnvHashMap::::default(); assert_tokens(&map, &[Token::Map { len: Some(0) }, Token::MapEnd]); } #[test] fn map_serde_tokens() { - let mut map = FxHashMap::default(); + let mut map = FnvHashMap::default(); map.insert('b', 20); map.insert('a', 10); map.insert('c', 30); @@ -29,10 +29,10 @@ fn map_serde_tokens() { Token::Map { len: Some(3) }, Token::Char('a'), Token::I32(10), - Token::Char('b'), - Token::I32(20), Token::Char('c'), Token::I32(30), + Token::Char('b'), + Token::I32(20), Token::MapEnd, ], ); @@ -40,14 +40,14 @@ fn map_serde_tokens() { #[test] fn set_serde_tokens_empty() { - let set = FxHashSet::::default(); + let set = FnvHashSet::::default(); assert_tokens(&set, &[Token::Seq { len: Some(0) }, Token::SeqEnd]); } #[test] fn set_serde_tokens() { - let mut set = FxHashSet::default(); + let mut set = FnvHashSet::default(); set.insert(20); set.insert(10); set.insert(30); @@ -56,9 +56,9 @@ fn set_serde_tokens() { &set, &[ Token::Seq { len: Some(3) }, + Token::I32(30), Token::I32(20), Token::I32(10), - Token::I32(30), Token::SeqEnd, ], ); -- cgit v1.2.3
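The constructors `with_hasher_in` and `with_capacity_and_hasher_in` added in this diff take an explicit allocator argument. A minimal sketch of using them with a bump allocator follows; it assumes hashbrown's optional `bumpalo` integration is enabled and that the shim type is exposed as `hashbrown::BumpWrapper` with a public `&Bump` field, which is an assumption about the re-export rather than something shown in this patch.

```rust
// Sketch only: assumes the `bumpalo` cargo feature and that the shim is
// re-exported as `hashbrown::BumpWrapper(pub &Bump)` (not verified here).
use bumpalo::Bump;
use hashbrown::hash_map::DefaultHashBuilder;
use hashbrown::{BumpWrapper, HashSet};

fn main() {
    let bump = Bump::new();
    // The set's table storage is carved out of `bump` instead of the
    // global allocator.
    let mut set: HashSet<i32, DefaultHashBuilder, BumpWrapper<'_>> =
        HashSet::with_capacity_and_hasher_in(
            10,
            DefaultHashBuilder::default(),
            BumpWrapper(&bump),
        );
    for i in 0..10 {
        set.insert(i);
    }
    assert!(set.contains(&7));
}
```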
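The `union` hunk flips the size comparison so the smaller operand is the one fed through `difference`, which bounds the number of hash lookups by the smaller set's length. The observable result is unchanged, as this small illustration shows:

```rust
use hashbrown::HashSet;

fn main() {
    let small: HashSet<i32> = [1, 2].iter().copied().collect();
    let large: HashSet<i32> = [2, 3, 4, 5].iter().copied().collect();

    // `union` yields every element of one operand plus the difference of
    // the other; using the smaller operand for the difference keeps the
    // number of lookups proportional to the smaller set.
    let a: HashSet<i32> = small.union(&large).copied().collect();
    let b: HashSet<i32> = large.union(&small).copied().collect();

    assert_eq!(a, b);
    assert_eq!(a.len(), 5);
}
```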
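The new `From<HashMap<T, (), S, A>>` impl is just `Self { map }`, so converting a unit-valued map into a set reuses the existing table:

```rust
use hashbrown::{HashMap, HashSet};

fn main() {
    let mut map: HashMap<&str, ()> = HashMap::new();
    map.insert("a", ());
    map.insert("b", ());

    // No rehashing: the map's table becomes the set's backing storage.
    let set: HashSet<&str> = map.into();
    assert!(set.contains("a") && set.contains("b"));
    assert_eq!(set.len(), 2);
}
```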
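`drain_filter` keeps its behaviour; only the returned `DrainFilter` gains the allocator parameter. A short sketch, assuming the method signature implied by the `DrainFilter<'a, K, F, A>` definition and its `FnMut(&K) -> bool` bound above:

```rust
use hashbrown::HashSet;

fn main() {
    let mut set: HashSet<i32> = (0..8).collect();

    // The predicate receives `&T`; elements for which it returns `true`
    // are removed from the set and yielded, the rest stay in place.
    let evens: Vec<i32> = set.drain_filter(|v| v % 2 == 0).collect();

    assert_eq!(evens.len(), 4);
    assert_eq!(set.len(), 4);
    assert!(set.iter().all(|v| v % 2 == 1));
}
```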
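Most of the remaining set.rs churn threads the allocator parameter through the lazy adapters (`Intersection`, `Difference`, `SymmetricDifference`, `Union`) and their `Clone`, `Debug`, `Iterator` and `FusedIterator` impls; the adapters themselves behave as before:

```rust
use hashbrown::HashSet;

fn main() {
    let a: HashSet<i32> = [1, 2, 3].iter().copied().collect();
    let b: HashSet<i32> = [3, 4].iter().copied().collect();

    // Nothing is computed until the adapter is iterated, and `Clone`
    // allows it to be consumed more than once.
    let diff = a.difference(&b);
    println!("{:?}", diff.clone()); // prints the elements as a list, e.g. [1, 2]
    assert_eq!(diff.count(), 2);

    let sym: HashSet<i32> = a.symmetric_difference(&b).copied().collect();
    assert_eq!(sym, [1, 2, 4].iter().copied().collect());
}
```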
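The serde tests now pin iteration order with `fnv` instead of `rustc-hash`. The property they rely on is that a fixed, stateless hasher makes two identically built sets iterate in the same order; a small illustration (the concrete order is version-dependent and not asserted here):

```rust
use core::hash::BuildHasherDefault;
use fnv::FnvHasher;
use hashbrown::HashSet;

type FnvHashSet<T> = HashSet<T, BuildHasherDefault<FnvHasher>>;

fn main() {
    let mut a = FnvHashSet::default();
    let mut b = FnvHashSet::default();
    for v in [20, 10, 30].iter() {
        a.insert(*v);
        b.insert(*v);
    }
    // Same elements, same deterministic hasher, same insertion sequence:
    // both sets iterate in the same order, unlike the randomly seeded
    // default hasher.
    assert!(a.iter().zip(b.iter()).all(|(x, y)| x == y));
}
```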