// src/specialized/aarch64.rs
use std::arch::aarch64 as arch;

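/// CRC-32 (IEEE) checksum state backed by the AArch64 `crc` extension
/// (the `__crc32b`/`__crc32d` intrinsics).
///
/// A minimal usage sketch (not compiled here; the starting value of `0`
/// for a fresh checksum is an assumption based on this module alone):
///
/// ```ignore
/// let mut state = State::new(0).expect("CRC instructions not available");
/// state.update(b"hello world");
/// let checksum = state.finalize();
/// ```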
#[derive(Clone)]
pub struct State {
    state: u32,
}

impl State {
    pub fn new(state: u32) -> Option<Self> {
        if std::arch::is_aarch64_feature_detected!("crc") {
            // SAFETY: The runtime feature check above guarantees that the
            //         CRC instructions used by `calculate` are supported by
            //         the CPU, which makes the `unsafe` call in `update` sound.
            Some(Self { state })
        } else {
            None
        }
    }

    pub fn update(&mut self, buf: &[u8]) {
        // SAFETY: The `State::new` constructor ensures that all
        //         required instructions are supported by the CPU.
        self.state = unsafe { calculate(self.state, buf) };
    }

    pub fn finalize(self) -> u32 {
        self.state
    }

    pub fn reset(&mut self) {
        self.state = 0;
    }

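    // Folds `other`, the CRC-32 of a block of `amount` bytes, into this
    // state as if that block had been appended to the data seen so far.
    // This delegates to the crate's shared `combine` module (presumably a
    // software implementation, since no CRC instructions are needed here).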
    pub fn combine(&mut self, other: u32, amount: u64) {
        self.state = ::combine::combine(self.state, other, amount);
    }
}

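/// Computes the CRC-32 of `data`, continuing from the previously
/// accumulated checksum `crc`.
///
/// # Safety
///
/// The caller must ensure that the CPU supports the AArch64 `crc` feature,
/// e.g. by checking `is_aarch64_feature_detected!("crc")` first.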
// `target_feature` is necessary so that rustc can inline the `__crc32*` intrinsic wrappers.
#[target_feature(enable = "crc")]
pub unsafe fn calculate(crc: u32, data: &[u8]) -> u32 {
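    // CRC-32 operates on a bit-inverted state, so invert on entry and again
    // before returning. `align_to` splits the input into an unaligned byte
    // prefix, a naturally aligned run of u64 quads, and an unaligned byte
    // suffix, so the bulk of the data can be fed to `__crc32d` eight bytes
    // at a time.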
    let mut c32 = !crc;
    let (pre_quad, quads, post_quad) = data.align_to::<u64>();

    c32 = pre_quad.iter().fold(c32, |acc, &b| arch::__crc32b(acc, b));

    // Manually unrolling the loop to process eight u64 values per
    // iteration significantly improves throughput.
    let mut quad_iter = quads.chunks_exact(8);
    for chunk in &mut quad_iter {
        c32 = arch::__crc32d(c32, chunk[0]);
        c32 = arch::__crc32d(c32, chunk[1]);
        c32 = arch::__crc32d(c32, chunk[2]);
        c32 = arch::__crc32d(c32, chunk[3]);
        c32 = arch::__crc32d(c32, chunk[4]);
        c32 = arch::__crc32d(c32, chunk[5]);
        c32 = arch::__crc32d(c32, chunk[6]);
        c32 = arch::__crc32d(c32, chunk[7]);
    }
    c32 = quad_iter
        .remainder()
        .iter()
        .fold(c32, |acc, &q| arch::__crc32d(acc, q));

    c32 = post_quad.iter().fold(c32, |acc, &b| arch::__crc32b(acc, b));

    !c32
}

#[cfg(test)]
mod test {
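    // Property test: the hardware implementation must produce the same
    // checksum as the portable baseline for arbitrary chunks and slice
    // alignments. The `quickcheck!` macro is assumed to be in scope from
    // the crate root (e.g. via `#[macro_use] extern crate quickcheck;`).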
    quickcheck! {
        fn check_against_baseline(init: u32, chunks: Vec<(Vec<u8>, usize)>) -> bool {
            let mut baseline = super::super::super::baseline::State::new(init);
            let mut aarch64 = super::State::new(init).expect("not supported");
            for (chunk, mut offset) in chunks {
                // simulate random alignments by offsetting the slice by up to 15 bytes
                offset &= 0xF;
                if chunk.len() <= offset {
                    baseline.update(&chunk);
                    aarch64.update(&chunk);
                } else {
                    baseline.update(&chunk[offset..]);
                    aarch64.update(&chunk[offset..]);
                }
            }
            aarch64.finalize() == baseline.finalize()
        }
    }
}