aboutsummaryrefslogtreecommitdiff
path: root/icing/util/crc32.cc
blob: d169acf4a562f9a6e024590cd85c8723b96c8cdc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "icing/util/crc32.h"

#include <cstdint>

#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/portable/zlib.h"

namespace icing {
namespace lib {

namespace {
uint32_t UpdateCrc32(uint32_t crc, const std::string_view str) {
  if (str.length() > 0) {
    // crc32() already includes a pre- and post-condition of taking the one's
    // complement of the value.
    crc =
        ~crc32(~crc, reinterpret_cast<const Bytef*>(str.data()), str.length());
  }
  return crc;
}
}  // namespace

uint32_t Crc32::Get() const { return crc_; }

uint32_t Crc32::Append(const std::string_view str) {
  crc_ = UpdateCrc32(crc_, str);
  return crc_;
}

libtextclassifier3::StatusOr<uint32_t> Crc32::UpdateWithXor(
    const std::string_view xored_str, int full_data_size, int position) {
  // For appending, use Append().
  if (position + xored_str.length() > full_data_size) {
    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
        "offset position %d + length %zd > full data size %d", position,
        xored_str.length(), full_data_size));
  }

  // We have CRC(A|U|B) and we want CRC(A|V|B) where U is the slice
  // that updated to V.
  //
  // xored_str = X = U ^ V
  //
  // Some terminology:
  //   `|`: denotes concatenation, NOT the bitwise operator OR
  //
  //   (A|U|B): a concatenated string of A+U+B
  //
  //   CRC(A|U|B): The crc of a concatenated string of A+U+B
  //
  //   0_lenA: a string of 0's of the length of string A
  //
  //
  // (A|V|B) = (0_lenA|X|0_lenB) ^ (A|U|B)
  //
  // since CRC(D) = CRC(E) ^ CRC(F), where D = E ^ F:
  // CRC(A|V|B)
  //   = CRC(0_lenA|X|0_lenB) ^ CRC(A|U|B)
  //
  // and CRC(D|E) = CRC_COMBINE(D, E), so
  //   = CRC_COMBINE(CRC(0_lenA), CRC_COMBINE(CRC(X), CRC(0_lenB)) ^ CRC(A|U|B)
  //
  // and CRC(0) = 0, so
  //   = CRC_COMBINE(0, CRC_COMBINE(CRC(X), CRC(0_lenB)) ^ CRC(A|U|B)
  //
  // and CRC(0|B) = CRC(B), so
  //   = CRC_COMBINE(CRC(X), CRC(0_lenB)) ^ CRC(A|U|B)
  //
  // For more details, see this post by Mark Adler, one of the authors of zlib:
  // https://stackoverflow.com/questions/23122312/crc-calculation-of-a-mostly-static-data-stream/23126768#23126768

  uint32_t update_crc = UpdateCrc32(0, xored_str);
  update_crc = crc32_combine(update_crc, 0,
                             full_data_size - (position + xored_str.length()));
  crc_ ^= update_crc;
  return crc_;
}

}  // namespace lib
}  // namespace icing