aboutsummaryrefslogtreecommitdiff
path: root/icing/index/main/posting-list-accessor.h
blob: 3f93c3a12ac43df67178bd6802fc3366160a028e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ICING_INDEX_POSTING_LIST_ACCESSOR_H_
#define ICING_INDEX_POSTING_LIST_ACCESSOR_H_

#include <cstdint>
#include <memory>
#include <vector>

#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/file/posting_list/flash-index-storage.h"
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/file/posting_list/posting-list-used.h"
#include "icing/index/hit/hit.h"
#include "icing/index/main/posting-list-used-hit-serializer.h"

namespace icing {
namespace lib {

// This class serves to:
//  1. Expose PostingListUseds to clients of FlashIndexStorage
//  2. Ensure the corresponding instance of IndexBlock has the same lifecycle as
//     the instance of PostingListUsed that the client has access to, while
//     not exposing IndexBlock's api surface.
//  3. Ensure that PostingListUseds can only be freed by calling methods which
//     will also properly maintain the FlashIndexStorage free list and prevent
//     callers from modifying the Posting List after freeing.

// This class is used to provide a simple abstraction for adding hits to posting
// lists. PostingListAccessor handles 1) selection of properly-sized posting
// lists for the accumulated hits during Finalize() and 2) chaining of max-sized
// posting lists.
class PostingListAccessor {
 public:
  // Creates an empty PostingListAccessor.
  //
  // RETURNS:
  //   - On success, a valid instance of PostingListAccessor
  //   - INVALID_ARGUMENT error if storage has an invalid block_size.
  static libtextclassifier3::StatusOr<PostingListAccessor> Create(
      FlashIndexStorage* storage, PostingListUsedHitSerializer* serializer);

  // Create a PostingListAccessor with an existing posting list identified by
  // existing_posting_list_id.
  //
  // The PostingListAccessor will add hits to this posting list until it is
  // necessary either to 1) chain the posting list (if it is max-sized) or 2)
  // move its hits to a larger posting list.
  //
  // RETURNS:
  //   - On success, a valid instance of PostingListAccessor
  //   - INVALID_ARGUMENT if storage has an invalid block_size.
  static libtextclassifier3::StatusOr<PostingListAccessor> CreateFromExisting(
      FlashIndexStorage* storage, PostingListUsedHitSerializer* serializer,
      PostingListIdentifier existing_posting_list_id);

  // Retrieve the next batch of hits for the posting list chain
  //
  // RETURNS:
  //   - On success, a vector of hits in the posting list chain
  //   - INTERNAL if called on an instance of PostingListAccessor that was
  //     created via PostingListAccessor::Create, if unable to read the next
  //     posting list in the chain or if the posting list has been corrupted
  //     somehow.
  libtextclassifier3::StatusOr<std::vector<Hit>> GetNextHitsBatch();

  // Prepend one hit. This may result in flushing the posting list to disk (if
  // the PostingListAccessor holds a max-sized posting list that is full) or
  // freeing a pre-existing posting list if it is too small to fit all hits
  // necessary.
  //
  // RETURNS:
  //   - OK, on success
  //   - INVALID_ARGUMENT if !hit.is_valid() or if hit is not less than the
  //   previously added hit.
  //   - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a new
  //   posting list.
  libtextclassifier3::Status PrependHit(const Hit& hit);

  struct FinalizeResult {
    //   - OK on success
    //   - INVALID_ARGUMENT if there was no pre-existing posting list and no
    //     hits were added
    //   - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a
    //     new posting list.
    libtextclassifier3::Status status;
    // Id of the posting list chain that was finalized. Guaranteed to be valid
    // if status is OK. May be valid if status is non-OK, but previous blocks
    // were written.
    PostingListIdentifier id;
  };
  // Write all accumulated hits to storage.
  //
  // If accessor points to a posting list chain with multiple posting lists in
  // the chain and unable to write the last posting list in the chain, Finalize
  // will return the error and also populate id with the id of the
  // second-to-last posting list.
  static FinalizeResult Finalize(PostingListAccessor accessor);

 private:
  explicit PostingListAccessor(
      FlashIndexStorage* storage, PostingListUsedHitSerializer* serializer,
      std::unique_ptr<uint8_t[]> posting_list_buffer_array,
      PostingListUsed posting_list_buffer)
      : storage_(storage),
        serializer_(serializer),
        prev_block_identifier_(PostingListIdentifier::kInvalid),
        posting_list_buffer_array_(std::move(posting_list_buffer_array)),
        posting_list_buffer_(std::move(posting_list_buffer)),
        has_reached_posting_list_chain_end_(false) {}

  // Flushes preexisting_posting_list_ to disk if it's a max-sized posting list
  // and populates prev_block_identifier.
  // If it's not a max-sized posting list, moves the contents of
  // preexisting_posting_list_ to posting_list_buffer_ and frees
  // preexisting_posting_list_.
  // Sets preexisting_posting_list_ to nullptr.
  void FlushPreexistingPostingList();

  // Flushes posting_list_buffer_ to a max-sized posting list on disk, setting
  // its next pointer to prev_block_identifier_ and updating
  // prev_block_identifier_ to point to the just-written posting list.
  libtextclassifier3::Status FlushInMemoryPostingList();

  // Frees all posting lists in the posting list chain starting at
  // prev_block_identifier_.
  libtextclassifier3::Status FreePostingListChain();

  FlashIndexStorage* storage_;  // Does not own.

  PostingListUsedHitSerializer* serializer_;  // Does not own.

  // The PostingListIdentifier of the first max-sized posting list in the
  // posting list chain or PostingListIdentifier::kInvalid if there is no
  // posting list chain.
  PostingListIdentifier prev_block_identifier_;

  // An editor to an existing posting list on disk. If available (non-NULL),
  // we'll try to add all hits to this posting list. Once this posting list
  // fills up, we'll either 1) chain it (if a max-sized posting list) and put
  // future hits in posting_list_buffer_ or 2) copy all of its hits into
  // posting_list_buffer_ and free this pl (if not a max-sized posting list).
  // TODO(tjbarron) provide a benchmark to demonstrate the effects that re-using
  // existing posting lists has on latency.
  std::unique_ptr<PostingListHolder> preexisting_posting_list_;

  // In-memory posting list used to buffer hits before writing them to the
  // smallest on-disk posting list that will fit them.
  // posting_list_buffer_array_ owns the memory region that posting_list_buffer_
  // interprets. Therefore, posting_list_buffer_array_ must have the same
  // lifecycle as posting_list_buffer_.
  std::unique_ptr<uint8_t[]> posting_list_buffer_array_;
  PostingListUsed posting_list_buffer_;

  bool has_reached_posting_list_chain_end_;
};

}  // namespace lib
}  // namespace icing

#endif  // ICING_INDEX_POSTING_LIST_ACCESSOR_H_