aboutsummaryrefslogtreecommitdiff
path: root/icing/store/document-log-creator.cc
blob: a035f93b27c347c66dceef1ada4cf5f424d18fa8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
// Copyright (C) 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "icing/store/document-log-creator.h"

#include <memory>
#include <string>
#include <utility>

#include "icing/text_classifier/lib3/utils/base/logging.h"
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/annotate.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/file-backed-proto-log.h"
#include "icing/file/filesystem.h"
#include "icing/file/portable-file-backed-proto-log.h"
#include "icing/proto/document_wrapper.pb.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"

namespace icing {
namespace lib {

namespace {

// Used in DocumentId mapper to mark a document as deleted
constexpr char kDocumentLogFilename[] = "document_log";

std::string DocumentLogFilenameV0() {
  // Originally only had this one version, no suffix.
  return kDocumentLogFilename;
}

std::string DocumentLogFilenameV1() {
  return absl_ports::StrCat(kDocumentLogFilename, "_v1");
}

std::string MakeDocumentLogFilenameV0(const std::string& base_dir) {
  return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV0());
}

std::string MakeDocumentLogFilenameV1(const std::string& base_dir) {
  return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV1());
}

}  // namespace

std::string DocumentLogCreator::GetDocumentLogFilename() {
  // This should always return the latest version of the document log in use.
  // The current latest version is V1.
  return DocumentLogFilenameV1();
}

libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult>
DocumentLogCreator::Create(const Filesystem* filesystem,
                           const std::string& base_dir) {
  bool v0_exists =
      filesystem->FileExists(MakeDocumentLogFilenameV0(base_dir).c_str());
  bool regen_derived_files = false;

#ifdef ENABLE_V1_MIGRATION
  bool v1_exists =
      filesystem->FileExists(MakeDocumentLogFilenameV1(base_dir).c_str());

  if (v0_exists && !v1_exists) {
    ICING_RETURN_IF_ERROR(MigrateFromV0ToV1(filesystem, base_dir));

    // Need to regenerate derived files since documents may be written to a
    // different file offset in the log.
    regen_derived_files = true;
  } else if (!v1_exists) {
    // First time initializing a v1 log. There are no existing derived files at
    // this point, so we should generate some. "regenerate" here also means
    // "generate for the first time", i.e. we shouldn't expect there to be any
    // existing derived files.
    regen_derived_files = true;
  }
#else  // !ENABLE_V1_MIGRATION
  if (v0_exists) {
    // If migration from v0 to v1 is not enabled, then simply delete the v0 file
    // and treat this as if it's our first time initializing a v1 log.
    regen_derived_files = true;
    filesystem->DeleteFile(MakeDocumentLogFilenameV0(base_dir).c_str());
  }
#endif  // ENABLED_V1_MIGRATION

  ICING_ASSIGN_OR_RETURN(
      PortableFileBackedProtoLog<DocumentWrapper>::CreateResult
          log_create_result,
      PortableFileBackedProtoLog<DocumentWrapper>::Create(
          filesystem, MakeDocumentLogFilenameV1(base_dir),
          PortableFileBackedProtoLog<DocumentWrapper>::Options(
              /*compress_in=*/true)));

  CreateResult create_result = {std::move(log_create_result),
                                regen_derived_files};
  return create_result;
}

libtextclassifier3::Status DocumentLogCreator::MigrateFromV0ToV1(
    const Filesystem* filesystem, const std::string& base_dir) {
  ICING_VLOG(1) << "Migrating from v0 to v1 document log.";

  // Our v0 proto log was non-portable, create it so we can read protos out from
  // it.
  auto v0_create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
      filesystem, MakeDocumentLogFilenameV0(base_dir),
      FileBackedProtoLog<DocumentWrapper>::Options(
          /*compress_in=*/true));
  if (!v0_create_result_or.ok()) {
    return absl_ports::Annotate(
        v0_create_result_or.status(),
        "Failed to initialize v0 document log while migrating.");
    return v0_create_result_or.status();
  }
  FileBackedProtoLog<DocumentWrapper>::CreateResult v0_create_result =
      std::move(v0_create_result_or).ValueOrDie();
  std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> v0_proto_log =
      std::move(v0_create_result.proto_log);

  // Create a v1 portable proto log that we will write our protos to.
  auto v1_create_result_or =
      PortableFileBackedProtoLog<DocumentWrapper>::Create(
          filesystem, MakeDocumentLogFilenameV1(base_dir),
          PortableFileBackedProtoLog<DocumentWrapper>::Options(
              /*compress_in=*/true));
  if (!v1_create_result_or.ok()) {
    return absl_ports::Annotate(
        v1_create_result_or.status(),
        "Failed to initialize v1 document log while migrating.");
  }
  PortableFileBackedProtoLog<DocumentWrapper>::CreateResult v1_create_result =
      std::move(v1_create_result_or).ValueOrDie();
  std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> v1_proto_log =
      std::move(v1_create_result.proto_log);

  // Dummy empty document to be used when copying over deleted documents.
  DocumentProto empty_document;

  // Start reading out from the old log and putting them in the new log.
  auto iterator = v0_proto_log->GetIterator();
  auto iterator_status = iterator.Advance();
  while (iterator_status.ok()) {
    libtextclassifier3::StatusOr<DocumentWrapper> document_wrapper_or =
        v0_proto_log->ReadProto(iterator.GetOffset());

    bool deleted_document = false;
    DocumentWrapper document_wrapper;
    if (absl_ports::IsNotFound(document_wrapper_or.status())) {
      // Proto was erased, we can skip copying this into our new log.
      *document_wrapper.mutable_document() = empty_document;
      deleted_document = true;
    } else if (!document_wrapper_or.ok()) {
      // Some real error, pass up
      return document_wrapper_or.status();
    } else {
      document_wrapper = std::move(document_wrapper_or).ValueOrDie();
    }

    auto offset_or = v1_proto_log->WriteProto(document_wrapper);
    if (!offset_or.ok()) {
      return absl_ports::Annotate(
          offset_or.status(),
          "Failed to write proto to v1 document log while migrating.");
    }

    // If the original document was deleted, erase the proto we just wrote.
    // We do this to maintain the document_ids, i.e. we still want document_id 2
    // to point to a deleted document even though we may not have the document
    // contents anymore. DocumentStore guarantees that the document_ids don't
    // change unless an Optimize is triggered.
    if (deleted_document) {
      int64_t offset = offset_or.ValueOrDie();
      auto erased_status = v1_proto_log->EraseProto(offset);
      if (!erased_status.ok()) {
        return absl_ports::Annotate(
            erased_status,
            "Failed to erase proto in v1 document log while migrating.");
      }
    }

    iterator_status = iterator.Advance();
  }

  // Close out our file log pointers.
  v0_proto_log.reset();
  v1_proto_log.reset();

  return libtextclassifier3::Status::OK;
}

}  // namespace lib
}  // namespace icing