// Source path: root/proto/icing/proto/schema.proto
// Source blob: ffb6f2c162e43c32a8e814a4317c4b04b76c9861
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package icing.lib;

import "icing/proto/status.proto";
import "icing/proto/term.proto";

option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";

// Defines the schema that every Document of a specific "type" should adhere
// to. These can be considered as definitions of rich structured types for
// Documents accepted by IcingSearchEngine.
//
// NOTE: Instances of SchemaTypeConfigProto are strongly recommended to be
// based on types defined in schema.org. This makes the data/config/code more
// shareable and easier to extend in the future.
//
// TODO(cassiewang) Define a sample proto file that can be used by tests and for
// documentation.
//
// Next tag: 6
message SchemaTypeConfigProto {
  // Tags 2 and 3 are reserved and must not be assigned to new fields.
  reserved 2, 3;

  // REQUIRED: The name of this type. It uniquely identifies the structured,
  // logical schema that this config defines.
  //
  // Recommended format: a human-readable string naming one of the types
  // defined at http://schema.org, e.g. DigitalDocument, Message, Person.
  optional string schema_type = 1;

  // Every property that Documents of this type support. A Document should
  // never carry a property that is not listed here.
  //
  // TODO(cassiewang) Figure out if we should disallow, ignore or accept
  // unknown properties. Accepting them could make switching between versions
  // easier.
  repeated PropertyConfigProto properties = 4;

  // An arbitrary number that clients may use to track different incarnations
  // of the schema. The Icing library places no requirements on this field and
  // never validates it. When SetSchema is called with a schema that contains
  // one or more new version numbers, those numbers are updated provided the
  // SetSchema call succeeds. Leaving the field unset is allowed; it then
  // defaults to 0.
  optional int32 version = 5;
}

// Describes how a string property should be indexed.
// Next tag: 3
message StringIndexingConfig {
  // Tokenizers available for string properties. Holds only the Code enum,
  // which the tokenizer_type field references as TokenizerType.Code.
  message TokenizerType {
    enum Code {
      // 'NONE' is a valid tokenizer_type only when the data type is not
      // indexed.
      NONE = 0;

      // Plain-text tokenization.
      PLAIN = 1;

      // Verbatim tokenization: neither normalization nor segmentation is
      // applied to string values tokenized with this type, so the output
      // token equals the raw string text. For example, "Hello, world!" is
      // tokenized as "Hello, world!", keeping punctuation and capitalization
      // and producing no separate tokens around the space.
      VERBATIM = 2;
    }
  }

  // Selects how the content of this property is matched in the index.
  //
  // TermMatchType.Code=UNKNOWN
  // The content is neither tokenized nor indexed. Useful when the data type
  // is not indexable. See schema-util for details.
  //
  // TermMatchType.Code=EXACT_ONLY
  // The content is returned only for queries that match the exact tokens
  // appearing in this property.
  // Ex. A property with "fool" should NOT match a query for "foo".
  //
  // TermMatchType.Code=PREFIX
  // The content is returned for queries that either match a token exactly or
  // are a prefix of a token appearing in this property.
  // Ex. A property with "fool" *should* match a query for "foo".
  optional TermMatchType.Code term_match_type = 1;

  // Selects the tokenizer applied to this property's string content. See
  // TokenizerType.Code for the available values.
  optional TokenizerType.Code tokenizer_type = 2;
}

// Describes how a document property should be indexed.
// Next tag: 2
message DocumentIndexingConfig {
  // OPTIONAL: Whether the properties nested inside this document property
  // should be indexed.
  //   - true:  each nested property is indexed according to that nested
  //            property's own indexing configuration.
  //   - false: none of the nested document's properties are indexed, even if
  //            they carry an indexing configuration of their own.
  //
  // The default value is false (no explicit default is declared, so an unset
  // field reads as false).
  optional bool index_nested_properties = 1;
}

// Describes the schema of a single property of Documents that belong to a
// specific SchemaTypeConfigProto. These can be considered as a rich, structured
// type for each property of Documents accepted by IcingSearchEngine.
// Next tag: 7
message PropertyConfigProto {
  // Physical data types a property's contents may have. Holds only the Code
  // enum, which the data_type field references as DataType.Code.
  message DataType {
    enum Code {
      // Never set this value on purpose; it exists for backwards
      // compatibility reasons.
      UNKNOWN = 0;

      STRING = 1;
      INT64 = 2;
      DOUBLE = 3;
      BOOLEAN = 4;

      // An unstructured BLOB.
      BYTES = 5;

      // The property is itself a Document, which makes it part of a
      // hierarchical Document schema. Every property with this data_type
      // MUST have a valid 'schema_type'.
      DOCUMENT = 6;
    }
  }

  // Cardinalities a property may have. Holds only the Code enum, which the
  // cardinality field references as Cardinality.Code.
  message Cardinality {
    // NOTE: The values are deliberately ordered from least restrictive
    // (REPEATED) to most restrictive (REQUIRED). That way, backwards
    // compatibility of a field can be checked with a simple
    // greater-than/less-than comparison of the enum ints. Be cautious when
    // changing or adding cardinalities.
    enum Code {
      // Never set this value on purpose; it exists for backwards
      // compatibility reasons.
      UNKNOWN = 0;

      // Any number of items, including zero [0...*].
      REPEATED = 1;

      // Zero or one value [0,1].
      OPTIONAL = 2;

      // Exactly one value [1].
      REQUIRED = 3;
    }
  }

  // REQUIRED: The name that uniquely identifies this property within a
  // Document of a specific SchemaTypeConfigProto.
  //
  // Recommended format: a human-readable string naming one of the properties
  // that schema.org defines for the parent SchemaTypeConfigProto.
  // Eg: 'author' for http://schema.org/DigitalDocument.
  // Eg: 'address' for http://schema.org/Place.
  optional string property_name = 1;

  // REQUIRED: The physical data type of the contents of the property.
  optional DataType.Code data_type = 2;

  // REQUIRED if (data_type == DOCUMENT). OPTIONAL otherwise.
  // The logical schema-type of the contents of this property.
  //
  // TODO(cassiewang): This could be useful for non-document properties, e.g.
  // to set this field as a schema.org/address for some string property.
  // Re-evaluate what recommendation we should give clients if we want to start
  // using this for non-document properties as well.
  //
  // Recommended format: a human-readable string naming one of the types
  // defined in schema.org, equal to the SchemaTypeConfigProto.schema_type of
  // another type.
  optional string schema_type = 3;

  // REQUIRED: The cardinality of the property.
  optional Cardinality.Code cardinality = 4;

  // OPTIONAL: How string properties should be indexed. A string property
  // that leaves this indexing config unset will not be indexed.
  optional StringIndexingConfig string_indexing_config = 5;

  // OPTIONAL: How document properties should be indexed.
  optional DocumentIndexingConfig document_indexing_config = 6;
}

// List of all supported types constitutes the schema used by Icing.
// Next tag: 2
message SchemaProto {
  // One config per supported type; together these entries make up the
  // schema.
  repeated SchemaTypeConfigProto types = 1;
}

// Result of a call to IcingSearchEngine.SetSchema
// Next tag: 8
message SetSchemaResultProto {
  // Outcome of the SetSchema call. Status code can be one of:
  //   OK
  //   INVALID_ARGUMENT
  //   FAILED_PRECONDITION
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, +WARNING_DATA_LOSS,
  // -INTERNAL. go/icing-library-apis.
  optional StatusProto status = 1;

  // Schema types that existed in the previous schema, but were deleted from the
  // new schema. If ignore_errors_and_delete_documents=true, then all documents
  // of these types were also deleted.
  //
  // NOTE(review): ignore_errors_and_delete_documents is not defined in this
  // file; presumably it is an argument of SetSchema - confirm.
  repeated string deleted_schema_types = 2;

  // Schema types that existed in the previous schema and were incompatible with
  // the new schema type. If ignore_errors_and_delete_documents=true, then any
  // documents that fail validation against the new schema types would also be
  // deleted.
  repeated string incompatible_schema_types = 3;

  // Schema types that did not exist in the previous schema and were added with
  // the new schema type.
  repeated string new_schema_types = 4;

  // Schema types that were changed in a way that was backwards compatible and
  // didn't invalidate the index.
  repeated string fully_compatible_changed_schema_types = 5;

  // Schema types that were changed in a way that was backwards compatible, but
  // invalidated the index.
  repeated string index_incompatible_changed_schema_types = 6;

  // Overall time used for the function call, in milliseconds (per the _ms
  // suffix).
  optional int32 latency_ms = 7;
}

// Result of a call to IcingSearchEngine.GetSchema
// Next tag: 3
message GetSchemaResultProto {
  // Outcome of the GetSchema call. Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   NOT_FOUND
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // Copy of the Schema proto. Modifying this does not affect the Schema that
  // IcingSearchEngine holds.
  //
  // NOTE(review): presumably only populated when status is OK - confirm.
  optional SchemaProto schema = 2;
}

// Result of a call to IcingSearchEngine.GetSchemaType
// Next tag: 3
message GetSchemaTypeResultProto {
  // Outcome of the GetSchemaType call. Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   NOT_FOUND
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // Copy of the SchemaTypeConfig proto with the specified schema_type.
  // Modifying this does not affect the SchemaTypeConfig that IcingSearchEngine
  // holds.
  //
  // NOTE(review): presumably only populated when status is OK - confirm.
  optional SchemaTypeConfigProto schema_type_config = 2;
}