diff options
Diffstat (limited to 'google/privacy/dlp/v2')
-rw-r--r-- | google/privacy/dlp/v2/BUILD.bazel | 152 | ||||
-rw-r--r-- | google/privacy/dlp/v2/dlp.proto | 3182 | ||||
-rw-r--r-- | google/privacy/dlp/v2/dlp_gapic.yaml | 542 | ||||
-rw-r--r-- | google/privacy/dlp/v2/storage.proto | 639 |
4 files changed, 4515 insertions, 0 deletions
diff --git a/google/privacy/dlp/v2/BUILD.bazel b/google/privacy/dlp/v2/BUILD.bazel new file mode 100644 index 000000000..89be7665e --- /dev/null +++ b/google/privacy/dlp/v2/BUILD.bazel @@ -0,0 +1,152 @@ +# This is an API workspace, having public visibility by default makes perfect sense. +package(default_visibility = ["//visibility:public"]) + +############################################################################## +# Common +############################################################################## +load("@com_google_api_codegen//rules_gapic:gapic.bzl", "proto_library_with_info") + +proto_library( + name = "dlp_proto", + srcs = [ + "dlp.proto", + "storage.proto", + ], + deps = [ + "//google/api:annotations_proto", + "//google/rpc:status_proto", + "//google/type:date_proto", + "//google/type:dayofweek_proto", + "//google/type:timeofday_proto", + "@com_google_protobuf//:duration_proto", + "@com_google_protobuf//:empty_proto", + "@com_google_protobuf//:field_mask_proto", + "@com_google_protobuf//:timestamp_proto", + ], +) + +proto_library_with_info( + name = "dlp_proto_with_info", + deps = [ + ":dlp_proto", + ], +) + +############################################################################## +# Java +############################################################################## +load("@io_grpc_grpc_java//:java_grpc_library.bzl", "java_grpc_library") +load( + "@com_google_api_codegen//rules_gapic/java:java_gapic.bzl", + "java_gapic_library", + "java_resource_name_proto_library", +) +load("@com_google_api_codegen//rules_gapic/java:java_gapic_pkg.bzl", "java_gapic_assembly_gradle_pkg") + +_JAVA_GRPC_DEPS = [ + "@com_google_api_grpc_proto_google_common_protos//jar", +] + +java_proto_library( + name = "dlp_java_proto", + deps = [":dlp_proto"], +) + +java_grpc_library( + name = "dlp_java_grpc", + srcs = [":dlp_proto"], + deps = [":dlp_java_proto"] + _JAVA_GRPC_DEPS, +) + +java_resource_name_proto_library( + name = "dlp_resource_name_java_proto", + gapic_yaml 
= "dlp_gapic.yaml", + deps = [":dlp_proto"], +) + +java_gapic_library( + name = "dlp_java_gapic", + src = ":dlp_proto_with_info", + gapic_yaml = "dlp_gapic.yaml", + service_yaml = "//google/privacy/dlp:dlp_v2.yaml", + test_deps = [ + ":dlp_java_grpc", + ], + deps = [ + ":dlp_java_proto", + ":dlp_resource_name_java_proto", + ] + _JAVA_GRPC_DEPS, +) + +[java_test( + name = test_name, + test_class = test_name, + runtime_deps = [":dlp_java_gapic_test"], +) for test_name in [ + # "com.google.cloud.dlp.v2.DlpServiceClientTest", +]] + +# Opensource Packages +java_gapic_assembly_gradle_pkg( + name = "google-cloud-dlp-v2-java", + client_deps = [":dlp_java_gapic"], + client_group = "com.google.cloud", + client_test_deps = [":dlp_java_gapic_test"], + grpc_deps = [":dlp_java_grpc"], + grpc_group = "com.google.api.grpc", + proto_deps = [ + ":dlp_java_proto", + ":dlp_proto", + ":dlp_resource_name_java_proto", + ] + _JAVA_GRPC_DEPS, + version = "0.0.0-SNAPSHOT", +) + +############################################################################## +# Go +############################################################################## +load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library") +load("@com_google_api_codegen//rules_gapic/go:go_gapic.bzl", "go_gapic_srcjar", "go_gapic_library") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") +load("@com_google_api_codegen//rules_gapic/go:go_gapic_pkg.bzl", "go_gapic_assembly_pkg") + +go_proto_library( + name = "dlp_go_proto", + compilers = ["@io_bazel_rules_go//proto:go_grpc"], + importpath = "google.golang.org/genproto/googleapis/privacy/dlp/v2", + protos = [":dlp_proto_with_info"], + deps = [ + "//google/api:annotations_go_proto", + "//google/rpc:status_go_proto", + "//google/type:date_go_proto", + "//google/type:dayofweek_go_proto", + "//google/type:timeofday_go_proto", + ], +) + +go_gapic_library( + name = "dlp_go_gapic", + src = ":dlp_proto_with_info", + gapic_yaml = "dlp_gapic.yaml", + importpath = 
"cloud.google.com/go/dlp/apiv2", + service_yaml = "//google/privacy/dlp:dlp_v2.yaml", + deps = [":dlp_go_proto"], +) + +go_test( + name = "dlp_go_gapic_test", + srcs = [":dlp_go_gapic_srcjar_test"], + embed = [":dlp_go_gapic"], + importpath = "cloud.google.com/go/dlp/apiv2", +) + +# Opensource Packages +go_gapic_assembly_pkg( + name = "gapi-cloud-dlp-v2-go", + deps = [ + ":dlp_go_gapic", + ":dlp_go_gapic_srcjar-smoke-test.srcjar", + ":dlp_go_gapic_srcjar-test.srcjar", + ":dlp_go_proto", + ], +) diff --git a/google/privacy/dlp/v2/dlp.proto b/google/privacy/dlp/v2/dlp.proto new file mode 100644 index 000000000..3a5dde11f --- /dev/null +++ b/google/privacy/dlp/v2/dlp.proto @@ -0,0 +1,3182 @@ +// Copyright 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +syntax = "proto3"; + +package google.privacy.dlp.v2; + +import "google/api/annotations.proto"; +import "google/api/resource.proto"; +import "google/privacy/dlp/v2/storage.proto"; +import "google/protobuf/duration.proto"; +import "google/protobuf/empty.proto"; +import "google/protobuf/field_mask.proto"; +import "google/protobuf/timestamp.proto"; +import "google/rpc/status.proto"; +import "google/type/date.proto"; +import "google/type/dayofweek.proto"; +import "google/type/timeofday.proto"; + +option csharp_namespace = "Google.Cloud.Dlp.V2"; +option go_package = "google.golang.org/genproto/googleapis/privacy/dlp/v2;dlp"; +option java_multiple_files = true; +option java_outer_classname = "DlpProto"; +option java_package = "com.google.privacy.dlp.v2"; +option php_namespace = "Google\\Cloud\\Dlp\\V2"; + +// The Cloud Data Loss Prevention (DLP) API is a service that allows clients +// to detect the presence of Personally Identifiable Information (PII) and other +// privacy-sensitive data in user-supplied, unstructured data streams, like text +// blocks or images. +// The service also includes methods for sensitive data redaction and +// scheduling of data scans on Google Cloud Platform based data sets. +// +// To learn more about concepts and find how-to guides see +// https://cloud.google.com/dlp/docs/. +service DlpService { + // Finds potentially sensitive info in content. + // This method has limits on input size, processing time, and output size. + // + // When no InfoTypes or CustomInfoTypes are specified in this request, the + // system will automatically choose what detectors to run. By default this may + // be all types, but may change over time as detectors are updated. 
+ // + // For how-to guides, see https://cloud.google.com/dlp/docs/inspecting-images + // and https://cloud.google.com/dlp/docs/inspecting-text. + rpc InspectContent(InspectContentRequest) returns (InspectContentResponse) { + option (google.api.http) = { + post: "/v2/{parent=projects/*}/content:inspect" + body: "*" + }; + } + + // Redacts potentially sensitive info from an image. + // This method has limits on input size, processing time, and output size. + // See https://cloud.google.com/dlp/docs/redacting-sensitive-data-images to + // learn more. + // + // When no InfoTypes or CustomInfoTypes are specified in this request, the + // system will automatically choose what detectors to run. By default this may + // be all types, but may change over time as detectors are updated. + rpc RedactImage(RedactImageRequest) returns (RedactImageResponse) { + option (google.api.http) = { + post: "/v2/{parent=projects/*}/image:redact" + body: "*" + }; + } + + // De-identifies potentially sensitive info from a ContentItem. + // This method has limits on input size and output size. + // See https://cloud.google.com/dlp/docs/deidentify-sensitive-data to + // learn more. + // + // When no InfoTypes or CustomInfoTypes are specified in this request, the + // system will automatically choose what detectors to run. By default this may + // be all types, but may change over time as detectors are updated. + rpc DeidentifyContent(DeidentifyContentRequest) returns (DeidentifyContentResponse) { + option (google.api.http) = { + post: "/v2/{parent=projects/*}/content:deidentify" + body: "*" + }; + } + + // Re-identifies content that has been de-identified. + // See + // https://cloud.google.com/dlp/docs/pseudonymization#re-identification_in_free_text_code_example + // to learn more. 
+ rpc ReidentifyContent(ReidentifyContentRequest) returns (ReidentifyContentResponse) { + option (google.api.http) = { + post: "/v2/{parent=projects/*}/content:reidentify" + body: "*" + }; + } + + // Returns a list of the sensitive information types that the DLP API + // supports. See https://cloud.google.com/dlp/docs/infotypes-reference to + // learn more. + rpc ListInfoTypes(ListInfoTypesRequest) returns (ListInfoTypesResponse) { + option (google.api.http) = { + get: "/v2/infoTypes" + }; + } + + // Creates an InspectTemplate for re-using frequently used configuration + // for inspecting content, images, and storage. + // See https://cloud.google.com/dlp/docs/creating-templates to learn more. + rpc CreateInspectTemplate(CreateInspectTemplateRequest) returns (InspectTemplate) { + option (google.api.http) = { + post: "/v2/{parent=organizations/*}/inspectTemplates" + body: "*" + additional_bindings { + post: "/v2/{parent=projects/*}/inspectTemplates" + body: "*" + } + }; + } + + // Updates the InspectTemplate. + // See https://cloud.google.com/dlp/docs/creating-templates to learn more. + rpc UpdateInspectTemplate(UpdateInspectTemplateRequest) returns (InspectTemplate) { + option (google.api.http) = { + patch: "/v2/{name=organizations/*/inspectTemplates/*}" + body: "*" + additional_bindings { + patch: "/v2/{name=projects/*/inspectTemplates/*}" + body: "*" + } + }; + } + + // Gets an InspectTemplate. + // See https://cloud.google.com/dlp/docs/creating-templates to learn more. + rpc GetInspectTemplate(GetInspectTemplateRequest) returns (InspectTemplate) { + option (google.api.http) = { + get: "/v2/{name=organizations/*/inspectTemplates/*}" + additional_bindings { + get: "/v2/{name=projects/*/inspectTemplates/*}" + } + }; + } + + // Lists InspectTemplates. + // See https://cloud.google.com/dlp/docs/creating-templates to learn more. 
+ rpc ListInspectTemplates(ListInspectTemplatesRequest) returns (ListInspectTemplatesResponse) { + option (google.api.http) = { + get: "/v2/{parent=organizations/*}/inspectTemplates" + additional_bindings { + get: "/v2/{parent=projects/*}/inspectTemplates" + } + }; + } + + // Deletes an InspectTemplate. + // See https://cloud.google.com/dlp/docs/creating-templates to learn more. + rpc DeleteInspectTemplate(DeleteInspectTemplateRequest) returns (google.protobuf.Empty) { + option (google.api.http) = { + delete: "/v2/{name=organizations/*/inspectTemplates/*}" + additional_bindings { + delete: "/v2/{name=projects/*/inspectTemplates/*}" + } + }; + } + + // Creates a DeidentifyTemplate for re-using frequently used configuration + // for de-identifying content, images, and storage. + // See https://cloud.google.com/dlp/docs/creating-templates-deid to learn + // more. + rpc CreateDeidentifyTemplate(CreateDeidentifyTemplateRequest) returns (DeidentifyTemplate) { + option (google.api.http) = { + post: "/v2/{parent=organizations/*}/deidentifyTemplates" + body: "*" + additional_bindings { + post: "/v2/{parent=projects/*}/deidentifyTemplates" + body: "*" + } + }; + } + + // Updates the DeidentifyTemplate. + // See https://cloud.google.com/dlp/docs/creating-templates-deid to learn + // more. + rpc UpdateDeidentifyTemplate(UpdateDeidentifyTemplateRequest) returns (DeidentifyTemplate) { + option (google.api.http) = { + patch: "/v2/{name=organizations/*/deidentifyTemplates/*}" + body: "*" + additional_bindings { + patch: "/v2/{name=projects/*/deidentifyTemplates/*}" + body: "*" + } + }; + } + + // Gets a DeidentifyTemplate. + // See https://cloud.google.com/dlp/docs/creating-templates-deid to learn + // more. 
+ rpc GetDeidentifyTemplate(GetDeidentifyTemplateRequest) returns (DeidentifyTemplate) { + option (google.api.http) = { + get: "/v2/{name=organizations/*/deidentifyTemplates/*}" + additional_bindings { + get: "/v2/{name=projects/*/deidentifyTemplates/*}" + } + }; + } + + // Lists DeidentifyTemplates. + // See https://cloud.google.com/dlp/docs/creating-templates-deid to learn + // more. + rpc ListDeidentifyTemplates(ListDeidentifyTemplatesRequest) returns (ListDeidentifyTemplatesResponse) { + option (google.api.http) = { + get: "/v2/{parent=organizations/*}/deidentifyTemplates" + additional_bindings { + get: "/v2/{parent=projects/*}/deidentifyTemplates" + } + }; + } + + // Deletes a DeidentifyTemplate. + // See https://cloud.google.com/dlp/docs/creating-templates-deid to learn + // more. + rpc DeleteDeidentifyTemplate(DeleteDeidentifyTemplateRequest) returns (google.protobuf.Empty) { + option (google.api.http) = { + delete: "/v2/{name=organizations/*/deidentifyTemplates/*}" + additional_bindings { + delete: "/v2/{name=projects/*/deidentifyTemplates/*}" + } + }; + } + + // Creates a job trigger to run DLP actions such as scanning storage for + // sensitive information on a set schedule. + // See https://cloud.google.com/dlp/docs/creating-job-triggers to learn more. + rpc CreateJobTrigger(CreateJobTriggerRequest) returns (JobTrigger) { + option (google.api.http) = { + post: "/v2/{parent=projects/*}/jobTriggers" + body: "*" + }; + } + + // Updates a job trigger. + // See https://cloud.google.com/dlp/docs/creating-job-triggers to learn more. + rpc UpdateJobTrigger(UpdateJobTriggerRequest) returns (JobTrigger) { + option (google.api.http) = { + patch: "/v2/{name=projects/*/jobTriggers/*}" + body: "*" + }; + } + + // Gets a job trigger. + // See https://cloud.google.com/dlp/docs/creating-job-triggers to learn more. 
+ rpc GetJobTrigger(GetJobTriggerRequest) returns (JobTrigger) { + option (google.api.http) = { + get: "/v2/{name=projects/*/jobTriggers/*}" + }; + } + + // Lists job triggers. + // See https://cloud.google.com/dlp/docs/creating-job-triggers to learn more. + rpc ListJobTriggers(ListJobTriggersRequest) returns (ListJobTriggersResponse) { + option (google.api.http) = { + get: "/v2/{parent=projects/*}/jobTriggers" + }; + } + + // Deletes a job trigger. + // See https://cloud.google.com/dlp/docs/creating-job-triggers to learn more. + rpc DeleteJobTrigger(DeleteJobTriggerRequest) returns (google.protobuf.Empty) { + option (google.api.http) = { + delete: "/v2/{name=projects/*/jobTriggers/*}" + }; + } + + // Activates a job trigger. Causes the immediate execution of a trigger + // instead of waiting on the trigger event to occur. + rpc ActivateJobTrigger(ActivateJobTriggerRequest) returns (DlpJob) { + option (google.api.http) = { + post: "/v2/{name=projects/*/jobTriggers/*}:activate" + body: "*" + }; + } + + // Creates a new job to inspect storage or calculate risk metrics. + // See https://cloud.google.com/dlp/docs/inspecting-storage and + // https://cloud.google.com/dlp/docs/compute-risk-analysis to learn more. + // + // When no InfoTypes or CustomInfoTypes are specified in inspect jobs, the + // system will automatically choose what detectors to run. By default this may + // be all types, but may change over time as detectors are updated. + rpc CreateDlpJob(CreateDlpJobRequest) returns (DlpJob) { + option (google.api.http) = { + post: "/v2/{parent=projects/*}/dlpJobs" + body: "*" + }; + } + + // Lists DlpJobs that match the specified filter in the request. + // See https://cloud.google.com/dlp/docs/inspecting-storage and + // https://cloud.google.com/dlp/docs/compute-risk-analysis to learn more. 
+ rpc ListDlpJobs(ListDlpJobsRequest) returns (ListDlpJobsResponse) { + option (google.api.http) = { + get: "/v2/{parent=projects/*}/dlpJobs" + }; + } + + // Gets the latest state of a long-running DlpJob. + // See https://cloud.google.com/dlp/docs/inspecting-storage and + // https://cloud.google.com/dlp/docs/compute-risk-analysis to learn more. + rpc GetDlpJob(GetDlpJobRequest) returns (DlpJob) { + option (google.api.http) = { + get: "/v2/{name=projects/*/dlpJobs/*}" + }; + } + + // Deletes a long-running DlpJob. This method indicates that the client is + // no longer interested in the DlpJob result. The job will be cancelled if + // possible. + // See https://cloud.google.com/dlp/docs/inspecting-storage and + // https://cloud.google.com/dlp/docs/compute-risk-analysis to learn more. + rpc DeleteDlpJob(DeleteDlpJobRequest) returns (google.protobuf.Empty) { + option (google.api.http) = { + delete: "/v2/{name=projects/*/dlpJobs/*}" + }; + } + + // Starts asynchronous cancellation on a long-running DlpJob. The server + // makes a best effort to cancel the DlpJob, but success is not + // guaranteed. + // See https://cloud.google.com/dlp/docs/inspecting-storage and + // https://cloud.google.com/dlp/docs/compute-risk-analysis to learn more. + rpc CancelDlpJob(CancelDlpJobRequest) returns (google.protobuf.Empty) { + option (google.api.http) = { + post: "/v2/{name=projects/*/dlpJobs/*}:cancel" + body: "*" + }; + } + + // Creates a pre-built stored infoType to be used for inspection. + // See https://cloud.google.com/dlp/docs/creating-stored-infotypes to + // learn more. + rpc CreateStoredInfoType(CreateStoredInfoTypeRequest) returns (StoredInfoType) { + option (google.api.http) = { + post: "/v2/{parent=organizations/*}/storedInfoTypes" + body: "*" + additional_bindings { + post: "/v2/{parent=projects/*}/storedInfoTypes" + body: "*" + } + }; + } + + // Updates the stored infoType by creating a new version. 
The existing version + // will continue to be used until the new version is ready. + // See https://cloud.google.com/dlp/docs/creating-stored-infotypes to + // learn more. + rpc UpdateStoredInfoType(UpdateStoredInfoTypeRequest) returns (StoredInfoType) { + option (google.api.http) = { + patch: "/v2/{name=organizations/*/storedInfoTypes/*}" + body: "*" + additional_bindings { + patch: "/v2/{name=projects/*/storedInfoTypes/*}" + body: "*" + } + }; + } + + // Gets a stored infoType. + // See https://cloud.google.com/dlp/docs/creating-stored-infotypes to + // learn more. + rpc GetStoredInfoType(GetStoredInfoTypeRequest) returns (StoredInfoType) { + option (google.api.http) = { + get: "/v2/{name=organizations/*/storedInfoTypes/*}" + additional_bindings { + get: "/v2/{name=projects/*/storedInfoTypes/*}" + } + }; + } + + // Lists stored infoTypes. + // See https://cloud.google.com/dlp/docs/creating-stored-infotypes to + // learn more. + rpc ListStoredInfoTypes(ListStoredInfoTypesRequest) returns (ListStoredInfoTypesResponse) { + option (google.api.http) = { + get: "/v2/{parent=organizations/*}/storedInfoTypes" + additional_bindings { + get: "/v2/{parent=projects/*}/storedInfoTypes" + } + }; + } + + // Deletes a stored infoType. + // See https://cloud.google.com/dlp/docs/creating-stored-infotypes to + // learn more. + rpc DeleteStoredInfoType(DeleteStoredInfoTypeRequest) returns (google.protobuf.Empty) { + option (google.api.http) = { + delete: "/v2/{name=organizations/*/storedInfoTypes/*}" + additional_bindings { + delete: "/v2/{name=projects/*/storedInfoTypes/*}" + } + }; + } +} + +// List of infoTypes to exclude. +message ExcludeInfoTypes { + // InfoType list in ExclusionRule rule drops a finding when it overlaps with or + // is contained within a finding of an infoType from this list. 
For + // example, for `InspectionRuleSet.info_types` containing "PHONE_NUMBER" and + // `exclusion_rule` containing `exclude_info_types.info_types` with + // "EMAIL_ADDRESS" the phone number findings are dropped if they overlap + // with an EMAIL_ADDRESS finding. + // That causes "555-222-2222@example.org" to generate only a single + // finding, namely email address. + repeated InfoType info_types = 1; +} + +// Options describing which parts of the provided content should be scanned. +enum ContentOption { + // Includes entire content of a file or a data stream. + CONTENT_UNSPECIFIED = 0; + + // Text content within the data, excluding any metadata. + CONTENT_TEXT = 1; + + // Images found in the data. + CONTENT_IMAGE = 2; +} + +// The rule that specifies conditions when findings of infoTypes specified in +// `InspectionRuleSet` are removed from results. +message ExclusionRule { + oneof type { + // Dictionary which defines the rule. + CustomInfoType.Dictionary dictionary = 1; + + // Regular expression which defines the rule. + CustomInfoType.Regex regex = 2; + + // Set of infoTypes for which findings would affect this rule. + ExcludeInfoTypes exclude_info_types = 3; + } + + // How the rule is applied, see MatchingType documentation for details. + MatchingType matching_type = 4; +} + +// A single inspection rule to be applied to infoTypes, specified in +// `InspectionRuleSet`. +message InspectionRule { + oneof type { + // Hotword-based detection rule. + CustomInfoType.DetectionRule.HotwordRule hotword_rule = 1; + + // Exclusion rule. + ExclusionRule exclusion_rule = 2; + } +} + +// Rule set for modifying a set of infoTypes to alter behavior under certain +// circumstances, depending on the specific details of the rules within the set. +message InspectionRuleSet { + // List of infoTypes this rule set is applied to. + repeated InfoType info_types = 1; + + // Set of rules to be applied to infoTypes. The rules are applied in order. 
+ repeated InspectionRule rules = 2; +} + +// Configuration description of the scanning process. +// When used with redactContent only info_types and min_likelihood are currently +// used. +message InspectConfig { + message FindingLimits { + // Max findings configuration per infoType, per content item or long + // running DlpJob. + message InfoTypeLimit { + // Type of information the findings limit applies to. Only one limit per + // info_type should be provided. If InfoTypeLimit does not have an + // info_type, the DLP API applies the limit against all info_types that + // are found but not specified in another InfoTypeLimit. + InfoType info_type = 1; + + // Max findings limit for the given infoType. + int32 max_findings = 2; + } + + // Max number of findings that will be returned for each item scanned. + // When set within `InspectDataSourceRequest`, + // the maximum returned is 2000 regardless if this is set higher. + // When set within `InspectContentRequest`, this field is ignored. + int32 max_findings_per_item = 1; + + // Max number of findings that will be returned per request/job. + // When set within `InspectContentRequest`, the maximum returned is 2000 + // regardless if this is set higher. + int32 max_findings_per_request = 2; + + // Configuration of findings limit given for specified infoTypes. + repeated InfoTypeLimit max_findings_per_info_type = 3; + } + + // Restricts what info_types to look for. The values must correspond to + // InfoType values returned by ListInfoTypes or listed at + // https://cloud.google.com/dlp/docs/infotypes-reference. + // + // When no InfoTypes or CustomInfoTypes are specified in a request, the + // system may automatically choose what detectors to run. By default this may + // be all types, but may change over time as detectors are updated. + // + // The special InfoType name "ALL_BASIC" can be used to trigger all detectors, + // but may change over time as new InfoTypes are added. 
If you need precise + // control and predictability as to what detectors are run you should specify + // specific InfoTypes listed in the reference. + repeated InfoType info_types = 1; + + // Only returns findings equal or above this threshold. The default is + // POSSIBLE. + // See https://cloud.google.com/dlp/docs/likelihood to learn more. + Likelihood min_likelihood = 2; + + FindingLimits limits = 3; + + // When true, a contextual quote from the data that triggered a finding is + // included in the response; see Finding.quote. + bool include_quote = 4; + + // When true, excludes type information of the findings. + bool exclude_info_types = 5; + + // CustomInfoTypes provided by the user. See + // https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more. + repeated CustomInfoType custom_info_types = 6; + + // List of options defining data content to scan. + // If empty, text, images, and other content will be included. + repeated ContentOption content_options = 8; + + // Set of rules to apply to the findings for this InspectConfig. + // Exclusion rules, contained in the set are executed in the end, other + // rules are executed in the order they are specified for each info type. + repeated InspectionRuleSet rule_set = 10; +} + +// Container for bytes to inspect or redact. +message ByteContentItem { + enum BytesType { + BYTES_TYPE_UNSPECIFIED = 0; + + IMAGE = 6; + + IMAGE_JPEG = 1; + + IMAGE_BMP = 2; + + IMAGE_PNG = 3; + + IMAGE_SVG = 4; + + TEXT_UTF8 = 5; + } + + // The type of data stored in the bytes string. Default will be TEXT_UTF8. + BytesType type = 1; + + // Content data to inspect or redact. + bytes data = 2; +} + +// Container structure for the content to inspect. +message ContentItem { + // Data of the item either in the byte array or UTF-8 string form, or table. + oneof data_item { + // String data to inspect or redact. + string value = 3; + + // Structured content for inspection. 
See + // https://cloud.google.com/dlp/docs/inspecting-text#inspecting_a_table to + // learn more. + Table table = 4; + + // Content data to inspect or redact. Replaces `type` and `data`. + ByteContentItem byte_item = 5; + } +} + +// Structured content to inspect. Up to 50,000 `Value`s per request allowed. +// See https://cloud.google.com/dlp/docs/inspecting-text#inspecting_a_table to +// learn more. +message Table { + message Row { + repeated Value values = 1; + } + + repeated FieldId headers = 1; + + repeated Row rows = 2; +} + +// All the findings for a single scanned item. +message InspectResult { + // List of findings for an item. + repeated Finding findings = 1; + + // If true, then this item might have more findings than were returned, + // and the findings returned are an arbitrary subset of all findings. + // The findings list might be truncated because the input items were too + // large, or because the server reached the maximum amount of resources + // allowed for a single API call. For best results, divide the input into + // smaller batches. + bool findings_truncated = 2; +} + +// Represents a piece of potentially sensitive content. +message Finding { + // The content that was found. Even if the content is not textual, it + // may be converted to a textual representation here. + // Provided if `include_quote` is true and the finding is + // less than or equal to 4096 bytes long. If the finding exceeds 4096 bytes + // in length, the quote may be omitted. + string quote = 1; + + // The type of content that might have been found. + // Provided if `exclude_info_types` is false. + InfoType info_type = 2; + + // Confidence of how likely it is that the `info_type` is correct. + Likelihood likelihood = 3; + + // Where the content was found. + Location location = 4; + + // Timestamp when finding was detected. + google.protobuf.Timestamp create_time = 6; + + // Contains data parsed from quotes. 
Only populated if include_quote was set + // to true and a supported infoType was requested. Currently supported + // infoTypes: DATE, DATE_OF_BIRTH and TIME. + QuoteInfo quote_info = 7; +} + +// Specifies the location of the finding. +message Location { + // Zero-based byte offsets delimiting the finding. + // These are relative to the finding's containing element. + // Note that when the content is not textual, this references + // the UTF-8 encoded textual representation of the content. + // Omitted if content is an image. + Range byte_range = 1; + + // Unicode character offsets delimiting the finding. + // These are relative to the finding's containing element. + // Provided when the content is text. + Range codepoint_range = 2; + + // List of nested objects pointing to the precise location of the finding + // within the file or record. + repeated ContentLocation content_locations = 7; +} + +// Type of the match which can be applied to different ways of matching, like +// Dictionary, regular expression and intersecting with findings of another +// info type. +enum MatchingType { + // Invalid. + MATCHING_TYPE_UNSPECIFIED = 0; + + // Full match. + // + // - Dictionary: join of Dictionary results matched complete finding quote + // - Regex: all regex matches fill a finding quote start to end + // - Exclude info type: completely inside affecting info types findings + MATCHING_TYPE_FULL_MATCH = 1; + + // Partial match. + // + // - Dictionary: at least one of the tokens in the finding matches + // - Regex: substring of the finding matches + // - Exclude info type: intersects with affecting info types findings + MATCHING_TYPE_PARTIAL_MATCH = 2; + + // Inverse match. + // + // - Dictionary: no tokens in the finding match the dictionary + // - Regex: finding doesn't match the regex + // - Exclude info type: no intersection with affecting info types findings + MATCHING_TYPE_INVERSE_MATCH = 3; +} + +// Findings container location data. 
+message ContentLocation { + // Name of the container where the finding is located. + // The top level name is the source file name or table name. Names of some + // common storage containers are formatted as follows: + // + // * BigQuery tables: `<project_id>:<dataset_id>.<table_id>` + // * Cloud Storage files: `gs://<bucket>/<path>` + // * Datastore namespace: `<namespace>` + // + // Nested names could be absent if the embedded object has no string + // identifier (for example, an image contained within a document). + string container_name = 1; + + // Type of the container within the file with location of the finding. + oneof location { + // Location within a row or record of a database table. + RecordLocation record_location = 2; + + // Location within an image's pixels. + ImageLocation image_location = 3; + + // Location data for document files. + DocumentLocation document_location = 5; + } + + // Findings container modification timestamp, if applicable. + // For Google Cloud Storage contains last file modification timestamp. + // For BigQuery table contains last_modified_time property. + // For Datastore - not populated. + google.protobuf.Timestamp container_timestamp = 6; + + // Findings container version, if available + // ("generation" for Google Cloud Storage). + string container_version = 7; +} + +// Location of a finding within a document. +message DocumentLocation { + // Offset of the line, from the beginning of the file, where the finding + // is located. + int64 file_offset = 1; +} + +// Location of a finding within a row or record. +message RecordLocation { + // Key of the finding. + RecordKey record_key = 1; + + // Field id of the field containing the finding. + FieldId field_id = 2; + + // Location within a `ContentItem.Table`. + TableLocation table_location = 3; +} + +// Location of a finding within a table. +message TableLocation { + // The zero-based index of the row where the finding is located. 
+ int64 row_index = 1; +} + +// Generic half-open interval [start, end). +message Range { + // Index of the first character of the range (inclusive). + int64 start = 1; + + // Index immediately after the last character of the range (exclusive). + int64 end = 2; +} + +// Location of the finding within an image. +message ImageLocation { + // Bounding boxes locating the pixels within the image containing the finding. + repeated BoundingBox bounding_boxes = 1; +} + +// Bounding box encompassing detected text within an image. +message BoundingBox { + // Top coordinate of the bounding box. (0,0) is upper left. + int32 top = 1; + + // Left coordinate of the bounding box. (0,0) is upper left. + int32 left = 2; + + // Width of the bounding box in pixels. + int32 width = 3; + + // Height of the bounding box in pixels. + int32 height = 4; +} + +// Request to search for potentially sensitive info in an image and redact it +// by covering it with a colored rectangle. +message RedactImageRequest { + // Configuration for determining how redaction of images should occur. + message ImageRedactionConfig { + // Type of information to redact from images. + oneof target { + // Only one per info_type should be provided per request. If not + // specified, and redact_all_text is false, the DLP API will redact all + // text that it matches against all info_types that are found, but not + // specified in another ImageRedactionConfig. + InfoType info_type = 1; + + // If true, all text found in the image, regardless of whether it matches an + // info_type, is redacted. Only one should be provided. + bool redact_all_text = 2; + } + + // The color to use when redacting content from an image. If not specified, + // the default is black. + Color redaction_color = 3; + } + + // The parent resource name, for example projects/my-project-id. + string parent = 1; + + // Configuration for the inspector. + InspectConfig inspect_config = 2; + + // The configuration for specifying what content to redact from images. 
+ repeated ImageRedactionConfig image_redaction_configs = 5; + + // Whether the response should include findings along with the redacted + // image. + bool include_findings = 6; + + // The content must be PNG, JPEG, SVG or BMP. + ByteContentItem byte_item = 7; +} + +// Represents a color in the RGB color space. +message Color { + // The amount of red in the color as a value in the interval [0, 1]. + float red = 1; + + // The amount of green in the color as a value in the interval [0, 1]. + float green = 2; + + // The amount of blue in the color as a value in the interval [0, 1]. + float blue = 3; +} + +// Results of redacting an image. +message RedactImageResponse { + // The redacted image. The type will be the same as the original image. + bytes redacted_image = 1; + + // If an image was being inspected and the InspectConfig's include_quote was + // set to true, then this field will include all text, if any, that was found + // in the image. + string extracted_text = 2; + + // The findings. Populated when include_findings in the request is true. + InspectResult inspect_result = 3; +} + +// Request to de-identify a list of items. +message DeidentifyContentRequest { + // The parent resource name, for example projects/my-project-id. + string parent = 1; + + // Configuration for the de-identification of the content item. + // Items specified here will override the template referenced by the + // deidentify_template_name argument. + DeidentifyConfig deidentify_config = 2; + + // Configuration for the inspector. + // Items specified here will override the template referenced by the + // inspect_template_name argument. + InspectConfig inspect_config = 3; + + // The item to de-identify. Will be treated as text. + ContentItem item = 4; + + // Optional template to use. Any configuration directly specified in + // inspect_config will override those set in the template. 
Singular fields + // that are set in this request will replace their corresponding fields in the + // template. Repeated fields are appended. Singular sub-messages and groups + // are recursively merged. + string inspect_template_name = 5; + + // Optional template to use. Any configuration directly specified in + // deidentify_config will override those set in the template. Singular fields + // that are set in this request will replace their corresponding fields in the + // template. Repeated fields are appended. Singular sub-messages and groups + // are recursively merged. + string deidentify_template_name = 6; +} + +// Results of de-identifying a ContentItem. +message DeidentifyContentResponse { + // The de-identified item. + ContentItem item = 1; + + // An overview of the changes that were made on the `item`. + TransformationOverview overview = 2; +} + +// Request to re-identify an item. +message ReidentifyContentRequest { + // The parent resource name. + string parent = 1; + + // Configuration for the re-identification of the content item. + // This field shares the same proto message type that is used for + // de-identification, however its usage here is for the reversal of the + // previous de-identification. Re-identification is performed by examining + // the transformations used to de-identify the items and executing the + // reverse. This requires that only reversible transformations + // be provided here. The reversible transformations are: + // + // - `CryptoReplaceFfxFpeConfig` + DeidentifyConfig reidentify_config = 2; + + // Configuration for the inspector. + InspectConfig inspect_config = 3; + + // The item to re-identify. Will be treated as text. + ContentItem item = 4; + + // Optional template to use. Any configuration directly specified in + // `inspect_config` will override those set in the template. Singular fields + // that are set in this request will replace their corresponding fields in the + // template. Repeated fields are appended. 
Singular sub-messages and groups + // are recursively merged. + string inspect_template_name = 5; + + // Optional template to use. References an instance of `DeidentifyTemplate`. + // Any configuration directly specified in `reidentify_config` or + // `inspect_config` will override those set in the template. Singular fields + // that are set in this request will replace their corresponding fields in the + // template. Repeated fields are appended. Singular sub-messages and groups + // are recursively merged. + string reidentify_template_name = 6; +} + +// Results of re-identifying an item. +message ReidentifyContentResponse { + // The re-identified item. + ContentItem item = 1; + + // An overview of the changes that were made to the `item`. + TransformationOverview overview = 2; +} + +// Request to search for potentially sensitive info in a ContentItem. +message InspectContentRequest { + // The parent resource name, for example projects/my-project-id. + string parent = 1; + + // Configuration for the inspector. Items specified here will override + // the template referenced by the inspect_template_name argument. + InspectConfig inspect_config = 2; + + // The item to inspect. + ContentItem item = 3; + + // Optional template to use. Any configuration directly specified in + // inspect_config will override those set in the template. Singular fields + // that are set in this request will replace their corresponding fields in the + // template. Repeated fields are appended. Singular sub-messages and groups + // are recursively merged. + string inspect_template_name = 4; +} + +// Results of inspecting an item. +message InspectContentResponse { + // The findings. + InspectResult result = 1; +} + +// Cloud repository for storing output. +message OutputStorageConfig { + // Predefined schemas for storing findings. + enum OutputSchema { + OUTPUT_SCHEMA_UNSPECIFIED = 0; + + // Basic schema including only `info_type`, `quote`, `certainty`, and + // `timestamp`.
+ BASIC_COLUMNS = 1; + + // Schema tailored to findings from scanning Google Cloud Storage. + GCS_COLUMNS = 2; + + // Schema tailored to findings from scanning Google Datastore. + DATASTORE_COLUMNS = 3; + + // Schema tailored to findings from scanning Google BigQuery. + BIG_QUERY_COLUMNS = 4; + + // Schema containing all columns. + ALL_COLUMNS = 5; + } + + oneof type { + // Store findings in an existing table or a new table in an existing + // dataset. If table_id is not set a new one will be generated + // for you with the following format: + // dlp_googleapis_yyyy_mm_dd_[dlp_job_id]. Pacific timezone will be used for + // generating the date details. + // + // For Inspect, each column in an existing output table must have the same + // name, type, and mode of a field in the `Finding` object. + // + // For Risk, an existing output table should be the output of a previous + // Risk analysis job run on the same source table, with the same privacy + // metric and quasi-identifiers. Risk jobs that analyze the same table but + // compute a different privacy metric, or use different sets of + // quasi-identifiers, cannot store their results in the same table. + BigQueryTable table = 1; + } + + // Schema used for writing the findings for Inspect jobs. This field is only + // used for Inspect and must be unspecified for Risk jobs. Columns are derived + // from the `Finding` object. If appending to an existing table, any columns + // from the predefined schema that are missing will be added. No columns in + // the existing table will be deleted. + // + // If unspecified, then all available columns will be used for a new table or + // an (existing) table with no schema, and no changes will be made to an + // existing table that has a schema. + OutputSchema output_schema = 3; +} + +// Statistics regarding a specific InfoType. +message InfoTypeStats { + // The type of finding this stat is for. + InfoType info_type = 1; + + // Number of findings for this infoType. 
+ int64 count = 2; +} + +// The results of an inspect DataSource job. +message InspectDataSourceDetails { + message RequestedOptions { + // If run with an InspectTemplate, a snapshot of its state at the time of + // this run. + InspectTemplate snapshot_inspect_template = 1; + + InspectJobConfig job_config = 3; + } + + // All result fields mentioned below are updated while the job is processing. + message Result { + // Total size in bytes that were processed. + int64 processed_bytes = 1; + + // Estimate of the number of bytes to process. + int64 total_estimated_bytes = 2; + + // Statistics of how many instances of each info type were found during + // inspect job. + repeated InfoTypeStats info_type_stats = 3; + } + + // The configuration used for this job. + RequestedOptions requested_options = 2; + + // A summary of the outcome of this inspect job. + Result result = 3; +} + +// InfoType description. +message InfoTypeDescription { + // Internal name of the infoType. + string name = 1; + + // Human readable form of the infoType name. + string display_name = 2; + + // Which parts of the API support this InfoType. + repeated InfoTypeSupportedBy supported_by = 3; + + // Description of the infoType. Translated when language is provided in the + // request. + string description = 4; +} + +// Request for the list of infoTypes. +message ListInfoTypesRequest { + // Optional BCP-47 language code for localized infoType friendly + // names. If omitted, or if localized strings are not available, + // en-US strings will be returned. + string language_code = 1; + + // Optional filter to only return infoTypes supported by certain parts of the + // API. Defaults to supported_by=INSPECT. + string filter = 2; +} + +// Response to the ListInfoTypes request. +message ListInfoTypesResponse { + // Set of sensitive infoTypes. + repeated InfoTypeDescription info_types = 1; +} + +// Configuration for a risk analysis job.
See +// https://cloud.google.com/dlp/docs/concepts-risk-analysis to learn more. +message RiskAnalysisJobConfig { + // Privacy metric to compute. + PrivacyMetric privacy_metric = 1; + + // Input dataset to compute metrics over. + BigQueryTable source_table = 2; + + // Actions to execute at the completion of the job. Are executed in the order + // provided. + repeated Action actions = 3; +} + +// A column with a semantic tag attached. +message QuasiId { + // Identifies the column. [required] + FieldId field = 1; + + // Semantic tag that identifies what a column contains, to determine which + // statistical model to use to estimate the reidentifiability of each + // value. [required] + oneof tag { + // A column can be tagged with a InfoType to use the relevant public + // dataset as a statistical model of population, if available. We + // currently support US ZIP codes, region codes, ages and genders. + // To programmatically obtain the list of supported InfoTypes, use + // ListInfoTypes with the supported_by=RISK_ANALYSIS filter. + InfoType info_type = 2; + + // A column can be tagged with a custom tag. In this case, the user must + // indicate an auxiliary table that contains statistical information on + // the possible values of this column (below). + string custom_tag = 3; + + // If no semantic tag is indicated, we infer the statistical model from + // the distribution of values in the input data + google.protobuf.Empty inferred = 4; + } +} + +// An auxiliary table containing statistical information on the relative +// frequency of different quasi-identifiers values. It has one or several +// quasi-identifiers columns, and one column that indicates the relative +// frequency of each quasi-identifier tuple. +// If a tuple is present in the data but not in the auxiliary table, the +// corresponding relative frequency is assumed to be zero (and thus, the +// tuple is highly reidentifiable). 
+message StatisticalTable { + // A quasi-identifier column has a custom_tag, used to know which column + // in the data corresponds to which column in the statistical model. + message QuasiIdentifierField { + FieldId field = 1; + + string custom_tag = 2; + } + + // Auxiliary table location. [required] + BigQueryTable table = 3; + + // Quasi-identifier columns. [required] + repeated QuasiIdentifierField quasi_ids = 1; + + // The relative frequency column must contain a floating-point number + // between 0 and 1 (inclusive). Null values are assumed to be zero. + // [required] + FieldId relative_frequency = 2; +} + +// Privacy metric to compute for reidentification risk analysis. +message PrivacyMetric { + // Compute numerical stats over an individual column, including + // min, max, and quantiles. + message NumericalStatsConfig { + // Field to compute numerical stats on. Supported types are + // integer, float, date, datetime, timestamp, time. + FieldId field = 1; + } + + // Compute categorical stats over an individual column, including + // number of distinct values and value count distribution. + message CategoricalStatsConfig { + // Field to compute categorical stats on. All column types are + // supported except for arrays and structs. However, it may be more + // informative to use NumericalStats when the field type is supported, + // depending on the data. + FieldId field = 1; + } + + // k-anonymity metric, used for analysis of reidentification risk. + message KAnonymityConfig { + // Set of fields to compute k-anonymity over. When multiple fields are + // specified, they are considered a single composite key. Structs and + // repeated data types are not supported; however, nested fields are + // supported so long as they are not structs themselves or nested within + // a repeated field. + repeated FieldId quasi_ids = 1; + + // Optional message indicating that multiple rows might be associated to a + // single individual.
If the same entity_id is associated to multiple + // quasi-identifier tuples over distinct rows, we consider the entire + // collection of tuples as the composite quasi-identifier. This collection + // is a multiset: the order in which the different tuples appear in the + // dataset is ignored, but their frequency is taken into account. + // + // Important note: a maximum of 1000 rows can be associated to a single + // entity ID. If more rows are associated with the same entity ID, some + // might be ignored. + EntityId entity_id = 2; + } + + // l-diversity metric, used for analysis of reidentification risk. + message LDiversityConfig { + // Set of quasi-identifiers indicating how equivalence classes are + // defined for the l-diversity computation. When multiple fields are + // specified, they are considered a single composite key. + repeated FieldId quasi_ids = 1; + + // Sensitive field for computing the l-value. + FieldId sensitive_attribute = 2; + } + + // Reidentifiability metric. This corresponds to a risk model similar to what + // is called "journalist risk" in the literature, except the attack dataset is + // statistically modeled instead of being perfectly known. This can be done + // using publicly available data (like the US Census), or using a custom + // statistical model (indicated as one or several BigQuery tables), or by + // extrapolating from the distribution of values in the input dataset. + message KMapEstimationConfig { + // A column with a semantic tag attached. + message TaggedField { + // Identifies the column. [required] + FieldId field = 1; + + // Semantic tag that identifies what a column contains, to determine which + // statistical model to use to estimate the reidentifiability of each + // value. [required] + oneof tag { + // A column can be tagged with an InfoType to use the relevant public + // dataset as a statistical model of population, if available. We + // currently support US ZIP codes, region codes, ages and genders.
+ // To programmatically obtain the list of supported InfoTypes, use + // ListInfoTypes with the supported_by=RISK_ANALYSIS filter. + InfoType info_type = 2; + + // A column can be tagged with a custom tag. In this case, the user must + // indicate an auxiliary table that contains statistical information on + // the possible values of this column (below). + string custom_tag = 3; + + // If no semantic tag is indicated, we infer the statistical model from + // the distribution of values in the input data + google.protobuf.Empty inferred = 4; + } + } + + // An auxiliary table contains statistical information on the relative + // frequency of different quasi-identifiers values. It has one or several + // quasi-identifiers columns, and one column that indicates the relative + // frequency of each quasi-identifier tuple. + // If a tuple is present in the data but not in the auxiliary table, the + // corresponding relative frequency is assumed to be zero (and thus, the + // tuple is highly reidentifiable). + message AuxiliaryTable { + // A quasi-identifier column has a custom_tag, used to know which column + // in the data corresponds to which column in the statistical model. + message QuasiIdField { + FieldId field = 1; + + string custom_tag = 2; + } + + // Auxiliary table location. [required] + BigQueryTable table = 3; + + // Quasi-identifier columns. [required] + repeated QuasiIdField quasi_ids = 1; + + // The relative frequency column must contain a floating-point number + // between 0 and 1 (inclusive). Null values are assumed to be zero. + // [required] + FieldId relative_frequency = 2; + } + + // Fields considered to be quasi-identifiers. No two columns can have the + // same tag. [required] + repeated TaggedField quasi_ids = 1; + + // ISO 3166-1 alpha-2 region code to use in the statistical modeling. + // Required if no column is tagged with a region-specific InfoType (like + // US_ZIP_5) or a region code. 
+ string region_code = 2; + + // Several auxiliary tables can be used in the analysis. Each custom_tag + // used to tag a quasi-identifiers column must appear in exactly one column + // of one auxiliary table. + repeated AuxiliaryTable auxiliary_tables = 3; + } + + // δ-presence metric, used to estimate how likely it is for an attacker to + // figure out that one given individual appears in a de-identified dataset. + // Similarly to the k-map metric, we cannot compute δ-presence exactly without + // knowing the attack dataset, so we use a statistical model instead. + message DeltaPresenceEstimationConfig { + // Fields considered to be quasi-identifiers. No two fields can have the + // same tag. [required] + repeated QuasiId quasi_ids = 1; + + // ISO 3166-1 alpha-2 region code to use in the statistical modeling. + // Required if no column is tagged with a region-specific InfoType (like + // US_ZIP_5) or a region code. + string region_code = 2; + + // Several auxiliary tables can be used in the analysis. Each custom_tag + // used to tag a quasi-identifiers field must appear in exactly one + // field of one auxiliary table. + repeated StatisticalTable auxiliary_tables = 3; + } + + oneof type { + NumericalStatsConfig numerical_stats_config = 1; + + CategoricalStatsConfig categorical_stats_config = 2; + + KAnonymityConfig k_anonymity_config = 3; + + LDiversityConfig l_diversity_config = 4; + + KMapEstimationConfig k_map_estimation_config = 5; + + DeltaPresenceEstimationConfig delta_presence_estimation_config = 6; + } +} + +// Result of a risk analysis operation request. +message AnalyzeDataSourceRiskDetails { + // Result of the numerical stats computation. + message NumericalStatsResult { + // Minimum value appearing in the column. + Value min_value = 1; + + // Maximum value appearing in the column. + Value max_value = 2; + + // List of 99 values that partition the set of field values into 100 equal + // sized buckets. 
+ repeated Value quantile_values = 4; + } + + // Result of the categorical stats computation. + message CategoricalStatsResult { + message CategoricalStatsHistogramBucket { + // Lower bound on the value frequency of the values in this bucket. + int64 value_frequency_lower_bound = 1; + + // Upper bound on the value frequency of the values in this bucket. + int64 value_frequency_upper_bound = 2; + + // Total number of values in this bucket. + int64 bucket_size = 3; + + // Sample of value frequencies in this bucket. The total number of + // values returned per bucket is capped at 20. + repeated ValueFrequency bucket_values = 4; + + // Total number of distinct values in this bucket. + int64 bucket_value_count = 5; + } + + // Histogram of value frequencies in the column. + repeated CategoricalStatsHistogramBucket value_frequency_histogram_buckets = 5; + } + + // Result of the k-anonymity computation. + message KAnonymityResult { + // The set of columns' values that share the same k-anonymity value. + message KAnonymityEquivalenceClass { + // Set of values defining the equivalence class. One value per + // quasi-identifier column in the original KAnonymity metric message. + // The order is always the same as the original request. + repeated Value quasi_ids_values = 1; + + // Size of the equivalence class, for example number of rows with the + // above set of values. + int64 equivalence_class_size = 2; + } + + message KAnonymityHistogramBucket { + // Lower bound on the size of the equivalence classes in this bucket. + int64 equivalence_class_size_lower_bound = 1; + + // Upper bound on the size of the equivalence classes in this bucket. + int64 equivalence_class_size_upper_bound = 2; + + // Total number of equivalence classes in this bucket. + int64 bucket_size = 3; + + // Sample of equivalence classes in this bucket. The total number of + // classes returned per bucket is capped at 20.
+ repeated KAnonymityEquivalenceClass bucket_values = 4; + + // Total number of distinct equivalence classes in this bucket. + int64 bucket_value_count = 5; + } + + // Histogram of k-anonymity equivalence classes. + repeated KAnonymityHistogramBucket equivalence_class_histogram_buckets = 5; + } + + // Result of the l-diversity computation. + message LDiversityResult { + // The set of columns' values that share the same ldiversity value. + message LDiversityEquivalenceClass { + // Quasi-identifier values defining the k-anonymity equivalence + // class. The order is always the same as the original request. + repeated Value quasi_ids_values = 1; + + // Size of the k-anonymity equivalence class. + int64 equivalence_class_size = 2; + + // Number of distinct sensitive values in this equivalence class. + int64 num_distinct_sensitive_values = 3; + + // Estimated frequencies of top sensitive values. + repeated ValueFrequency top_sensitive_values = 4; + } + + message LDiversityHistogramBucket { + // Lower bound on the sensitive value frequencies of the equivalence + // classes in this bucket. + int64 sensitive_value_frequency_lower_bound = 1; + + // Upper bound on the sensitive value frequencies of the equivalence + // classes in this bucket. + int64 sensitive_value_frequency_upper_bound = 2; + + // Total number of equivalence classes in this bucket. + int64 bucket_size = 3; + + // Sample of equivalence classes in this bucket. The total number of + // classes returned per bucket is capped at 20. + repeated LDiversityEquivalenceClass bucket_values = 4; + + // Total number of distinct equivalence classes in this bucket. + int64 bucket_value_count = 5; + } + + // Histogram of l-diversity equivalence class sensitive value frequencies. + repeated LDiversityHistogramBucket sensitive_value_frequency_histogram_buckets = 5; + } + + // Result of the reidentifiability analysis. Note that these results are an + // estimation, not exact values. 
+ message KMapEstimationResult { + // A tuple of values for the quasi-identifier columns. + message KMapEstimationQuasiIdValues { + // The quasi-identifier values. + repeated Value quasi_ids_values = 1; + + // The estimated anonymity for these quasi-identifier values. + int64 estimated_anonymity = 2; + } + + // A KMapEstimationHistogramBucket message with the following values: + // min_anonymity: 3 + // max_anonymity: 5 + // bucket_size: 42 + // means that there are 42 records whose quasi-identifier values correspond + // to 3, 4 or 5 people in the overlying population. An important particular + // case is when min_anonymity = max_anonymity = 1: the bucket_size field then + // corresponds to the number of uniquely identifiable records. + message KMapEstimationHistogramBucket { + // Always positive. + int64 min_anonymity = 1; + + // Always greater than or equal to min_anonymity. + int64 max_anonymity = 2; + + // Number of records within these anonymity bounds. + int64 bucket_size = 5; + + // Sample of quasi-identifier tuple values in this bucket. The total + // number of classes returned per bucket is capped at 20. + repeated KMapEstimationQuasiIdValues bucket_values = 6; + + // Total number of distinct quasi-identifier tuple values in this bucket. + int64 bucket_value_count = 7; + } + + // The intervals [min_anonymity, max_anonymity] do not overlap. If a value + // doesn't correspond to any such interval, the associated bucket_size is + // zero. For example, the following records: + // {min_anonymity: 1, max_anonymity: 1, bucket_size: 17} + // {min_anonymity: 2, max_anonymity: 3, bucket_size: 42} + // {min_anonymity: 5, max_anonymity: 10, bucket_size: 99} + // mean that there are no records with an estimated anonymity of 4 or + // larger than 10. + repeated KMapEstimationHistogramBucket k_map_estimation_histogram = 1; + } + + // Result of the δ-presence computation. Note that these results are an + // estimation, not exact values.
+ message DeltaPresenceEstimationResult { + // A tuple of values for the quasi-identifier columns. + message DeltaPresenceEstimationQuasiIdValues { + // The quasi-identifier values. + repeated Value quasi_ids_values = 1; + + // The estimated probability that a given individual sharing these + // quasi-identifier values is in the dataset. This value, typically called + // δ, is the ratio between the number of records in the dataset with these + // quasi-identifier values, and the total number of individuals (inside + // *and* outside the dataset) with these quasi-identifier values. + // For example, if there are 15 individuals in the dataset who share the + // same quasi-identifier values, and an estimated 100 people in the entire + // population with these values, then δ is 0.15. + double estimated_probability = 2; + } + + // A DeltaPresenceEstimationHistogramBucket message with the following + // values: + // min_probability: 0.1 + // max_probability: 0.2 + // bucket_size: 42 + // means that there are 42 records for which δ is in [0.1, 0.2). An + // important particular case is when min_probability = max_probability = 1: + // then, every individual who shares this quasi-identifier combination is in + // the dataset. + message DeltaPresenceEstimationHistogramBucket { + // Between 0 and 1. + double min_probability = 1; + + // Always greater than or equal to min_probability. + double max_probability = 2; + + // Number of records within these probability bounds. + int64 bucket_size = 5; + + // Sample of quasi-identifier tuple values in this bucket. The total + // number of classes returned per bucket is capped at 20. + repeated DeltaPresenceEstimationQuasiIdValues bucket_values = 6; + + // Total number of distinct quasi-identifier tuple values in this bucket. + int64 bucket_value_count = 7; + } + + // The intervals [min_probability, max_probability) do not overlap. If a + // value doesn't correspond to any such interval, the associated bucket_size + // is zero.
For example, the following records: + // {min_probability: 0, max_probability: 0.1, bucket_size: 17} + // {min_probability: 0.2, max_probability: 0.3, bucket_size: 42} + // {min_probability: 0.3, max_probability: 0.4, bucket_size: 99} + // mean that there are no records with an estimated probability in [0.1, 0.2) + // nor larger than or equal to 0.4. + repeated DeltaPresenceEstimationHistogramBucket delta_presence_estimation_histogram = 1; + } + + // Privacy metric to compute. + PrivacyMetric requested_privacy_metric = 1; + + // Input dataset to compute metrics over. + BigQueryTable requested_source_table = 2; + + // Values associated with this metric. + oneof result { + NumericalStatsResult numerical_stats_result = 3; + + CategoricalStatsResult categorical_stats_result = 4; + + KAnonymityResult k_anonymity_result = 5; + + LDiversityResult l_diversity_result = 6; + + KMapEstimationResult k_map_estimation_result = 7; + + DeltaPresenceEstimationResult delta_presence_estimation_result = 9; + } +} + +// A value of a field, including its frequency. +message ValueFrequency { + // A value contained in the field in question. + Value value = 1; + + // How many times the value is contained in the field. + int64 count = 2; +} + +// Set of primitive values supported by the system. +// Note that for the purposes of inspection or transformation, the number +// of bytes considered to comprise a 'Value' is based on its representation +// as a UTF-8 encoded string. For example, if 'integer_value' is set to +// 123456789, the number of bytes would be counted as 9, even though an +// int64 only holds up to 8 bytes of data.
+message Value { + oneof type { + int64 integer_value = 1; + + double float_value = 2; + + string string_value = 3; + + bool boolean_value = 4; + + google.protobuf.Timestamp timestamp_value = 5; + + google.type.TimeOfDay time_value = 6; + + google.type.Date date_value = 7; + + google.type.DayOfWeek day_of_week_value = 8; + } +} + +// Message for infoType-dependent details parsed from quote. +message QuoteInfo { + // Object representation of the quote. + oneof parsed_quote { + // The date time indicated by the quote. + DateTime date_time = 2; + } +} + +// Message for a date time object. +// e.g. 2018-01-01, 5th August. +message DateTime { + message TimeZone { + // Set only if the offset can be determined. Positive for time ahead of UTC. + // E.g. For "UTC-9", this value is -540. + int32 offset_minutes = 1; + } + + // One or more of the following must be set. All fields are optional, but + // when set must be valid date or time values. + google.type.Date date = 1; + + google.type.DayOfWeek day_of_week = 2; + + google.type.TimeOfDay time = 3; + + TimeZone time_zone = 4; +} + +// The configuration that controls how the data will change. +message DeidentifyConfig { + oneof transformation { + // Treat the dataset as free-form text and apply the same free text + // transformation everywhere. + InfoTypeTransformations info_type_transformations = 1; + + // Treat the dataset as structured. Transformations can be applied to + // specific locations within structured datasets, such as transforming + // a column within a table. + RecordTransformations record_transformations = 2; + } +} + +// A rule for transforming a value. 
+message PrimitiveTransformation { + oneof transformation { + ReplaceValueConfig replace_config = 1; + + RedactConfig redact_config = 2; + + CharacterMaskConfig character_mask_config = 3; + + CryptoReplaceFfxFpeConfig crypto_replace_ffx_fpe_config = 4; + + FixedSizeBucketingConfig fixed_size_bucketing_config = 5; + + BucketingConfig bucketing_config = 6; + + ReplaceWithInfoTypeConfig replace_with_info_type_config = 7; + + TimePartConfig time_part_config = 8; + + CryptoHashConfig crypto_hash_config = 9; + + DateShiftConfig date_shift_config = 11; + + CryptoDeterministicConfig crypto_deterministic_config = 12; + } +} + +// For use with `Date`, `Timestamp`, and `TimeOfDay`, extract or preserve a +// portion of the value. +message TimePartConfig { + enum TimePart { + TIME_PART_UNSPECIFIED = 0; + + // [0-9999] + YEAR = 1; + + // [1-12] + MONTH = 2; + + // [1-31] + DAY_OF_MONTH = 3; + + // [1-7] + DAY_OF_WEEK = 4; + + // [1-52] + WEEK_OF_YEAR = 5; + + // [0-23] + HOUR_OF_DAY = 6; + } + + TimePart part_to_extract = 1; +} + +// Pseudonymization method that generates surrogates via cryptographic hashing. +// Uses SHA-256. +// The key size must be either 32 or 64 bytes. +// Outputs a base64 encoded representation of the hashed output +// (for example, L7k0BHmF1ha5U3NfGykjro4xWi1MPVQPjhMAZbSV9mM=). +// Currently, only string and integer values can be hashed. +// See https://cloud.google.com/dlp/docs/pseudonymization to learn more. +message CryptoHashConfig { + // The key used by the hash function. + CryptoKey crypto_key = 1; +} + +// Pseudonymization method that generates deterministic encryption for the given +// input. Outputs a base64 encoded representation of the encrypted output. +// Uses AES-SIV based on the RFC https://tools.ietf.org/html/rfc5297. +message CryptoDeterministicConfig { + // The key used by the encryption function. + CryptoKey crypto_key = 1; + + // The custom info type to annotate the surrogate with. 
+ // This annotation will be applied to the surrogate by prefixing it with + // the name of the custom info type followed by the number of + // characters comprising the surrogate. The following scheme defines the + // format: <info type name>(<surrogate character count>):<surrogate> + // + // For example, if the name of the custom info type is 'MY_TOKEN_INFO_TYPE' + // and the surrogate is 'abc', the full replacement value + // will be: 'MY_TOKEN_INFO_TYPE(3):abc' + // + // This annotation identifies the surrogate when inspecting content using the + // custom info type 'Surrogate'. This facilitates reversal of the + // surrogate when it occurs in free text. + // + // In order for inspection to work properly, the name of this info type must + // not occur naturally anywhere in your data; otherwise, inspection may either: + // + // - reverse a surrogate that does not correspond to an actual identifier + // - be unable to parse the surrogate and result in an error + // + // Therefore, choose your custom info type name carefully after considering + // what your data looks like. One way to select a name that has a high chance + // of yielding reliable detection is to include one or more Unicode characters + // that are highly improbable to exist in your data. + // For example, assuming your data is entered from a regular ASCII keyboard, + // the symbol with the hex code point 29DD might be used like so: + // ⧝MY_TOKEN_TYPE + InfoType surrogate_info_type = 2; + + // Optional. A context may be used for higher security and maintaining + // referential integrity such that the same identifier in two different + // contexts will be given a distinct surrogate. The context is appended to + // the plaintext value being encrypted. On decryption the provided context is + // validated against the value used during encryption. If a context was + // provided during encryption, the same context must be provided during + // decryption as well.
+ // + // If the context is not set, plaintext would be used as is for encryption. + // If the context is set but: + // + // 1. there is no record present when transforming a given value or + // 2. the field is not present when transforming a given value, + // + // plaintext would be used as is for encryption. + // + // Note that case (1) is expected when an `InfoTypeTransformation` is + // applied to both structured and non-structured `ContentItem`s. + FieldId context = 3; +} + +// Replace each input value with a given `Value`. +message ReplaceValueConfig { + // Value to replace it with. + Value new_value = 1; +} + +// Replace each matching finding with the name of the info_type. +message ReplaceWithInfoTypeConfig { + +} + +// Redact a given value. For example, if used with an `InfoTypeTransformation` +// transforming PHONE_NUMBER, and input 'My phone number is 206-555-0123', the +// output would be 'My phone number is '. +message RedactConfig { + +} + +// Characters to skip when doing deidentification of a value. These will be left +// alone and skipped. +message CharsToIgnore { + // Predefined groups of characters that can be skipped. + enum CommonCharsToIgnore { + COMMON_CHARS_TO_IGNORE_UNSPECIFIED = 0; + + // 0-9 + NUMERIC = 1; + + // A-Z + ALPHA_UPPER_CASE = 2; + + // a-z + ALPHA_LOWER_CASE = 3; + + // US Punctuation, one of !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ + PUNCTUATION = 4; + + // Whitespace character, one of [ \t\n\x0B\f\r] + WHITESPACE = 5; + } + + // The characters to skip; at most one of the following may be set. + oneof characters { + // Characters to skip, given as a string. + string characters_to_skip = 1; + + // One of the predefined character groups to skip. + CommonCharsToIgnore common_characters_to_ignore = 2; + } +} + +// Partially mask a string by replacing a given number of characters with a +// fixed character. Masking can start from the beginning or end of the string. +// This can be used on data of any type (numbers, longs, and so on) and when +// de-identifying structured data we'll attempt to preserve the original data's +// type. (This allows you to take a long like 123 and modify it to a string like +// **3.)
+message CharacterMaskConfig { + // Character to mask the sensitive values—for example, "*" for an + // alphabetic string such as name, or "0" for a numeric string such as ZIP + // code or credit card number. String must have length 1. If not supplied, we + // will default to "*" for strings, 0 for digits. + string masking_character = 1; + + // Number of characters to mask. If not set, all matching chars will be + // masked. Skipped characters do not count towards this tally. + int32 number_to_mask = 2; + + // Mask characters in reverse order. For example, if `masking_character` is + // '0', `number_to_mask` is 14, and `reverse_order` is false, then + // 1234-5678-9012-3456 -> 00000000000000-3456 + // If `masking_character` is '*', `number_to_mask` is 3, and `reverse_order` + // is true, then 12345 -> 12*** + bool reverse_order = 3; + + // When masking a string, items in this list will be skipped when replacing. + // For example, if your string is 555-555-5555 and you ask us to skip `-` and + // mask 5 chars with * we would produce ***-**5-5555. + repeated CharsToIgnore characters_to_ignore = 4; +} + +// Buckets values based on fixed size ranges. The +// Bucketing transformation can provide all of this functionality, +// but requires more configuration. This message is provided as a convenience to +// the user for simple bucketing strategies. +// +// The transformed value will be a hyphenated string of +// <lower_bound>-<upper_bound>, i.e., if lower_bound = 10 and upper_bound = 20 +// all values that are within this bucket will be replaced with "10-20". +// +// This can be used on data of type: double, long. +// +// If the bound Value type differs from the type of data +// being transformed, we will first attempt converting the type of the data to +// be transformed to match the type of the bound before comparing. +// +// See https://cloud.google.com/dlp/docs/concepts-bucketing to learn more. +message FixedSizeBucketingConfig { + // Lower bound value of buckets.
All values less than `lower_bound` are + // grouped together into a single bucket; for example if `lower_bound` = 10, + // then all values less than 10 are replaced with the value “-10”. [Required]. + Value lower_bound = 1; + + // Upper bound value of buckets. All values greater than `upper_bound` are + // grouped together into a single bucket; for example if `upper_bound` = 89, + // then all values greater than 89 are replaced with the value “89+”. + // [Required]. + Value upper_bound = 2; + + // Size of each bucket (except for minimum and maximum buckets). So if + // `lower_bound` = 10, `upper_bound` = 89, and `bucket_size` = 10, then the + // following buckets would be used: -10, 10-20, 20-30, 30-40, 40-50, 50-60, + // 60-70, 70-80, 80-89, 89+. Precision up to 2 decimals works. [Required]. + double bucket_size = 3; +} + +// Generalization function that buckets values based on ranges. The ranges and +// replacement values are dynamically provided by the user for custom behavior, +// such as 1-30 -> LOW, 31-65 -> MEDIUM, 66-100 -> HIGH. +// This can be used on +// data of type: number, long, string, timestamp. +// If the bound `Value` type differs from the type of data being transformed, we +// will first attempt converting the type of the data to be transformed to match +// the type of the bound before comparing. +// See https://cloud.google.com/dlp/docs/concepts-bucketing to learn more. +message BucketingConfig { + // Bucket is represented as a range, along with a replacement value. + message Bucket { + // Lower bound of the range, inclusive. Type should be the same as `max` if + // used. + Value min = 1; + + // Upper bound of the range, exclusive; type must match `min`. + Value max = 2; + + // Replacement value for this bucket. If not provided, + // the default behavior will be to hyphenate the min-max range. + Value replacement_value = 3; + } + + // Set of buckets. Ranges must be non-overlapping.
+ repeated Bucket buckets = 1; +} + +// Replaces an identifier with a surrogate using Format Preserving Encryption +// (FPE) with the FFX mode of operation; however, when used in the +// `ReidentifyContent` API method, it serves the opposite function by reversing +// the surrogate back into the original identifier. The identifier must be +// encoded as ASCII. For a given crypto key and context, the same identifier +// will be replaced with the same surrogate. Identifiers must be at least two +// characters long. In the case that the identifier is the empty string, it will +// be skipped. See https://cloud.google.com/dlp/docs/pseudonymization to learn +// more. +// +// Note: We recommend using CryptoDeterministicConfig for all use cases which +// do not require preserving the input alphabet space and size, plus warrant +// referential integrity. +message CryptoReplaceFfxFpeConfig { + // These are commonly used subsets of the alphabet that the FFX mode + // natively supports. In the algorithm, the alphabet is selected using + // the "radix". Therefore each corresponds to a particular radix. + enum FfxCommonNativeAlphabet { + FFX_COMMON_NATIVE_ALPHABET_UNSPECIFIED = 0; + + // [0-9] (radix of 10) + NUMERIC = 1; + + // [0-9A-F] (radix of 16) + HEXADECIMAL = 2; + + // [0-9A-Z] (radix of 36) + UPPER_CASE_ALPHA_NUMERIC = 3; + + // [0-9A-Za-z] (radix of 62) + ALPHA_NUMERIC = 4; + } + + // The key used by the encryption algorithm. [required] + CryptoKey crypto_key = 1; + + // The 'tweak'. A context may be used for higher security since the same + // identifier in two different contexts won't be given the same surrogate. If + // the context is not set, a default tweak will be used. + // + // If the context is set but: + // + // 1. there is no record present when transforming a given value or + // 2. the field is not present when transforming a given value, + // + // a default tweak will be used.
+ // + // Note that case (1) is expected when an `InfoTypeTransformation` is + // applied to both structured and non-structured `ContentItem`s. + // Currently, the referenced field may be of value type integer or string. + // + // The tweak is constructed as a sequence of bytes in big-endian byte order + // such that: + // + // - a 64-bit integer is encoded followed by a single byte of value 1 + // - a string is encoded in UTF-8 format followed by a single byte of value 2 + FieldId context = 2; + + // The alphabet to use; at most one of the following may be set. + oneof alphabet { + // One of the commonly used alphabets that the FFX mode natively supports. + FfxCommonNativeAlphabet common_alphabet = 4; + + // This is supported by mapping these to the alphanumeric characters + // that the FFX mode natively supports. This happens before/after + // encryption/decryption. + // Each character listed must appear only once. + // Number of characters must be in the range [2, 62]. + // This must be encoded as ASCII. + // The order of characters does not matter. + string custom_alphabet = 5; + + // The native way to select the alphabet. Must be in the range [2, 62]. + int32 radix = 6; + } + + // The custom infoType to annotate the surrogate with. + // This annotation will be applied to the surrogate by prefixing it with + // the name of the custom infoType followed by the number of + // characters comprising the surrogate. The following scheme defines the + // format: info_type_name(surrogate_character_count):surrogate + // + // For example, if the name of the custom infoType is 'MY_TOKEN_INFO_TYPE' + // and the surrogate is 'abc', the full replacement value + // will be: 'MY_TOKEN_INFO_TYPE(3):abc' + // + // This annotation identifies the surrogate when inspecting content using the + // custom infoType + // [`SurrogateType`](/dlp/docs/reference/rest/v2/InspectConfig#surrogatetype). + // This facilitates reversal of the surrogate when it occurs in free text.
+ // + // In order for inspection to work properly, the name of this infoType must + // not occur naturally anywhere in your data; otherwise, inspection may + // find a surrogate that does not correspond to an actual identifier. + // Therefore, choose your custom infoType name carefully after considering + // what your data looks like. One way to select a name that has a high chance + // of yielding reliable detection is to include one or more Unicode characters + // that are highly improbable to exist in your data. + // For example, assuming your data is entered from a regular ASCII keyboard, + // the symbol with the hex code point 29DD might be used like so: + // ⧝MY_TOKEN_TYPE + InfoType surrogate_info_type = 8; +} + +// This is a data encryption key (DEK) (as opposed to +// a key encryption key (KEK) stored by KMS). +// When using KMS to wrap/unwrap DEKs, be sure to set an appropriate +// IAM policy on the KMS CryptoKey (KEK) to ensure an attacker cannot +// unwrap the data crypto key. +message CryptoKey { + // Where the data crypto key comes from; at most one of the following may be + // set. + oneof source { + // A random key generated for the request (see `TransientCryptoKey`). + TransientCryptoKey transient = 1; + + // A raw key supplied directly (see `UnwrappedCryptoKey`). + UnwrappedCryptoKey unwrapped = 2; + + // A key wrapped by KMS (see `KmsWrappedCryptoKey`). + KmsWrappedCryptoKey kms_wrapped = 3; + } +} + +// Use this to have a random data crypto key generated. +// It will be discarded after the request finishes. +message TransientCryptoKey { + // Name of the key. [required] + // This is an arbitrary string used to differentiate different keys. + // A unique key is generated per name: two separate `TransientCryptoKey` + // protos share the same generated key if their names are the same. + // When the data crypto key is generated, this name is not used in any way + // (repeating the API call will result in a different key being generated). + string name = 1; +} + +// Parts of the APIs which use certain infoTypes. +enum InfoTypeSupportedBy { + ENUM_TYPE_UNSPECIFIED = 0; + + // Supported by the inspect operations. + INSPECT = 1; + + // Supported by the risk analysis operations.
+ RISK_ANALYSIS = 2; +} + +// Using raw keys is prone to security risks due to accidentally +// leaking the key. Choose another type of key if possible. +message UnwrappedCryptoKey { + // A 128/192/256-bit key. [required] + bytes key = 1; +} + +// Include to use an existing data crypto key wrapped by KMS. +// The wrapped key must be a 128/192/256-bit key. +// Authorization requires the following IAM permission when sending a request +// to perform a crypto transformation using a kms-wrapped crypto key: +// dlp.kms.encrypt +message KmsWrappedCryptoKey { + // The wrapped data crypto key. [required] + bytes wrapped_key = 1; + + // The resource name of the KMS CryptoKey to use for unwrapping. [required] + string crypto_key_name = 2; +} + +// Shifts dates by a random number of days, with the option to be consistent +// for the same context. +// See https://cloud.google.com/dlp/docs/concepts-date-shifting +// to learn more. +message DateShiftConfig { + // Range of shift in days. Actual shift will be selected at random within this + // range (inclusive ends). Negative means shift to earlier in time. Must not + // be more than 365250 days (1000 years) in each direction. + // + // For example, 3 means shift date to at most 3 days into the future. + // [Required] + int32 upper_bound_days = 1; + + // Lower end of the shift range, in days (see `upper_bound_days`). + // For example, -5 means shift date to at most 5 days back in the past. + // [Required] + int32 lower_bound_days = 2; + + // Points to the field that contains the context, for example, an entity id. + // If set, must also set method. If set, shift will be consistent for the + // given context. + FieldId context = 3; + + // Method for calculating shift that takes context into consideration. If + // set, must also set context. Can only be applied to table items. + oneof method { + // Causes the shift to be computed based on this key and the context. This + // results in the same shift for the same context and crypto_key.
+ CryptoKey crypto_key = 4; + } +} + +// A type of transformation that will scan unstructured text and +// apply various `PrimitiveTransformation`s to each finding, where the +// transformation is applied only to values that were identified as a specific +// info_type. +message InfoTypeTransformations { + // A transformation to apply to text that is identified as a specific + // info_type. + message InfoTypeTransformation { + // InfoTypes to apply the transformation to. An empty list will cause + // this transformation to apply to all findings that correspond to + // infoTypes that were requested in `InspectConfig`. + repeated InfoType info_types = 1; + + // Primitive transformation to apply to the infoType. [required] + PrimitiveTransformation primitive_transformation = 2; + } + + // Transformation for each infoType. Cannot specify more than one + // for a given infoType. [required] + repeated InfoTypeTransformation transformations = 1; +} + +// The transformation to apply to the field. +message FieldTransformation { + // Input field(s) to apply the transformation to. [required] + repeated FieldId fields = 1; + + // Only apply the transformation if the condition evaluates to true for the + // given `RecordCondition`. The conditions are allowed to reference fields + // that are not used in the actual transformation. [optional] + // + // Example use cases: + // + // - Apply a different bucket transformation to an age column if the zip code + // column for the same record is within a specific range. + // - Redact a field if the date of birth field is greater than 85. + RecordCondition condition = 3; + + // Transformation to apply. [required] + oneof transformation { + // Apply the transformation to the entire field. + PrimitiveTransformation primitive_transformation = 4; + + // Treat the contents of the field as free text, and selectively + // transform content that matches an `InfoType`.
+ InfoTypeTransformations info_type_transformations = 5; + } +} + +// A type of transformation that is applied over structured data such as a +// table. +message RecordTransformations { + // Transform the record by applying various field transformations. + repeated FieldTransformation field_transformations = 1; + + // Configuration defining which records get suppressed entirely. Records that + // match any suppression rule are omitted from the output. [optional] + repeated RecordSuppression record_suppressions = 2; +} + +// Configuration to suppress records whose suppression conditions evaluate to +// true. +message RecordSuppression { + // A condition that when it evaluates to true will result in the record being + // evaluated to be suppressed from the transformed content. + RecordCondition condition = 1; +} + +// A condition for determining whether a transformation should be applied to +// a field. +message RecordCondition { + // The field types of `value` and `field` do not need to match to be + // considered equal, but not all comparisons are possible. + // EQUAL_TO and NOT_EQUAL_TO attempt to compare even with incompatible types, + // but all other comparisons are invalid with incompatible types. + // A `value` of type: + // + // - `string` can be compared against all other types + // - `boolean` can only be compared against other booleans + // - `integer` can be compared against doubles or a string if the string value + // can be parsed as an integer. + // - `double` can be compared against integers or a string if the string can + // be parsed as a double. + // - `Timestamp` can be compared against strings in RFC 3339 date string + // format. + // - `TimeOfDay` can be compared against timestamps and strings in the format + // of 'HH:mm:ss'. + // + // If we fail to compare due to type mismatch, a warning will be given and + // the condition will evaluate to false. + message Condition { + // Field within the record this condition is evaluated against.
[required] + FieldId field = 1; + + // Operator used to compare the field or infoType to the value. [required] + RelationalOperator operator = 3; + + // Value to compare against. [Required, except for `EXISTS` tests.] + Value value = 4; + } + + // A collection of conditions. + message Conditions { + // The conditions in this collection. + repeated Condition conditions = 1; + } + + // An expression, consisting of an operator and conditions. + message Expressions { + // Logical operators that can combine the results of conditions. + enum LogicalOperator { + LOGICAL_OPERATOR_UNSPECIFIED = 0; + + AND = 1; + } + + // The operator to apply to the result of conditions. Default and currently + // only supported value is `AND`. + LogicalOperator logical_operator = 1; + + oneof type { + // The conditions the operator is applied to. + Conditions conditions = 3; + } + } + + // An expression. + Expressions expressions = 3; +} + +// Overview of the modifications that occurred. +message TransformationOverview { + // Total size in bytes that were transformed in some way. + int64 transformed_bytes = 2; + + // Transformations applied to the dataset. + repeated TransformationSummary transformation_summaries = 3; +} + +// Summary of a single transformation. +// Only one of 'transformation', 'field_transformations', or 'record_suppress' +// will be set. +message TransformationSummary { + // A collection that informs the user the number of times a particular + // `TransformationResultCode` and error details occurred. + message SummaryResult { + // Number of times this result code and these details occurred. + int64 count = 1; + + // The result code being counted. + TransformationResultCode code = 2; + + // A place for warnings or errors to show up if a transformation didn't + // work as expected. + string details = 3; + } + + // Possible outcomes of transformations. + enum TransformationResultCode { + TRANSFORMATION_RESULT_CODE_UNSPECIFIED = 0; + + SUCCESS = 1; + + ERROR = 2; + } + + // Set if the transformation was limited to a specific InfoType. + InfoType info_type = 1; + + // Set if the transformation was limited to a specific FieldId. + FieldId field = 2; + + // The specific transformation these stats apply to.
+ PrimitiveTransformation transformation = 3; + + // The field transformation that was applied. + // If multiple field transformations are requested for a single field, + // this list will contain all of them; otherwise, only one is supplied. + repeated FieldTransformation field_transformations = 5; + + // The specific suppression option these stats apply to. + RecordSuppression record_suppress = 6; + + // Result codes, counts, and details for this transformation (see + // `SummaryResult`). + repeated SummaryResult results = 4; + + // Total size in bytes that were transformed in some way. + int64 transformed_bytes = 7; +} + +// Schedule for triggeredJobs. +message Schedule { + oneof option { + // With this option a job is started on a regular periodic basis. For + // example: every day (86400 seconds). + // + // A scheduled start time will be skipped if the previous + // execution has not ended when its scheduled time occurs. + // + // This value must be set to a time duration greater than or equal + // to 1 day and can be no longer than 60 days. + google.protobuf.Duration recurrence_period_duration = 1; + } +} + +// The inspectTemplate contains a configuration (set of types of sensitive data +// to be detected) to be used anywhere you otherwise would normally specify +// InspectConfig. See https://cloud.google.com/dlp/docs/concepts-templates +// to learn more. +message InspectTemplate { + // The template name. Output only. + // + // The template will have one of the following formats: + // `projects/PROJECT_ID/inspectTemplates/TEMPLATE_ID` OR + // `organizations/ORGANIZATION_ID/inspectTemplates/TEMPLATE_ID` + string name = 1; + + // Display name (max 256 chars). + string display_name = 2; + + // Short description (max 256 chars). + string description = 3; + + // The creation timestamp of an inspectTemplate, output only field. + google.protobuf.Timestamp create_time = 4; + + // The last update timestamp of an inspectTemplate, output only field. + google.protobuf.Timestamp update_time = 5; + + // The core content of the template.
Configuration of the scanning process. + InspectConfig inspect_config = 6; +} + +// The DeidentifyTemplate contains instructions on how to deidentify content. +// See https://cloud.google.com/dlp/docs/concepts-templates to learn more. +message DeidentifyTemplate { + // The template name. Output only. + // + // The template will have one of the following formats: + // `projects/PROJECT_ID/deidentifyTemplates/TEMPLATE_ID` OR + // `organizations/ORGANIZATION_ID/deidentifyTemplates/TEMPLATE_ID` + string name = 1; + + // Display name (max 256 chars). + string display_name = 2; + + // Short description (max 256 chars). + string description = 3; + + // The creation timestamp of a deidentifyTemplate, output only field. + google.protobuf.Timestamp create_time = 4; + + // The last update timestamp of a deidentifyTemplate, output only field. + google.protobuf.Timestamp update_time = 5; + + // The core content of the template. + DeidentifyConfig deidentify_config = 6; +} + +// Detailed information about an error encountered during job execution or +// the results of an unsuccessful activation of the JobTrigger. +// Output only field. +message Error { + // Status describing the error. + google.rpc.Status details = 1; + + // The times the error occurred. + repeated google.protobuf.Timestamp timestamps = 2; +} + +// Contains a configuration to make DLP API calls on a repeating basis. +// See https://cloud.google.com/dlp/docs/concepts-job-triggers to learn more. +message JobTrigger { + // What event needs to occur for a new job to be started. + message Trigger { + oneof trigger { + // Create a job on a repeating basis based on the passage of time. + Schedule schedule = 1; + } + } + + // Whether the trigger is currently active. If PAUSED or CANCELLED, no jobs + // will be created with this configuration. The service may automatically + // pause triggers experiencing frequent errors. To restart a job, set the + // status to HEALTHY after correcting user errors.
+ enum Status { + STATUS_UNSPECIFIED = 0; + + // Trigger is healthy. + HEALTHY = 1; + + // Trigger is temporarily paused. + PAUSED = 2; + + // Trigger is cancelled and cannot be resumed. + CANCELLED = 3; + } + + // Unique resource name for the triggeredJob, assigned by the service when the + // triggeredJob is created, for example + // `projects/dlp-test-project/jobTriggers/53234423`. + string name = 1; + + // Display name (max 100 chars). + string display_name = 2; + + // User-provided description (max 256 chars). + string description = 3; + + // The configuration details for the specific type of job to run. + oneof job { + InspectJobConfig inspect_job = 4; + } + + // A list of triggers which will be OR'ed together. Only one in the list + // needs to trigger for a job to be started. The list may contain only + // a single Schedule trigger and must have at least one object. + repeated Trigger triggers = 5; + + // A stream of errors encountered when the trigger was activated. Repeated + // errors may result in the JobTrigger automatically being paused. + // Will return the last 100 errors. Whenever the JobTrigger is modified + // this list will be cleared. Output only field. + repeated Error errors = 6; + + // The creation timestamp of a triggeredJob, output only field. + google.protobuf.Timestamp create_time = 7; + + // The last update timestamp of a triggeredJob, output only field. + google.protobuf.Timestamp update_time = 8; + + // The timestamp of the last time this trigger executed, output only field. + google.protobuf.Timestamp last_run_time = 9; + + // A status for this trigger. [required] + Status status = 10; +} + +// A task to execute on the completion of a job. +// See https://cloud.google.com/dlp/docs/concepts-actions to learn more. +message Action { + // If set, the detailed findings will be persisted to the specified + // OutputStorageConfig. Only a single instance of this action can be + // specified.
+ // Compatible with: Inspect, Risk + message SaveFindings { + // Where the findings are persisted. + OutputStorageConfig output_config = 1; + } + + // Publish the results of a DlpJob to a Cloud Pub/Sub topic. + // Compatible with: Inspect, Risk + message PublishToPubSub { + // Cloud Pub/Sub topic to send notifications to. The topic must have given + // publishing access rights to the DLP API service account executing + // the long-running DlpJob sending the notifications. + // Format is projects/{project}/topics/{topic}. + string topic = 1; + } + + // Publish the result summary of a DlpJob to the Cloud Security + // Command Center (CSCC Alpha). + // This action is only available for projects which are part of + // an organization and whitelisted for the alpha Cloud Security Command + // Center. + // The action will publish the count of finding instances and their info + // types. + // The summary of findings will be persisted in CSCC and is governed by CSCC + // service-specific policy, see https://cloud.google.com/terms/service-terms + // Only a single instance of this action can be specified. + // Compatible with: Inspect + message PublishSummaryToCscc { + + } + + // Enable email notification to project owners and editors on job's + // completion/failure. + message JobNotificationEmails { + + } + + oneof action { + // Save resulting findings in a provided location. + SaveFindings save_findings = 1; + + // Publish a notification to a Cloud Pub/Sub topic. + PublishToPubSub pub_sub = 2; + + // Publish summary to Cloud Security Command Center (Alpha). + PublishSummaryToCscc publish_summary_to_cscc = 3; + + // Enable email notification to project owners and editors on job's + // completion/failure. + JobNotificationEmails job_notification_emails = 8; + } +} + +// Request message for CreateInspectTemplate. +message CreateInspectTemplateRequest { + // The parent resource name, for example projects/my-project-id or + // organizations/my-org-id. + string parent = 1; + + // The InspectTemplate to create.
+ InspectTemplate inspect_template = 2; + + // The template id can contain uppercase and lowercase letters, + // numbers, and hyphens; that is, it must match the regular + // expression: `[a-zA-Z\\d-_]+`. The maximum length is 100 + // characters. Can be empty to allow the system to generate one. + string template_id = 3; +} + +// Request message for UpdateInspectTemplate. +message UpdateInspectTemplateRequest { + // Resource name of the organization and inspectTemplate to be updated, for + // example `organizations/433245324/inspectTemplates/432452342` or + // `projects/project-id/inspectTemplates/432452342`. + string name = 1; + + // New InspectTemplate value. + InspectTemplate inspect_template = 2; + + // Mask to control which fields get updated. + google.protobuf.FieldMask update_mask = 3; +} + +// Request message for GetInspectTemplate. +message GetInspectTemplateRequest { + // Resource name of the organization and inspectTemplate to be read, for + // example `organizations/433245324/inspectTemplates/432452342` or + // `projects/project-id/inspectTemplates/432452342`. + string name = 1; +} + +// Request message for ListInspectTemplates. +message ListInspectTemplatesRequest { + // The parent resource name, for example projects/my-project-id or + // organizations/my-org-id. + string parent = 1; + + // Optional page token to continue retrieval. Comes from the previous call + // to `ListInspectTemplates`. + string page_token = 2; + + // Optional size of the page, can be limited by the server. If zero, the + // server returns a page of max size 100. + int32 page_size = 3; + + // Optional comma-separated list of fields to order by, + // followed by `asc` or `desc` postfix. This list is case-insensitive, + // default sorting order is ascending, redundant space characters are + // insignificant. + // + // Example: `name asc,update_time, create_time desc` + // + // Supported fields are: + // + // - `create_time`: corresponds to the time the template was created.
+ // - `update_time`: corresponds to the time the template was last updated. + // - `name`: corresponds to the template's name. + // - `display_name`: corresponds to the template's display name. + string order_by = 4; +} + +// Response message for ListInspectTemplates. +message ListInspectTemplatesResponse { + // List of inspectTemplates, up to page_size in ListInspectTemplatesRequest. + repeated InspectTemplate inspect_templates = 1; + + // If the next page is available, the page token to use in the following + // ListInspectTemplates request. + string next_page_token = 2; +} + +// Request message for DeleteInspectTemplate. +message DeleteInspectTemplateRequest { + // Resource name of the organization and inspectTemplate to be deleted, for + // example `organizations/433245324/inspectTemplates/432452342` or + // `projects/project-id/inspectTemplates/432452342`. + string name = 1; +} + +// Request message for CreateJobTrigger. +message CreateJobTriggerRequest { + // The parent resource name, for example projects/my-project-id. + string parent = 1; + + // The JobTrigger to create. + JobTrigger job_trigger = 2; + + // The trigger id can contain uppercase and lowercase letters, + // numbers, and hyphens; that is, it must match the regular + // expression: `[a-zA-Z\\d-_]+`. The maximum length is 100 + // characters. Can be empty to allow the system to generate one. + string trigger_id = 3; +} + +// Request message for ActivateJobTrigger. +message ActivateJobTriggerRequest { + // Resource name of the trigger to activate, for example + // `projects/dlp-test-project/jobTriggers/53234423`. + string name = 1; +} + +// Request message for UpdateJobTrigger. +message UpdateJobTriggerRequest { + // Resource name of the project and the triggeredJob, for example + // `projects/dlp-test-project/jobTriggers/53234423`. + string name = 1; + + // New JobTrigger value. + JobTrigger job_trigger = 2; + + // Mask to control which fields get updated.
+ google.protobuf.FieldMask update_mask = 3; +} + +// Request message for GetJobTrigger. +message GetJobTriggerRequest { + // Resource name of the project and the triggeredJob, for example + // `projects/dlp-test-project/jobTriggers/53234423`. + string name = 1; +} + +// Request message for CreateDlpJob. Used to initiate long-running +// jobs such as calculating risk metrics or inspecting Google Cloud +// Storage. +message CreateDlpJobRequest { + // The parent resource name, for example projects/my-project-id. + string parent = 1; + + // The configuration details for the specific type of job to run. + oneof job { + InspectJobConfig inspect_job = 2; + + RiskAnalysisJobConfig risk_job = 3; + } + + // The job id can contain uppercase and lowercase letters, + // numbers, and hyphens; that is, it must match the regular + // expression: `[a-zA-Z\\d-_]+`. The maximum length is 100 + // characters. Can be empty to allow the system to generate one. + string job_id = 4; +} + +// Request message for ListJobTriggers. +message ListJobTriggersRequest { + // The parent resource name, for example `projects/my-project-id`. + string parent = 1; + + // Optional page token to continue retrieval. Comes from previous call + // to ListJobTriggers. `order_by` field must not + // change for subsequent calls. + string page_token = 2; + + // Optional size of the page, can be limited by a server. + int32 page_size = 3; + + // Optional comma separated list of triggeredJob fields to order by, + // followed by `asc` or `desc` postfix. This list is case-insensitive, + // default sorting order is ascending, redundant space characters are + // insignificant. + // + // Example: `name asc,update_time, create_time desc` + // + // Supported fields are: + // + // - `create_time`: corresponds to time the JobTrigger was created. + // - `update_time`: corresponds to time the JobTrigger was last updated. + // - `last_run_time`: corresponds to the last time the JobTrigger ran.
+ // - `name`: corresponds to JobTrigger's name. + // - `display_name`: corresponds to JobTrigger's display name. + // - `status`: corresponds to JobTrigger's status. + string order_by = 4; + + // Optional. Allows filtering. + // + // Supported syntax: + // + // * Filter expressions are made up of one or more restrictions. + // * Restrictions can be combined by `AND` or `OR` logical operators. A + // sequence of restrictions implicitly uses `AND`. + // * A restriction has the form of `<field> <operator> <value>`. + // * Supported fields/values for inspect jobs: + // - `status` - HEALTHY|PAUSED|CANCELLED + // - `inspected_storage` - DATASTORE|CLOUD_STORAGE|BIGQUERY + // - `last_run_time` - RFC 3339 formatted timestamp, surrounded by + // quotation marks. Nanoseconds are ignored. + // - `error_count` - Number of errors that have occurred while running. + // * The operator must be `=` or `!=` for status and inspected_storage. + // + // Examples: + // + // * inspected_storage = cloud_storage AND status = HEALTHY + // * inspected_storage = cloud_storage OR inspected_storage = bigquery + // * inspected_storage = cloud_storage AND (status = PAUSED OR status = HEALTHY) + // * last_run_time > \"2017-12-12T00:00:00+00:00\" + // + // The length of this field should be no more than 500 characters. + string filter = 5; +} + +// Response message for ListJobTriggers. +message ListJobTriggersResponse { + // List of triggeredJobs, up to page_size in ListJobTriggersRequest. + repeated JobTrigger job_triggers = 1; + + // If the next page is available then the next page token to be used + // in following ListJobTriggers request. + string next_page_token = 2; +} + +// Request message for DeleteJobTrigger. +message DeleteJobTriggerRequest { + // Resource name of the project and the triggeredJob, for example + // `projects/dlp-test-project/jobTriggers/53234423`. + string name = 1; +} + +message InspectJobConfig { + // The data to scan.
+ StorageConfig storage_config = 1; + + // How and what to scan for. + InspectConfig inspect_config = 2; + + // If provided, will be used as the default for all values in InspectConfig. + // `inspect_config` will be merged into the values persisted as part of the + // template. + string inspect_template_name = 3; + + // Actions to execute at the completion of the job. Are executed in the order + // provided. + repeated Action actions = 4; +} + +// Combines all of the information about a DLP job. +message DlpJob { + enum JobState { + JOB_STATE_UNSPECIFIED = 0; + + // The job has not yet started. + PENDING = 1; + + // The job is currently running. + RUNNING = 2; + + // The job is no longer running. + DONE = 3; + + // The job was canceled before it could complete. + CANCELED = 4; + + // The job had an error and did not complete. + FAILED = 5; + } + + // The server-assigned name. + string name = 1; + + // The type of job. + DlpJobType type = 2; + + // State of a job. + JobState state = 3; + + oneof details { + // Results from analyzing risk of a data source. + AnalyzeDataSourceRiskDetails risk_details = 4; + + // Results from inspecting a data source. + InspectDataSourceDetails inspect_details = 5; + } + + // Time when the job was created. + google.protobuf.Timestamp create_time = 6; + + // Time when the job started. + google.protobuf.Timestamp start_time = 7; + + // Time when the job finished. + google.protobuf.Timestamp end_time = 8; + + // If created by a job trigger, the resource name of the trigger that + // instantiated the job. + string job_trigger_name = 10; + + // A stream of errors encountered running the job. + repeated Error errors = 11; +} + +// The request message for [DlpJobs.GetDlpJob][]. +message GetDlpJobRequest { + // The name of the DlpJob resource. + string name = 1; +} + +// Operators available for comparing the value of fields. +enum RelationalOperator { + RELATIONAL_OPERATOR_UNSPECIFIED = 0; + + // Equal. 
Attempts to match even with incompatible types. + EQUAL_TO = 1; + + // Not equal to. Attempts to match even with incompatible types. + NOT_EQUAL_TO = 2; + + // Greater than. + GREATER_THAN = 3; + + // Less than. + LESS_THAN = 4; + + // Greater than or equals. + GREATER_THAN_OR_EQUALS = 5; + + // Less than or equals. + LESS_THAN_OR_EQUALS = 6; + + // Exists + EXISTS = 7; +} + +// The request message for listing DLP jobs. +message ListDlpJobsRequest { + // The parent resource name, for example projects/my-project-id. + string parent = 4; + + // Optional. Allows filtering. + // + // Supported syntax: + // + // * Filter expressions are made up of one or more restrictions. + // * Restrictions can be combined by `AND` or `OR` logical operators. A + // sequence of restrictions implicitly uses `AND`. + // * A restriction has the form of `<field> <operator> <value>`. + // * Supported fields/values for inspect jobs: + // - `state` - PENDING|RUNNING|CANCELED|FINISHED|FAILED + // - `inspected_storage` - DATASTORE|CLOUD_STORAGE|BIGQUERY + // - `trigger_name` - The resource name of the trigger that created the job. + // * Supported fields for risk analysis jobs: + // - `state` - RUNNING|CANCELED|FINISHED|FAILED + // * The operator must be `=` or `!=`. + // + // Examples: + // + // * inspected_storage = cloud_storage AND state = done + // * inspected_storage = cloud_storage OR inspected_storage = bigquery + // * inspected_storage = cloud_storage AND (state = done OR state = canceled) + // + // The length of this field should be no more than 500 characters. + string filter = 1; + + // The standard list page size. + int32 page_size = 2; + + // The standard list page token. + string page_token = 3; + + // The type of job. Defaults to `DlpJobType.INSPECT_JOB` + DlpJobType type = 5; + + // Optional comma separated list of fields to order by, + // followed by `asc` or `desc` postfix.
This list is case-insensitive, + // default sorting order is ascending, redundant space characters are + // insignificant. + // + // Example: `name asc, end_time asc, create_time desc` + // + // Supported fields are: + // + // - `create_time`: corresponds to time the job was created. + // - `end_time`: corresponds to time the job ended. + // - `name`: corresponds to job's name. + // - `state`: corresponds to `state` + string order_by = 6; +} + +// The response message for listing DLP jobs. +message ListDlpJobsResponse { + // A list of DlpJobs that matches the specified filter in the request. + repeated DlpJob jobs = 1; + + // The standard List next-page token. + string next_page_token = 2; +} + +// The request message for canceling a DLP job. +message CancelDlpJobRequest { + // The name of the DlpJob resource to be cancelled. + string name = 1; +} + +// The request message for deleting a DLP job. +message DeleteDlpJobRequest { + // The name of the DlpJob resource to be deleted. + string name = 1; +} + +// Request message for CreateDeidentifyTemplate. +message CreateDeidentifyTemplateRequest { + // The parent resource name, for example projects/my-project-id or + // organizations/my-org-id. + string parent = 1; + + // The DeidentifyTemplate to create. + DeidentifyTemplate deidentify_template = 2; + + // The template id can contain uppercase and lowercase letters, + // numbers, and hyphens; that is, it must match the regular + // expression: `[a-zA-Z\\d-_]+`. The maximum length is 100 + // characters. Can be empty to allow the system to generate one. + string template_id = 3; +} + +// Request message for UpdateDeidentifyTemplate. +message UpdateDeidentifyTemplateRequest { + // Resource name of organization and deidentify template to be updated, for + // example `organizations/433245324/deidentifyTemplates/432452342` or + // projects/project-id/deidentifyTemplates/432452342. + string name = 1; + + // New DeidentifyTemplate value. 
+ DeidentifyTemplate deidentify_template = 2; + + // Mask to control which fields get updated. + google.protobuf.FieldMask update_mask = 3; +} + +// Request message for GetDeidentifyTemplate. +message GetDeidentifyTemplateRequest { + // Resource name of the organization and deidentify template to be read, for + // example `organizations/433245324/deidentifyTemplates/432452342` or + // projects/project-id/deidentifyTemplates/432452342. + string name = 1; +} + +// Request message for ListDeidentifyTemplates. +message ListDeidentifyTemplatesRequest { + // The parent resource name, for example projects/my-project-id or + // organizations/my-org-id. + string parent = 1; + + // Optional page token to continue retrieval. Comes from previous call + // to `ListDeidentifyTemplates`. + string page_token = 2; + + // Optional size of the page, can be limited by server. If zero server returns + // a page of max size 100. + int32 page_size = 3; + + // Optional comma separated list of fields to order by, + // followed by `asc` or `desc` postfix. This list is case-insensitive, + // default sorting order is ascending, redundant space characters are + // insignificant. + // + // Example: `name asc,update_time, create_time desc` + // + // Supported fields are: + // + // - `create_time`: corresponds to time the template was created. + // - `update_time`: corresponds to time the template was last updated. + // - `name`: corresponds to template's name. + // - `display_name`: corresponds to template's display name. + string order_by = 4; +} + +// Response message for ListDeidentifyTemplates. +message ListDeidentifyTemplatesResponse { + // List of deidentify templates, up to page_size in + // ListDeidentifyTemplatesRequest. + repeated DeidentifyTemplate deidentify_templates = 1; + + // If the next page is available then the next page token to be used + // in following ListDeidentifyTemplates request. + string next_page_token = 2; +} + +// Request message for DeleteDeidentifyTemplate. 
+message DeleteDeidentifyTemplateRequest { + // Resource name of the organization and deidentify template to be deleted, + // for example `organizations/433245324/deidentifyTemplates/432452342` or + // projects/project-id/deidentifyTemplates/432452342. + string name = 1; +} + +// Configuration for a custom dictionary created from a data source of any size +// up to the maximum size defined in the +// [limits](https://cloud.google.com/dlp/limits) page. The artifacts of +// dictionary creation are stored in the specified Google Cloud Storage +// location. Consider using `CustomInfoType.Dictionary` for smaller dictionaries +// that satisfy the size requirements. +message LargeCustomDictionaryConfig { + // Location to store dictionary artifacts in Google Cloud Storage. These files + // will only be accessible by project owners and the DLP API. If any of these + // artifacts are modified, the dictionary is considered invalid and can no + // longer be used. + CloudStoragePath output_path = 1; + + oneof source { + // Set of files containing newline-delimited lists of dictionary phrases. + CloudStorageFileSet cloud_storage_file_set = 2; + + // Field in a BigQuery table where each cell represents a dictionary phrase. + BigQueryField big_query_field = 3; + } +} + +// Configuration for a StoredInfoType. +message StoredInfoTypeConfig { + // Display name of the StoredInfoType (max 256 characters). + string display_name = 1; + + // Description of the StoredInfoType (max 256 characters). + string description = 2; + + oneof type { + // StoredInfoType where findings are defined by a dictionary of phrases. + LargeCustomDictionaryConfig large_custom_dictionary = 3; + } +} + +// Version of a StoredInfoType, including the configuration used to build it, +// create timestamp, and current state. +message StoredInfoTypeVersion { + // StoredInfoType configuration. + StoredInfoTypeConfig config = 1; + + // Create timestamp of the version. 
Read-only, determined by the system + // when the version is created. + google.protobuf.Timestamp create_time = 2; + + // Stored info type version state. Read-only, updated by the system + // during dictionary creation. + StoredInfoTypeState state = 3; + + // Errors that occurred when creating this storedInfoType version, or + // anomalies detected in the storedInfoType data that render it unusable. Only + // the five most recent errors will be displayed, with the most recent error + // appearing first. + // <p>For example, some of the data for stored custom dictionaries is put in + // the user's Google Cloud Storage bucket, and if this data is modified or + // deleted by the user or another system, the dictionary becomes invalid. + // <p>If any errors occur, fix the problem indicated by the error message and + // use the UpdateStoredInfoType API method to create another version of the + // storedInfoType to continue using it, reusing the same `config` if it was + // not the source of the error. + repeated Error errors = 4; +} + +// StoredInfoType resource message that contains information about the current +// version and any pending updates. +message StoredInfoType { + // Resource name. + string name = 1; + + // Current version of the stored info type. + StoredInfoTypeVersion current_version = 2; + + // Pending versions of the stored info type. Empty if no versions are + // pending. + repeated StoredInfoTypeVersion pending_versions = 3; +} + +// Request message for CreateStoredInfoType. +message CreateStoredInfoTypeRequest { + // The parent resource name, for example projects/my-project-id or + // organizations/my-org-id. + string parent = 1; + + // Configuration of the storedInfoType to create. + StoredInfoTypeConfig config = 2; + + // The storedInfoType ID can contain uppercase and lowercase letters, + // numbers, and hyphens; that is, it must match the regular + // expression: `[a-zA-Z\\d-_]+`. The maximum length is 100 + // characters. 
Can be empty to allow the system to generate one. + string stored_info_type_id = 3; +} + +// Request message for UpdateStoredInfoType. +message UpdateStoredInfoTypeRequest { + // Resource name of organization and storedInfoType to be updated, for + // example `organizations/433245324/storedInfoTypes/432452342` or + // projects/project-id/storedInfoTypes/432452342. + string name = 1; + + // Updated configuration for the storedInfoType. If not provided, a new + // version of the storedInfoType will be created with the existing + // configuration. + StoredInfoTypeConfig config = 2; + + // Mask to control which fields get updated. + google.protobuf.FieldMask update_mask = 3; +} + +// Request message for GetStoredInfoType. +message GetStoredInfoTypeRequest { + // Resource name of the organization and storedInfoType to be read, for + // example `organizations/433245324/storedInfoTypes/432452342` or + // projects/project-id/storedInfoTypes/432452342. + string name = 1; +} + +// Request message for ListStoredInfoTypes. +message ListStoredInfoTypesRequest { + // The parent resource name, for example projects/my-project-id or + // organizations/my-org-id. + string parent = 1; + + // Optional page token to continue retrieval. Comes from previous call + // to `ListStoredInfoTypes`. + string page_token = 2; + + // Optional size of the page, can be limited by server. If zero server returns + // a page of max size 100. + int32 page_size = 3; + + // Optional comma separated list of fields to order by, + // followed by `asc` or `desc` postfix. This list is case-insensitive, + // default sorting order is ascending, redundant space characters are + // insignificant. + // + // Example: `name asc, display_name, create_time desc` + // + // Supported fields are: + // + // - `create_time`: corresponds to time the most recent version of the + // resource was created. + // - `state`: corresponds to the state of the resource. + // - `name`: corresponds to resource name. 
+ // - `display_name`: corresponds to info type's display name. + string order_by = 4; +} + +// Response message for ListStoredInfoTypes. +message ListStoredInfoTypesResponse { + // List of storedInfoTypes, up to page_size in ListStoredInfoTypesRequest. + repeated StoredInfoType stored_info_types = 1; + + // If the next page is available then the next page token to be used + // in following ListStoredInfoTypes request. + string next_page_token = 2; +} + +// Request message for DeleteStoredInfoType. +message DeleteStoredInfoTypeRequest { + // Resource name of the organization and storedInfoType to be deleted, for + // example `organizations/433245324/storedInfoTypes/432452342` or + // projects/project-id/storedInfoTypes/432452342. + string name = 1; +} + +// An enum to represent the various type of DLP jobs. +enum DlpJobType { + DLP_JOB_TYPE_UNSPECIFIED = 0; + + // The job inspected Google Cloud for sensitive data. + INSPECT_JOB = 1; + + // The job executed a Risk Analysis computation. + RISK_ANALYSIS_JOB = 2; +} + +// State of a StoredInfoType version. +enum StoredInfoTypeState { + STORED_INFO_TYPE_STATE_UNSPECIFIED = 0; + + // StoredInfoType version is being created. + PENDING = 1; + + // StoredInfoType version is ready for use. + READY = 2; + + // StoredInfoType creation failed. All relevant error messages are returned in + // the `StoredInfoTypeVersion` message. + FAILED = 3; + + // StoredInfoType is no longer valid because artifacts stored in + // user-controlled storage were modified. To fix an invalid StoredInfoType, + // use the `UpdateStoredInfoType` method to create a new version. 
+ INVALID = 4; +} diff --git a/google/privacy/dlp/v2/dlp_gapic.yaml b/google/privacy/dlp/v2/dlp_gapic.yaml new file mode 100644 index 000000000..12b514849 --- /dev/null +++ b/google/privacy/dlp/v2/dlp_gapic.yaml @@ -0,0 +1,542 @@ +type: com.google.api.codegen.ConfigProto +config_schema_version: 1.0.0 +language_settings: + java: + package_name: com.google.cloud.dlp.v2 + python: + package_name: google.cloud.dlp_v2.gapic + go: + package_name: cloud.google.com/go/dlp/apiv2 + release_level: GA + csharp: + package_name: Google.Cloud.Dlp.V2 + ruby: + package_name: Google::Cloud::Dlp::V2 + php: + package_name: Google\Cloud\Dlp\V2 + nodejs: + package_name: dlp.v2 + domain_layer_location: google-cloud +collection_oneofs: +- oneof_name: deidentify_template_oneof + collection_names: + - organization_deidentify_template + - project_deidentify_template +- oneof_name: inspect_template_oneof + collection_names: + - organization_inspect_template + - project_inspect_template +- oneof_name: stored_info_type_oneof + collection_names: + - organization_stored_info_type + - project_stored_info_type +# A list of API interface configurations. +interfaces: +- name: google.privacy.dlp.v2.DlpService + # A list of resource collection configurations. + # Consists of a name_pattern and an entity_name. + # The name_pattern is a pattern to describe the names of the resources of this + # collection, using the platform's conventions for URI patterns. A generator + # may use this to generate methods to compose and decompose such names. The + # pattern should use named placeholders as in `shelves/{shelf}/books/{book}`; + # those will be taken as hints for the parameter names of the generated + # methods. If empty, no name methods are generated. + # The entity_name is the name to be used as a basis for generated methods and + # classes. 
+ collections: + - name_pattern: organizations/{organization} + entity_name: organization + language_overrides: + - language: csharp + common_resource_name: Google.Api.Gax.ResourceNames.OrganizationName + - name_pattern: organizations/{organization}/deidentifyTemplates/{deidentify_template} + entity_name: organization_deidentify_template + - name_pattern: projects/{project}/deidentifyTemplates/{deidentify_template} + entity_name: project_deidentify_template + - name_pattern: organizations/{organization}/inspectTemplates/{inspect_template} + entity_name: organization_inspect_template + - name_pattern: projects/{project}/inspectTemplates/{inspect_template} + entity_name: project_inspect_template + - name_pattern: projects/{project}/jobTriggers/{job_trigger} + entity_name: project_job_trigger + - name_pattern: projects/{project} + entity_name: project + language_overrides: + - language: csharp + common_resource_name: Google.Api.Gax.ResourceNames.ProjectName + - name_pattern: projects/{project}/dlpJobs/{dlp_job} + entity_name: dlp_job + - name_pattern: organizations/{organization}/storedInfoTypes/{stored_info_type} + entity_name: organization_stored_info_type + - name_pattern: projects/{project}/storedInfoTypes/{stored_info_type} + entity_name: project_stored_info_type + # Definition for retryable codes. + retry_codes_def: + - name: idempotent + retry_codes: + - UNAVAILABLE + - DEADLINE_EXCEEDED + - name: non_idempotent + retry_codes: [] + # Definition for retry/backoff parameters. + retry_params_def: + - name: default + initial_retry_delay_millis: 100 + retry_delay_multiplier: 1.3 + max_retry_delay_millis: 60000 + initial_rpc_timeout_millis: 20000 + rpc_timeout_multiplier: 1 + max_rpc_timeout_millis: 20000 + total_timeout_millis: 600000 + # A list of method configurations. + # Common properties: + # + # name - The simple name of the method. + # + # flattening - Specifies the configuration for parameter flattening. 
+ # Describes the parameter groups for which a generator should produce method + # overloads which allow a client to directly pass request message fields as + # method parameters. This information may or may not be used, depending on + # the target language. + # Consists of groups, which each represent a list of parameters to be + # flattened. Each parameter listed must be a field of the request message. + # + # required_fields - Fields that are always required for a request to be + # valid. + # + # resource_name_treatment - An enum that specifies how to treat the resource + # name formats defined in the field_name_patterns and + # response_field_name_patterns fields. + # UNSET: default value + # NONE: the collection configs will not be used by the generated code. + # VALIDATE: string fields will be validated by the client against the + # specified resource name formats. + # STATIC_TYPES: the client will use generated types for resource names. + # + # page_streaming - Specifies the configuration for paging. + # Describes information for generating a method which transforms a paging + # list RPC into a stream of resources. + # Consists of a request and a response. + # The request specifies request information of the list method. It defines + # which fields match the paging pattern in the request. The request consists + # of a page_size_field and a token_field. The page_size_field is the name of + # the optional field specifying the maximum number of elements to be + # returned in the response. The token_field is the name of the field in the + # request containing the page token. + # The response specifies response information of the list method. It defines + # which fields match the paging pattern in the response. The response + # consists of a token_field and a resources_field. The token_field is the + # name of the field in the response containing the next page token. 
The + # resources_field is the name of the field in the response containing the + # list of resources belonging to the page. + # + # retry_codes_name - Specifies the configuration for retryable codes. The + # name must be defined in interfaces.retry_codes_def. + # + # retry_params_name - Specifies the configuration for retry/backoff + # parameters. The name must be defined in interfaces.retry_params_def. + # + # field_name_patterns - Maps the field name of the request type to + # entity_name of interfaces.collections. + # Specifies the string pattern that the field must follow. + # + # timeout_millis - Specifies the default timeout for a non-retrying call. If + # the call is retrying, refer to retry_params_name instead. + methods: + - name: InspectContent + required_fields: + - parent + resource_name_treatment: STATIC_TYPES + retry_codes_name: idempotent + retry_params_name: default + field_name_patterns: + parent: project + timeout_millis: 300000 + - name: RedactImage + required_fields: + - parent + resource_name_treatment: STATIC_TYPES + retry_codes_name: idempotent + retry_params_name: default + field_name_patterns: + parent: project + timeout_millis: 300000 + - name: DeidentifyContent + required_fields: + - parent + resource_name_treatment: STATIC_TYPES + retry_codes_name: idempotent + retry_params_name: default + field_name_patterns: + parent: project + timeout_millis: 300000 + - name: ReidentifyContent + required_fields: + - parent + resource_name_treatment: STATIC_TYPES + retry_codes_name: idempotent + retry_params_name: default + field_name_patterns: + parent: project + timeout_millis: 300000 + - name: ListInfoTypes + retry_codes_name: idempotent + retry_params_name: default + timeout_millis: 300000 + - name: CreateInspectTemplate + required_fields: + - parent + resource_name_treatment: STATIC_TYPES + retry_codes_name: non_idempotent + retry_params_name: default + field_name_patterns: + parent: organization + timeout_millis: 300000 + - name: 
UpdateInspectTemplate + required_fields: + - name + resource_name_treatment: STATIC_TYPES + retry_codes_name: non_idempotent + retry_params_name: default + field_name_patterns: + name: inspect_template_oneof + timeout_millis: 300000 + - name: GetInspectTemplate + resource_name_treatment: STATIC_TYPES + retry_codes_name: idempotent + retry_params_name: default + field_name_patterns: + name: inspect_template_oneof + timeout_millis: 300000 + - name: ListInspectTemplates + required_fields: + - parent + resource_name_treatment: STATIC_TYPES + page_streaming: + request: + page_size_field: page_size + token_field: page_token + response: + token_field: next_page_token + resources_field: inspect_templates + retry_codes_name: idempotent + retry_params_name: default + field_name_patterns: + parent: organization + timeout_millis: 300000 + - name: DeleteInspectTemplate + required_fields: + - name + resource_name_treatment: STATIC_TYPES + retry_codes_name: idempotent + retry_params_name: default + field_name_patterns: + name: inspect_template_oneof + timeout_millis: 300000 + - name: CreateDeidentifyTemplate + required_fields: + - parent + resource_name_treatment: STATIC_TYPES + retry_codes_name: non_idempotent + retry_params_name: default + field_name_patterns: + parent: organization + timeout_millis: 300000 + - name: UpdateDeidentifyTemplate + required_fields: + - name + resource_name_treatment: STATIC_TYPES + retry_codes_name: non_idempotent + retry_params_name: default + field_name_patterns: + name: deidentify_template_oneof + timeout_millis: 300000 + - name: GetDeidentifyTemplate + required_fields: + - name + resource_name_treatment: STATIC_TYPES + retry_codes_name: idempotent + retry_params_name: default + field_name_patterns: + name: deidentify_template_oneof + timeout_millis: 300000 + - name: ListDeidentifyTemplates + required_fields: + - parent + resource_name_treatment: STATIC_TYPES + page_streaming: + request: + page_size_field: page_size + token_field: page_token + 
response: + token_field: next_page_token + resources_field: deidentify_templates + retry_codes_name: idempotent + retry_params_name: default + field_name_patterns: + parent: organization + timeout_millis: 300000 + - name: DeleteDeidentifyTemplate + required_fields: + - name + resource_name_treatment: STATIC_TYPES + retry_codes_name: idempotent + retry_params_name: default + field_name_patterns: + name: deidentify_template_oneof + timeout_millis: 300000 + - name: CreateDlpJob + required_fields: + - parent + resource_name_treatment: STATIC_TYPES + retry_codes_name: non_idempotent + retry_params_name: default + field_name_patterns: + parent: project + timeout_millis: 300000 + - name: ListDlpJobs + required_fields: + - parent + resource_name_treatment: STATIC_TYPES + page_streaming: + request: + page_size_field: page_size + token_field: page_token + response: + token_field: next_page_token + resources_field: jobs + retry_codes_name: idempotent + retry_params_name: default + field_name_patterns: + parent: project + timeout_millis: 300000 + - name: GetDlpJob + required_fields: + - name + resource_name_treatment: STATIC_TYPES + retry_codes_name: idempotent + retry_params_name: default + field_name_patterns: + name: dlp_job + timeout_millis: 300000 + - name: DeleteDlpJob + required_fields: + - name + resource_name_treatment: STATIC_TYPES + retry_codes_name: idempotent + retry_params_name: default + field_name_patterns: + name: dlp_job + timeout_millis: 300000 + - name: CancelDlpJob + required_fields: + - name + resource_name_treatment: STATIC_TYPES + retry_codes_name: non_idempotent + retry_params_name: default + field_name_patterns: + name: dlp_job + timeout_millis: 300000 + - name: ListJobTriggers + required_fields: + - parent + resource_name_treatment: STATIC_TYPES + page_streaming: + request: + page_size_field: page_size + token_field: page_token + response: + token_field: next_page_token + resources_field: job_triggers + retry_codes_name: idempotent + 
retry_params_name: default + field_name_patterns: + parent: project + timeout_millis: 300000 + - name: GetJobTrigger + required_fields: + - name + resource_name_treatment: STATIC_TYPES + retry_codes_name: idempotent + retry_params_name: default + field_name_patterns: + name: project_job_trigger + timeout_millis: 300000 + - name: DeleteJobTrigger + required_fields: + - name + resource_name_treatment: STATIC_TYPES + retry_codes_name: idempotent + retry_params_name: default + timeout_millis: 300000 + - name: ActivateJobTrigger + surface_treatments: + - include_languages: + - go + - java + - csharp + - ruby + - nodejs + - python + - php + visibility: DISABLED + - name: UpdateJobTrigger + required_fields: + - name + resource_name_treatment: STATIC_TYPES + retry_codes_name: non_idempotent + retry_params_name: default + field_name_patterns: + name: project_job_trigger + timeout_millis: 300000 + - name: CreateJobTrigger + required_fields: + - parent + resource_name_treatment: STATIC_TYPES + retry_codes_name: non_idempotent + retry_params_name: default + field_name_patterns: + parent: project + timeout_millis: 300000 + + - name: CreateStoredInfoType + required_fields: + - parent + resource_name_treatment: STATIC_TYPES + retry_codes_name: non_idempotent + retry_params_name: default + field_name_patterns: + parent: organization + timeout_millis: 300000 + - name: UpdateStoredInfoType + required_fields: + - name + resource_name_treatment: STATIC_TYPES + retry_codes_name: non_idempotent + retry_params_name: default + field_name_patterns: + name: stored_info_type_oneof + timeout_millis: 300000 + - name: GetStoredInfoType + required_fields: + - name + resource_name_treatment: STATIC_TYPES + retry_codes_name: idempotent + retry_params_name: default + field_name_patterns: + name: stored_info_type_oneof + timeout_millis: 300000 + - name: ListStoredInfoTypes + required_fields: + - parent + resource_name_treatment: STATIC_TYPES + page_streaming: + request: + page_size_field: page_size 
+ token_field: page_token + response: + token_field: next_page_token + resources_field: stored_info_types + retry_codes_name: idempotent + retry_params_name: default + field_name_patterns: + parent: organization + timeout_millis: 300000 + - name: DeleteStoredInfoType + required_fields: + - name + resource_name_treatment: STATIC_TYPES + retry_codes_name: idempotent + retry_params_name: default + field_name_patterns: + name: stored_info_type_oneof + timeout_millis: 300000 + +resource_name_generation: +- message_name: InspectContentRequest + field_entity_map: + parent: project +- message_name: RedactImageRequest + field_entity_map: + parent: project +- message_name: DeidentifyContentRequest + field_entity_map: + parent: project +- message_name: ReidentifyContentRequest + field_entity_map: + parent: project +- message_name: CreateInspectTemplateRequest + field_entity_map: + parent: organization +- message_name: UpdateInspectTemplateRequest + field_entity_map: + name: inspect_template_oneof +- message_name: GetInspectTemplateRequest + field_entity_map: + name: inspect_template_oneof +- message_name: ListInspectTemplatesRequest + field_entity_map: + parent: organization +- message_name: DeleteInspectTemplateRequest + field_entity_map: + name: inspect_template_oneof +- message_name: CreateDeidentifyTemplateRequest + field_entity_map: + parent: organization +- message_name: UpdateDeidentifyTemplateRequest + field_entity_map: + name: deidentify_template_oneof +- message_name: GetDeidentifyTemplateRequest + field_entity_map: + name: deidentify_template_oneof +- message_name: ListDeidentifyTemplatesRequest + field_entity_map: + parent: organization +- message_name: DeleteDeidentifyTemplateRequest + field_entity_map: + name: deidentify_template_oneof +- message_name: CreateJobTriggerRequest + field_entity_map: + parent: project +- message_name: UpdateJobTriggerRequest + field_entity_map: + name: project_job_trigger +- message_name: GetJobTriggerRequest + field_entity_map: + 
name: project_job_trigger +- message_name: ListJobTriggersRequest + field_entity_map: + parent: project +- message_name: DeleteJobTriggerRequest + field_entity_map: + name: project_job_trigger +- message_name: CreateDlpJobRequest + field_entity_map: + parent: project +- message_name: ListDlpJobsRequest + field_entity_map: + parent: project +- message_name: GetDlpJobRequest + field_entity_map: + name: dlp_job +- message_name: DeleteDlpJobRequest + field_entity_map: + name: dlp_job +- message_name: CancelDlpJobRequest + field_entity_map: + name: dlp_job +- message_name: CreateStoredInfoTypeRequest + field_entity_map: + parent: organization +- message_name: UpdateStoredInfoTypeRequest + field_entity_map: + name: stored_info_type_oneof +- message_name: GetStoredInfoTypeRequest + field_entity_map: + name: stored_info_type_oneof +- message_name: ListStoredInfoTypesRequest + field_entity_map: + parent: organization +- message_name: DeleteStoredInfoTypeRequest + field_entity_map: + name: stored_info_type_oneof diff --git a/google/privacy/dlp/v2/storage.proto b/google/privacy/dlp/v2/storage.proto new file mode 100644 index 000000000..12b428407 --- /dev/null +++ b/google/privacy/dlp/v2/storage.proto @@ -0,0 +1,639 @@ +// Copyright 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
//

syntax = "proto3";

package google.privacy.dlp.v2;

// NOTE(review): nothing in this file references `google.api.*` options, so
// the two google/api imports below look unused here. They are kept as-is;
// confirm they are still needed and that the build target declares a
// dependency for each of them.
import "google/api/annotations.proto";
import "google/api/resource.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.Dlp.V2";
option go_package = "google.golang.org/genproto/googleapis/privacy/dlp/v2;dlp";
option java_multiple_files = true;
option java_outer_classname = "DlpStorage";
option java_package = "com.google.privacy.dlp.v2";
option php_namespace = "Google\\Cloud\\Dlp\\V2";

// Type of information detected by the API.
message InfoType {
  // Name of the information type. Either a name of your choosing when
  // creating a CustomInfoType, or one of the names listed
  // at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
  // a built-in type. InfoType names should conform to the pattern
  // [a-zA-Z0-9_]{1,64}.
  string name = 1;
}

// A reference to a StoredInfoType to use with scanning.
message StoredType {
  // Resource name of the requested `StoredInfoType`, for example
  // `organizations/433245324/storedInfoTypes/432452342` or
  // `projects/project-id/storedInfoTypes/432452342`.
  string name = 1;

  // Timestamp indicating when the version of the `StoredInfoType` used for
  // inspection was created. Output-only field, populated by the system.
  google.protobuf.Timestamp create_time = 2;
}

// Categorization of results based on how likely they are to represent a match,
// based on the number of elements they contain which imply a match.
enum Likelihood {
  // Default value; same as POSSIBLE.
  LIKELIHOOD_UNSPECIFIED = 0;

  // Few matching elements.
  VERY_UNLIKELY = 1;

  // One level below `POSSIBLE`.
  UNLIKELY = 2;

  // Some matching elements.
  POSSIBLE = 3;

  // One level above `POSSIBLE`.
  LIKELY = 4;

  // Many matching elements.
  VERY_LIKELY = 5;
}

// Custom information type provided by the user. Used to find domain-specific
// sensitive information configurable to the data in question.
message CustomInfoType {
  // Custom information type based on a dictionary of words or phrases. This can
  // be used to match sensitive information specific to the data, such as a list
  // of employee IDs or job titles.
  //
  // Dictionary words are case-insensitive and all characters other than letters
  // and digits in the unicode [Basic Multilingual
  // Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
  // will be replaced with whitespace when scanning for matches, so the
  // dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
  // "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
  // surrounding any match must be of a different type than the adjacent
  // characters within the word, so letters must be next to non-letters and
  // digits next to non-digits. For example, the dictionary word "jen" will
  // match the first three letters of the text "jen123" but will return no
  // matches for "jennifer".
  //
  // Dictionary words containing a large number of characters that are not
  // letters or digits may result in unexpected findings because such characters
  // are treated as whitespace. The
  // [limits](https://cloud.google.com/dlp/limits) page contains details about
  // the size limits of dictionaries. For dictionaries that do not fit within
  // these constraints, consider using `LargeCustomDictionaryConfig` in the
  // `StoredInfoType` API.
  message Dictionary {
    // Message defining a list of words or phrases to search for in the data.
    message WordList {
      // Words or phrases defining the dictionary. The dictionary must contain
      // at least one phrase and every phrase must contain at least 2 characters
      // that are letters or digits. [required]
      repeated string words = 1;
    }

    // Where the dictionary's words come from; at most one can be set.
    oneof source {
      // List of words or phrases to search for.
      WordList word_list = 1;

      // Newline-delimited file of words in Cloud Storage. Only a single file
      // is accepted.
      CloudStoragePath cloud_storage_path = 3;
    }
  }

  // Message defining a custom regular expression.
  message Regex {
    // Pattern defining the regular expression. Its syntax
    // (https://github.com/google/re2/wiki/Syntax) can be found under the
    // google/re2 repository on GitHub.
    string pattern = 1;

    // The indexes of the submatches to extract as findings. When not
    // specified, the entire match is returned. No more than 3 may be included.
    repeated int32 group_indexes = 2;
  }

  // Message for detecting output from deidentification transformations
  // such as
  // [`CryptoReplaceFfxFpeConfig`](/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
  // These types of transformations are
  // those that perform pseudonymization, thereby producing a "surrogate" as
  // output. This should be used in conjunction with a field on the
  // transformation such as `surrogate_info_type`. This CustomInfoType does
  // not support the use of `detection_rules`.
  message SurrogateType {}

  // Rule for modifying a CustomInfoType to alter behavior under certain
  // circumstances, depending on the specific details of the rule. Not supported
  // for the `surrogate_type` custom info type.
  message DetectionRule {
    // Message for specifying a window around a finding to apply a detection
    // rule.
    message Proximity {
      // Number of characters before the finding to consider.
      int32 window_before = 1;

      // Number of characters after the finding to consider.
      int32 window_after = 2;
    }

    // Message for specifying an adjustment to the likelihood of a finding as
    // part of a detection rule.
    message LikelihoodAdjustment {
      // How the likelihood is adjusted; at most one can be set.
      oneof adjustment {
        // Set the likelihood of a finding to a fixed value.
        Likelihood fixed_likelihood = 1;

        // Increase or decrease the likelihood by the specified number of
        // levels. For example, if a finding would be `POSSIBLE` without the
        // detection rule and `relative_likelihood` is 1, then it is upgraded to
        // `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
        // Likelihood may never drop below `VERY_UNLIKELY` or exceed
        // `VERY_LIKELY`, so applying an adjustment of 1 followed by an
        // adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
        // a final likelihood of `LIKELY`.
        int32 relative_likelihood = 2;
      }
    }

    // The rule that adjusts the likelihood of findings within a certain
    // proximity of hotwords.
    message HotwordRule {
      // Regular expression pattern defining what qualifies as a hotword.
      Regex hotword_regex = 1;

      // Proximity of the finding within which the entire hotword must reside.
      // The total length of the window cannot exceed 1000 characters. Note that
      // the finding itself will be included in the window, so that hotwords may
      // be used to match substrings of the finding itself. For example, the
      // certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
      // adjusted upwards if the area code is known to be the local area code of
      // a company office using the hotword regex "\(xxx\)", where "xxx"
      // is the area code in question.
      Proximity proximity = 2;

      // Likelihood adjustment to apply to all matching findings.
      LikelihoodAdjustment likelihood_adjustment = 3;
    }

    // The kind of detection rule; at most one can be set.
    oneof type {
      // Hotword-based detection rule.
      HotwordRule hotword_rule = 1;
    }
  }

  // Whether findings of this custom info type are excluded from results.
  enum ExclusionType {
    // A finding of this custom info type will not be excluded from results.
    EXCLUSION_TYPE_UNSPECIFIED = 0;

    // A finding of this custom info type will be excluded from final results,
    // but can still affect rule execution.
    EXCLUSION_TYPE_EXCLUDE = 1;
  }

  // CustomInfoType can either be a new infoType, or an extension of built-in
  // infoType, when the name matches one of existing infoTypes and that infoType
  // is specified in `InspectContent.info_types` field. Specifying the latter
  // adds findings to the ones detected by the system. If built-in info type is
  // not specified in `InspectContent.info_types` list then the name is treated
  // as a custom info type.
  InfoType info_type = 1;

  // Likelihood to return for this CustomInfoType. This base value can be
  // altered by a detection rule if the finding meets the criteria specified by
  // the rule. Defaults to `VERY_LIKELY` if not specified.
  Likelihood likelihood = 6;

  // The detection mechanism for this CustomInfoType; at most one can be set.
  oneof type {
    // A list of phrases to detect as a CustomInfoType.
    Dictionary dictionary = 2;

    // Regular expression based CustomInfoType.
    Regex regex = 3;

    // Message for detecting output from deidentification transformations that
    // support reversing.
    SurrogateType surrogate_type = 4;

    // Load an existing `StoredInfoType` resource for use in
    // `InspectDataSource`. Not currently supported in `InspectContent`.
    StoredType stored_type = 5;
  }

  // Set of detection rules to apply to all findings of this CustomInfoType.
  // Rules are applied in order that they are specified. Not supported for the
  // `surrogate_type` CustomInfoType.
  repeated DetectionRule detection_rules = 7;

  // If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
  // to be returned. It still can be used for rules matching.
  ExclusionType exclusion_type = 8;
}

// General identifier of a data field in a storage service.
message FieldId {
  // Name describing the field.
  string name = 1;
}

// Datastore partition ID.
// A partition ID identifies a grouping of entities. The grouping is always
// by project and namespace, however the namespace ID may be empty.
//
// A partition ID contains several dimensions:
// project ID and namespace ID.
message PartitionId {
  // NOTE(review): the field numbers are 2 and 4 rather than 1 and 2,
  // presumably to mirror the Datastore API's own PartitionId message. Field
  // numbers are the wire identity, so do not renumber — confirm.

  // The ID of the project to which the entities belong.
  string project_id = 2;

  // If not empty, the ID of the namespace to which the entities belong.
  string namespace_id = 4;
}

// A representation of a Datastore kind.
message KindExpression {
  // The name of the kind.
  string name = 1;
}

// Options defining a data set within Google Cloud Datastore.
message DatastoreOptions {
  // A partition ID identifies a grouping of entities. The grouping is always
  // by project and namespace, however the namespace ID may be empty.
  PartitionId partition_id = 1;

  // The kind to process.
  KindExpression kind = 2;
}

// Message representing a set of files in a Cloud Storage bucket. Regular
// expressions are used to allow fine-grained control over which files in the
// bucket to include.
//
// Included files are those that match at least one item in `include_regex` and
// do not match any items in `exclude_regex`. Note that a file that matches
// items from both lists will _not_ be included. For a match to occur, the
// entire file path (i.e., everything in the url after the bucket name) must
// match the regular expression.
//
// For example, given the input `{bucket_name: "mybucket", include_regex:
// ["directory1/.*"], exclude_regex:
// ["directory1/excluded.*"]}`:
//
// * `gs://mybucket/directory1/myfile` will be included
// * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
// across `/`)
// * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
// full path doesn't match any items in `include_regex`)
// * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
// matches an item in `exclude_regex`)
//
// If `include_regex` is left empty, it will match all files by default
// (this is equivalent to setting `include_regex: [".*"]`).
//
// Some other common use cases:
//
// * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
// files in `mybucket` except for .pdf files
// * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
// include all files directly under `gs://mybucket/directory/`, without matching
// across `/`
message CloudStorageRegexFileSet {
  // The name of a Cloud Storage bucket. Required.
  string bucket_name = 1;

  // A list of regular expressions matching file paths to include. All files in
  // the bucket that match at least one of these regular expressions will be
  // included in the set of files, except for those that also match an item in
  // `exclude_regex`. Leaving this field empty will match all files by default
  // (this is equivalent to including `.*` in the list).
  //
  // Regular expressions use RE2
  // [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
  // under the google/re2 repository on GitHub.
  repeated string include_regex = 2;

  // A list of regular expressions matching file paths to exclude. All files in
  // the bucket that match at least one of these regular expressions will be
  // excluded from the scan.
  //
  // Regular expressions use RE2
  // [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
  // under the google/re2 repository on GitHub.
  repeated string exclude_regex = 3;
}

// Options defining a file or a set of files within a Google Cloud Storage
// bucket.
message CloudStorageOptions {
  // Set of files to scan.
  message FileSet {
    // The Cloud Storage url of the file(s) to scan, in the format
    // `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
    //
    // If the url ends in a trailing slash, the bucket or directory represented
    // by the url will be scanned non-recursively (content in sub-directories
    // will not be scanned). This means that `gs://mybucket/` is equivalent to
    // `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
    // `gs://mybucket/directory/*`.
    //
    // Exactly one of `url` or `regex_file_set` must be set.
    string url = 1;

    // The regex-filtered set of files to scan. Exactly one of `url` or
    // `regex_file_set` must be set.
    CloudStorageRegexFileSet regex_file_set = 2;
  }

  // How to sample bytes if not all bytes are scanned. Meaningful only when used
  // in conjunction with bytes_limit_per_file. If not specified, scanning would
  // start from the top.
  enum SampleMethod {
    // No method specified; scanning starts from the top.
    SAMPLE_METHOD_UNSPECIFIED = 0;

    // Scan from the top (default).
    TOP = 1;

    // For each file larger than bytes_limit_per_file, randomly pick the offset
    // to start scanning. The scanned bytes are contiguous.
    RANDOM_START = 2;
  }

  // The set of one or more files to scan.
  FileSet file_set = 1;

  // Max number of bytes to scan from a file. If a scanned file's size is bigger
  // than this value then the rest of the bytes are omitted. Only one
  // of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
  int64 bytes_limit_per_file = 4;

  // Max percentage of bytes to scan from a file. The rest are omitted. The
  // number of bytes scanned is rounded down. Must be between 0 and 100,
  // inclusive. Both 0 and 100 mean no limit. Defaults to 0. Only one
  // of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
  int32 bytes_limit_per_file_percent = 8;

  // List of file type groups to include in the scan.
  // If empty, all files are scanned and available data format processors
  // are applied. In addition, the binary content of the selected files
  // is always scanned as well.
  repeated FileType file_types = 5;

  // How to sample bytes when not all bytes of a file are scanned. See
  // `SampleMethod`.
  SampleMethod sample_method = 6;

  // Limits the number of files to scan to this percentage of the input FileSet.
  // Number of files scanned is rounded down. Must be between 0 and 100,
  // inclusive. Both 0 and 100 mean no limit. Defaults to 0.
  int32 files_limit_percent = 7;
}

// Message representing a set of files in Cloud Storage.
message CloudStorageFileSet {
  // The url, in the format `gs://<bucket>/<path>`. Trailing wildcard in the
  // path is allowed.
  string url = 1;
}

// Message representing a single file or path in Cloud Storage.
message CloudStoragePath {
  // A url representing a file or path (no wildcards) in Cloud Storage.
  // Example: gs://[BUCKET_NAME]/dictionary.txt
  string path = 1;
}

// Options defining BigQuery table and row identifiers.
message BigQueryOptions {
  // How to sample rows if not all rows are scanned. Meaningful only when used
  // in conjunction with either rows_limit or rows_limit_percent. If not
  // specified, scanning would start from the top.
  enum SampleMethod {
    // No method specified; scanning starts from the top.
    SAMPLE_METHOD_UNSPECIFIED = 0;

    // Scan from the top (default).
    TOP = 1;

    // Randomly pick the row to start scanning. The scanned rows are contiguous.
    RANDOM_START = 2;
  }

  // Complete BigQuery table reference.
  BigQueryTable table_reference = 1;

  // References to fields uniquely identifying rows within the table.
  // Nested fields in the format, like `person.birthdate.year`, are allowed.
  repeated FieldId identifying_fields = 2;

  // Max number of rows to scan. If the table has more rows than this value, the
  // rest of the rows are omitted. If not set, or if set to 0, all rows will be
  // scanned. Only one of rows_limit and rows_limit_percent can be specified.
  // Cannot be used in conjunction with TimespanConfig.
  int64 rows_limit = 3;

  // Max percentage of rows to scan. The rest are omitted. The number of rows
  // scanned is rounded down. Must be between 0 and 100, inclusive. Both 0 and
  // 100 mean no limit. Defaults to 0. Only one of rows_limit and
  // rows_limit_percent can be specified. Cannot be used in conjunction with
  // TimespanConfig.
  int32 rows_limit_percent = 6;

  // How to sample rows when not all rows are scanned. See `SampleMethod`.
  SampleMethod sample_method = 4;

  // References to fields excluded from scanning. This allows you to skip
  // inspection of entire columns which you know have no findings.
  repeated FieldId excluded_fields = 5;
}

// Shared message indicating Cloud storage type.
message StorageConfig {
  // Configuration of the timespan of the items to include in scanning.
  // Currently only supported when inspecting Google Cloud Storage and BigQuery.
  message TimespanConfig {
    // Exclude files or rows older than this value.
    google.protobuf.Timestamp start_time = 1;

    // Exclude files or rows newer than this value.
    // If set to zero, no upper time limit is applied.
    google.protobuf.Timestamp end_time = 2;

    // Specification of the field containing the timestamp of scanned items.
    // Used for data sources like Datastore or BigQuery.
    // If not specified for BigQuery, table last modification timestamp
    // is checked against given time span.
    // The valid data types of the timestamp field are:
    // for BigQuery - timestamp, date, datetime;
    // for Datastore - timestamp.
    // Datastore entity will be scanned if the timestamp property does not exist
    // or its value is empty or invalid.
    FieldId timestamp_field = 3;

    // When the job is started by a JobTrigger we will automatically figure out
    // a valid start_time to avoid scanning files that have not been modified
    // since the last time the JobTrigger executed. This will be based on the
    // time of the execution of the last run of the JobTrigger.
    bool enable_auto_population_of_timespan_config = 4;
  }

  // Options for the storage type to scan; at most one can be set.
  oneof type {
    // Google Cloud Datastore options specification.
    DatastoreOptions datastore_options = 2;

    // Google Cloud Storage options specification.
    CloudStorageOptions cloud_storage_options = 3;

    // BigQuery options specification.
    BigQueryOptions big_query_options = 4;
  }

  // Timespan of the items to include in scanning. See `TimespanConfig`.
  TimespanConfig timespan_config = 6;
}

// Definitions of file type groups to scan.
enum FileType {
  // Includes all files.
  FILE_TYPE_UNSPECIFIED = 0;

  // Includes all file extensions not covered by text file types.
  BINARY_FILE = 1;

  // Included file extensions:
  //   asc, brf, c, cc, cpp, csv, cxx, c++, cs, css, dart, eml, go, h, hh, hpp,
  //   hxx, h++, hs, html, htm, shtml, shtm, xhtml, lhs, ini, java, js, json,
  //   ocaml, md, mkd, markdown, m, ml, mli, pl, pm, php, phtml, pht, py, pyw,
  //   rb, rbw, rs, rc, scala, sh, sql, tex, txt, text, tsv, vcard, vcs, wml,
  //   xml, xsl, xsd, yml, yaml.
  TEXT_FILE = 2;

  // Included file extensions:
  //   bmp, gif, jpg, jpeg, jpe, png.
  // bytes_limit_per_file has no effect on image files.
  IMAGE = 3;
}

// Row key for identifying a record in BigQuery table.
message BigQueryKey {
  // Complete BigQuery table reference.
  BigQueryTable table_reference = 1;

  // Absolute number of the row from the beginning of the table at the time
  // of scanning.
  int64 row_number = 2;
}

// Record key for a finding in Cloud Datastore.
message DatastoreKey {
  // Datastore entity key.
  Key entity_key = 1;
}

// A unique identifier for a Datastore entity.
// If a key's partition ID or any of its path kinds or names are
// reserved/read-only, the key is reserved/read-only.
// A reserved/read-only key is forbidden in certain documented contexts.
message Key {
  // A (kind, ID/name) pair used to construct a key path.
  //
  // If either name or ID is set, the element is complete.
  // If neither is set, the element is incomplete.
  message PathElement {
    // The kind of the entity.
    // A kind matching regex `__.*__` is reserved/read-only.
    // A kind must not contain more than 1500 bytes when UTF-8 encoded.
    // Cannot be `""`.
    string kind = 1;

    // The type of ID.
    oneof id_type {
      // The auto-allocated ID of the entity.
      // Never equal to zero. Values less than zero are discouraged and may not
      // be supported in the future.
      int64 id = 2;

      // The name of the entity.
      // A name matching regex `__.*__` is reserved/read-only.
      // A name must not be more than 1500 bytes when UTF-8 encoded.
      // Cannot be `""`.
      string name = 3;
    }
  }

  // Entities are partitioned into subsets, currently identified by a project
  // ID and namespace ID.
  // Queries are scoped to a single partition.
  PartitionId partition_id = 1;

  // The entity path.
  // An entity path consists of one or more elements composed of a kind and a
  // string or numerical identifier, which identify entities. The first
  // element identifies a _root entity_, the second element identifies
  // a _child_ of the root entity, the third element identifies a child of the
  // second entity, and so forth. The entities identified by all prefixes of
  // the path are called the element's _ancestors_.
  //
  // A path can never be empty, and a path can have at most 100 elements.
  repeated PathElement path = 2;
}

// Message for a unique key indicating a record that contains a finding.
message RecordKey {
  // The storage-specific key of the record; at most one can be set.
  oneof type {
    // Record key for a finding in Cloud Datastore.
    DatastoreKey datastore_key = 2;

    // Row key identifying a record in a BigQuery table.
    BigQueryKey big_query_key = 3;
  }

  // Values of identifying columns in the given row. Order of values matches
  // the order of field identifiers specified in the scanning request.
  repeated string id_values = 5;
}

// Message defining the location of a BigQuery table. A table is uniquely
// identified by its project_id, dataset_id, and table_id. Within a query
// a table is often referenced with a string in the format of:
// `<project_id>:<dataset_id>.<table_id>` or
// `<project_id>.<dataset_id>.<table_id>`.
message BigQueryTable {
  // The Google Cloud Platform project ID of the project containing the table.
  // If omitted, project ID is inferred from the API call.
  string project_id = 1;

  // Dataset ID of the table.
  string dataset_id = 2;

  // Name of the table.
  string table_id = 3;
}

// Message defining a field of a BigQuery table.
message BigQueryField {
  // Source table of the field.
  BigQueryTable table = 1;

  // Designated field in the BigQuery table.
  FieldId field = 2;
}

// An entity in a dataset is a field or set of fields that correspond to a
// single person. For example, in medical records the `EntityId` might be a
// patient identifier, or for financial records it might be an account
// identifier. This message is used when generalizations or analysis must take
// into account that multiple rows correspond to the same entity.
message EntityId {
  // Composite key indicating which field contains the entity identifier.
  FieldId field = 1;
}