+// Copyright 2016 Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+syntax = "proto3";
+package google.genomics.v1;
+import "google/api/annotations.proto";
+import "google/protobuf/empty.proto";
+import "google/protobuf/field_mask.proto";
+import "google/protobuf/struct.proto";
+import "google/protobuf/wrappers.proto";
+import "google/rpc/status.proto";
+option cc_enable_arenas = true;
+option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
+option java_multiple_files = true;
+option java_outer_classname = "AnnotationsProto";
+option java_package = "com.google.genomics.v1";
+// This service provides storage and positional retrieval of genomic
+// reference annotations, including variant annotations.
+service AnnotationServiceV1 {
+  // Creates a new annotation set. Caller must have WRITE permission for the
+  // associated dataset.
+  //
+  // The following fields are required:
+  //
+  // * [datasetId][google.genomics.v1.AnnotationSet.dataset_id]
+  // * [referenceSetId][google.genomics.v1.AnnotationSet.reference_set_id]
+  //
+  // All other fields may be optionally specified, unless documented as being
+  // server-generated (for example, the `id` field).
+  rpc CreateAnnotationSet(CreateAnnotationSetRequest) returns (AnnotationSet) {
+    option (google.api.http) = {
+      post: "/v1/annotationsets"
+      body: "annotation_set"
+    };
+  }
+
+  // Gets an annotation set. Caller must have READ permission for
+  // the associated dataset.
+  rpc GetAnnotationSet(GetAnnotationSetRequest) returns (AnnotationSet) {
+    option (google.api.http) = {
+      get: "/v1/annotationsets/{annotation_set_id}"
+    };
+  }
+
+  // Updates an annotation set. The update must respect all mutability
+  // restrictions and other invariants described on the annotation set resource.
+  // Caller must have WRITE permission for the associated dataset.
+  //
+  // The mutable fields, and the optional mask that narrows the update, are
+  // described on
+  // [UpdateAnnotationSetRequest.update_mask][google.genomics.v1.UpdateAnnotationSetRequest.update_mask].
+  rpc UpdateAnnotationSet(UpdateAnnotationSetRequest) returns (AnnotationSet) {
+    option (google.api.http) = {
+      put: "/v1/annotationsets/{annotation_set_id}"
+      body: "annotation_set"
+    };
+  }
+
+  // Deletes an annotation set. Caller must have WRITE permission
+  // for the associated annotation set.
+  rpc DeleteAnnotationSet(DeleteAnnotationSetRequest)
+      returns (google.protobuf.Empty) {
+    option (google.api.http) = {
+      delete: "/v1/annotationsets/{annotation_set_id}"
+    };
+  }
+
+  // Searches for annotation sets that match the given criteria. Annotation sets
+  // are returned in an unspecified order. This order is consistent, such that
+  // two queries for the same content (regardless of page size) yield annotation
+  // sets in the same order across their respective streams of paginated
+  // responses. Caller must have READ permission for the queried datasets.
+  rpc SearchAnnotationSets(SearchAnnotationSetsRequest)
+      returns (SearchAnnotationSetsResponse) {
+    option (google.api.http) = {
+      post: "/v1/annotationsets/search"
+      body: "*"
+    };
+  }
+
+  // Creates a new annotation. Caller must have WRITE permission
+  // for the associated annotation set.
+  //
+  // The following fields are required:
+  //
+  // * [annotationSetId][google.genomics.v1.Annotation.annotation_set_id]
+  // * [referenceName][google.genomics.v1.Annotation.reference_name] or
+  //   [referenceId][google.genomics.v1.Annotation.reference_id]
+  //
+  // ### Transcripts
+  //
+  // For annotations of type TRANSCRIPT, the following fields of
+  // [transcript][google.genomics.v1.Annotation.transcript] must be provided:
+  //
+  // * [exons.start][google.genomics.v1.Transcript.Exon.start]
+  // * [exons.end][google.genomics.v1.Transcript.Exon.end]
+  //
+  // All other fields may be optionally specified, unless documented as being
+  // server-generated (for example, the `id` field). The annotated
+  // range must be no longer than 100Mbp (mega base pairs). See the
+  // [Annotation resource][google.genomics.v1.Annotation]
+  // for additional restrictions on each field.
+  rpc CreateAnnotation(CreateAnnotationRequest) returns (Annotation) {
+    option (google.api.http) = {
+      post: "/v1/annotations"
+      body: "annotation"
+    };
+  }
+
+  // Creates one or more new annotations atomically. All annotations must
+  // belong to the same annotation set. Caller must have WRITE
+  // permission for this annotation set. For optimal performance, batch
+  // positionally adjacent annotations together.
+  //
+  // If the request has a systemic issue, such as an attempt to write to
+  // an inaccessible annotation set, the entire RPC will fail accordingly. For
+  // lesser data issues, when possible an error will be isolated to the
+  // corresponding batch entry in the response; the remaining well-formed
+  // annotations will be created normally.
+  //
+  // For details on the requirements for each individual annotation resource,
+  // see
+  // [CreateAnnotation][google.genomics.v1.AnnotationServiceV1.CreateAnnotation].
+  rpc BatchCreateAnnotations(BatchCreateAnnotationsRequest)
+      returns (BatchCreateAnnotationsResponse) {
+    option (google.api.http) = {
+      post: "/v1/annotations:batchCreate"
+      body: "*"
+    };
+  }
+
+  // Gets an annotation. Caller must have READ permission
+  // for the associated annotation set.
+  rpc GetAnnotation(GetAnnotationRequest) returns (Annotation) {
+    option (google.api.http) = {
+      get: "/v1/annotations/{annotation_id}"
+    };
+  }
+
+  // Updates an annotation. Caller must have
+  // WRITE permission for the associated dataset.
+  //
+  // The mutable fields, and the optional mask that narrows the update, are
+  // described on
+  // [UpdateAnnotationRequest.update_mask][google.genomics.v1.UpdateAnnotationRequest.update_mask].
+  rpc UpdateAnnotation(UpdateAnnotationRequest) returns (Annotation) {
+    option (google.api.http) = {
+      put: "/v1/annotations/{annotation_id}"
+      body: "annotation"
+    };
+  }
+
+  // Deletes an annotation. Caller must have WRITE permission for
+  // the associated annotation set.
+  rpc DeleteAnnotation(DeleteAnnotationRequest)
+      returns (google.protobuf.Empty) {
+    option (google.api.http) = {
+      delete: "/v1/annotations/{annotation_id}"
+    };
+  }
+
+  // Searches for annotations that match the given criteria. Results are
+  // ordered by genomic coordinate (by reference sequence, then position).
+  // Annotations with equivalent genomic coordinates are returned in an
+  // unspecified order. This order is consistent, such that two queries for the
+  // same content (regardless of page size) yield annotations in the same order
+  // across their respective streams of paginated responses. Caller must have
+  // READ permission for the queried annotation sets.
+  rpc SearchAnnotations(SearchAnnotationsRequest)
+      returns (SearchAnnotationsResponse) {
+    option (google.api.http) = {
+      post: "/v1/annotations/search"
+      body: "*"
+    };
+  }
+}
+// An annotation set is a logical grouping of annotations that share consistent
+// type information and provenance. Examples of annotation sets include 'all
+// genes from refseq', and 'all variant annotations from ClinVar'.
+message AnnotationSet {
+  // The server-generated annotation set ID, unique across all annotation sets.
+  string id = 1;
+
+  // The dataset to which this annotation set belongs.
+  string dataset_id = 2;
+
+  // The ID of the reference set that defines the coordinate space for this
+  // set's annotations.
+  string reference_set_id = 3;
+
+  // The display name for this annotation set.
+  string name = 4;
+
+  // The source URI describing the file from which this annotation set was
+  // generated, if any.
+  string source_uri = 5;
+
+  // The type of annotations contained within this set.
+  AnnotationType type = 6;
+
+  // NOTE(review): "read alignment" below reads like wording carried over from
+  // a different resource; this map belongs to an annotation set. Confirm the
+  // intended description.
+
+  // A map of additional read alignment information. This must be of the form
+  // map<string, string[]> (string key mapping to a list of string values).
+  map<string, google.protobuf.ListValue> info = 17;
+}
+// An annotation describes a region of reference genome. The value of an
+// annotation may be one of several canonical types, supplemented by arbitrary
+// info tags. An annotation is not inherently associated with a specific
+// sample or individual (though a client could choose to use annotations in
+// this way). Example canonical annotation types are `GENE` and
+// `VARIANT`.
+message Annotation {
+  // The server-generated annotation ID, unique across all annotations.
+  string id = 1;
+
+  // The annotation set to which this annotation belongs.
+  string annotation_set_id = 2;
+
+  // The display name of this annotation.
+  string name = 3;
+
+  // The ID of the Google Genomics reference associated with this range.
+  string reference_id = 4;
+
+  // The display name corresponding to the reference specified by
+  // `referenceId`, for example `chr1`, `1`, or `chrX`.
+  string reference_name = 5;
+
+  // The start position of the range on the reference, 0-based inclusive.
+  int64 start = 6;
+
+  // The end position of the range on the reference, 0-based exclusive.
+  int64 end = 7;
+
+  // Whether this range refers to the reverse strand, as opposed to the forward
+  // strand. Note that regardless of this field, the start/end position of the
+  // range always refer to the forward strand.
+  bool reverse_strand = 8;
+
+  // The data type for this annotation. Must match the containing annotation
+  // set's type.
+  AnnotationType type = 9;
+
+  // The type-specific payload of this annotation. Being a oneof, at most one
+  // of the members below is set at a time.
+  oneof value {
+    // A variant annotation, which describes the effect of a variant on the
+    // genome, the coding sequence, and/or higher level consequences at the
+    // organism level e.g. pathogenicity. This field is only set for annotations
+    // of type `VARIANT`.
+    VariantAnnotation variant = 10;
+
+    // A transcript value represents the assertion that a particular region of
+    // the reference genome may be transcribed as RNA. An alternative splicing
+    // pattern would be represented as a separate transcript object. This field
+    // is only set for annotations of type `TRANSCRIPT`.
+    Transcript transcript = 11;
+  }
+
+  // NOTE(review): "read alignment" below reads like wording carried over from
+  // a different resource; this map belongs to an annotation. Confirm the
+  // intended description.
+
+  // A map of additional read alignment information. This must be of the form
+  // map<string, string[]> (string key mapping to a list of string values).
+  map<string, google.protobuf.ListValue> info = 12;
+}
+// The payload of an annotation of type `VARIANT`: the variant's type, its
+// effect on the coding sequence, the affected gene and transcripts, and the
+// associated clinical conditions and significance.
+message VariantAnnotation {
+  // A condition describes the way a variant influences human health.
+  message ClinicalCondition {
+    // A set of names for the condition.
+    repeated string names = 1;
+
+    // The set of external IDs for this condition.
+    repeated ExternalId external_ids = 2;
+
+    // NOTE(review): "this gene" below sits inside a condition message;
+    // presumably "this condition" is meant - confirm.
+
+    // The MedGen concept id associated with this gene.
+    // Search for these IDs at http://www.ncbi.nlm.nih.gov/medgen/
+    string concept_id = 3;
+
+    // The OMIM id for this condition.
+    // Search for these IDs at http://omim.org/
+    string omim_id = 4;
+  }
+
+  // The variant type, adapted from ClinVar's list of variant types.
+  //
+  // NOTE(review): only `SNP` and `CNV` are declared in this view, although
+  // the comments inside also describe `TYPE_OTHER`, `INSERTION`, `DELETION`,
+  // `SUBSTITUTION` and `STRUCTURAL`. proto3 additionally requires the first
+  // enumerator to be zero. Restore the missing enumerators from the
+  // published schema before compiling; do not guess their numbers.
+  enum Type {
+    // `TYPE_OTHER` should be used when no other Type will suffice.
+    // Further explanation of the variant type may be included in the
+    // [info][google.genomics.v1.Annotation.info] field.
+    //
+    // `INSERTION` indicates an insertion.
+    //
+    // `DELETION` indicates a deletion.
+    //
+    // `SUBSTITUTION` indicates a block substitution of
+    // two or more nucleotides.
+
+    // `SNP` indicates a single nucleotide polymorphism.
+    SNP = 5;
+
+    // `STRUCTURAL` indicates a large structural variant,
+    // including chromosomal fusions, inversions, etc.
+
+    // `CNV` indicates a variation in copy number.
+    CNV = 7;
+  }
+
+  // The effect of the variant on the coding sequence.
+  //
+  // NOTE(review): only `STOP_GAIN` and `STOP_LOSS` are declared in this view,
+  // although the comments inside also describe `EFFECT_OTHER`, `FRAMESHIFT`,
+  // `FRAME_PRESERVING_INDEL`, `SYNONYMOUS_SNP`, `NONSYNONYMOUS_SNP` and
+  // `SPLICE_SITE_DISRUPTION`, and there is no zero enumerator. Restore them
+  // from the published schema before compiling.
+  enum Effect {
+    // `EFFECT_OTHER` should be used when no other Effect
+    // will suffice.
+    //
+    // `FRAMESHIFT` indicates a mutation in which the insertion or
+    // deletion of nucleotides resulted in a frameshift change.
+    //
+    // `FRAME_PRESERVING_INDEL` indicates a mutation in which a
+    // multiple of three nucleotides has been inserted or deleted, resulting
+    // in no change to the reading frame of the coding sequence.
+    //
+    // `SYNONYMOUS_SNP` indicates a single nucleotide polymorphism
+    // mutation that results in no amino acid change.
+    //
+    // `NONSYNONYMOUS_SNP` indicates a single nucleotide
+    // polymorphism mutation that results in an amino acid change.
+
+    // `STOP_GAIN` indicates a mutation that leads to the creation
+    // of a stop codon at the variant site. Frameshift mutations creating
+    // downstream stop codons do not count as `STOP_GAIN`.
+    STOP_GAIN = 6;
+
+    // `STOP_LOSS` indicates a mutation that eliminates a
+    // stop codon at the variant site.
+    STOP_LOSS = 7;
+
+    // `SPLICE_SITE_DISRUPTION` indicates that this variant is
+    // found in a splice site for the associated transcript, and alters the
+    // normal splicing pattern.
+  }
+
+  // The clinical significance of a variant.
+  //
+  // NOTE(review): only `BENIGN` is declared in this view; `OTHER` and
+  // `MULTIPLE_REPORTED` are described but have no enumerator, and there is no
+  // zero enumerator. Restore the full value list from the published schema
+  // before compiling.
+  enum ClinicalSignificance {
+    // `OTHER` should be used when no other clinical significance
+    // value will suffice.
+
+    BENIGN = 3;
+
+    // `MULTIPLE_REPORTED` should be used when multiple clinical
+    // significances are reported for a variant. The original clinical
+    // significance values may be provided in the `info` field.
+  }
+
+  // Type has been adapted from ClinVar's list of variant types.
+  Type type = 1;
+
+  // Effect of the variant on the coding sequence.
+  Effect effect = 2;
+
+  // The alternate allele for this variant. If multiple alternate alleles
+  // exist at this location, create a separate variant for each one, as they
+  // may represent distinct conditions.
+  string alternate_bases = 3;
+
+  // Google annotation ID of the gene affected by this variant. This should
+  // be provided when the variant is created.
+  string gene_id = 4;
+
+  // Google annotation IDs of the transcripts affected by this variant. These
+  // should be provided when the variant is created.
+  repeated string transcript_ids = 5;
+
+  // The set of conditions associated with this variant.
+  // A condition describes the way a variant influences human health.
+  repeated ClinicalCondition conditions = 6;
+
+  // Describes the clinical significance of a variant.
+  // It is adapted from the ClinVar controlled vocabulary for clinical
+  // significance described at:
+  // http://www.ncbi.nlm.nih.gov/clinvar/docs/clinsig/
+  ClinicalSignificance clinical_significance = 7;
+}
+// A transcript represents the assertion that a particular region of the
+// reference genome may be transcribed as RNA.
+message Transcript {
+  // One exon of the transcript, given as a half-open range on the reference
+  // sequence plus an optional reading-frame offset.
+  message Exon {
+    // The start position of the exon on this annotation's reference sequence,
+    // 0-based inclusive. Note that this is relative to the reference start, and
+    // *not* the containing annotation start.
+    int64 start = 1;
+
+    // The end position of the exon on this annotation's reference sequence,
+    // 0-based exclusive. Note that this is relative to the reference start, and
+    // *not* the containing annotation start.
+    int64 end = 2;
+
+    // The frame of this exon. Contains a value of 0, 1, or 2, which indicates
+    // the offset of the first coding base of the exon within the reading frame
+    // of the coding DNA sequence, if any. This field is dependent on the
+    // strandedness of this annotation (see
+    // [Annotation.reverse_strand][google.genomics.v1.Annotation.reverse_strand]).
+    // For forward stranded annotations, this offset is relative to the
+    // [exon.start][google.genomics.v1.Transcript.Exon.start]. For reverse
+    // strand annotations, this offset is relative to the
+    // [exon.end][google.genomics.v1.Transcript.Exon.end] `- 1`.
+    //
+    // Unset if this exon does not intersect the coding sequence. Upon creation
+    // of a transcript, the frame must be populated for all or none of the
+    // coding exons.
+    google.protobuf.Int32Value frame = 3;
+  }
+
+  // The range of the coding sequence, given as a half-open range on the
+  // reference sequence.
+  message CodingSequence {
+    // The start of the coding sequence on this annotation's reference sequence,
+    // 0-based inclusive. Note that this position is relative to the reference
+    // start, and *not* the containing annotation start.
+    int64 start = 1;
+
+    // The end of the coding sequence on this annotation's reference sequence,
+    // 0-based exclusive. Note that this position is relative to the reference
+    // start, and *not* the containing annotation start.
+    int64 end = 2;
+  }
+
+  // The annotation ID of the gene from which this transcript is transcribed.
+  string gene_id = 1;
+
+  // The <a href="http://en.wikipedia.org/wiki/Exon">exons</a> that compose
+  // this transcript. This field should be unset for genomes where transcript
+  // splicing does not occur, for example prokaryotes.
+  //
+  // Introns are regions of the transcript that are not included in the
+  // spliced RNA product. Though not explicitly modeled here, intron ranges can
+  // be deduced; all regions of this transcript that are not exons are introns.
+  //
+  // Exonic sequences do not necessarily code for a translational product
+  // (amino acids). Only the regions of exons bounded by the
+  // [codingSequence][google.genomics.v1.Transcript.coding_sequence] correspond
+  // to coding DNA sequence.
+  //
+  // Exons are ordered by start position and may not overlap.
+  repeated Exon exons = 2;
+
+  // The range of the coding sequence for this transcript, if any. To determine
+  // the exact ranges of coding sequence, intersect this range with those of the
+  // [exons][google.genomics.v1.Transcript.exons], if any. If there are any
+  // [exons][google.genomics.v1.Transcript.exons], the
+  // [codingSequence][google.genomics.v1.Transcript.coding_sequence] must start
+  // and end within them.
+  //
+  // Note that in some cases, the reference genome will not exactly match the
+  // observed mRNA transcript e.g. due to variance in the source genome from
+  // reference. In these cases,
+  // [exon.frame][google.genomics.v1.Transcript.Exon.frame] will not necessarily
+  // match the expected reference reading frame and coding exon reference bases
+  // cannot necessarily be concatenated to produce the original transcript mRNA.
+  CodingSequence coding_sequence = 3;
+}
+// An identifier for a record held by an external data source.
+message ExternalId {
+  // The name of the source of this data.
+  string source_name = 1;
+
+  // The id used by the source of this data.
+  string id = 2;
+}
+// Request message for the `CreateAnnotationSet` method.
+message CreateAnnotationSetRequest {
+  // The annotation set to create.
+  AnnotationSet annotation_set = 1;
+}
+// Request message for the `GetAnnotationSet` method.
+message GetAnnotationSetRequest {
+  // The ID of the annotation set to be retrieved.
+  string annotation_set_id = 1;
+}
+// Request message for the `UpdateAnnotationSet` method.
+message UpdateAnnotationSetRequest {
+  // The ID of the annotation set to be updated.
+  string annotation_set_id = 1;
+
+  // The new annotation set.
+  AnnotationSet annotation_set = 2;
+
+  // An optional mask specifying which fields to update. Mutable fields are
+  // [name][google.genomics.v1.AnnotationSet.name],
+  // [source_uri][google.genomics.v1.AnnotationSet.source_uri], and
+  // [info][google.genomics.v1.AnnotationSet.info]. If unspecified, all
+  // mutable fields will be updated.
+  google.protobuf.FieldMask update_mask = 3;
+}
+// Request message for the `DeleteAnnotationSet` method.
+message DeleteAnnotationSetRequest {
+  // The ID of the annotation set to be deleted.
+  string annotation_set_id = 1;
+}
+// Request message for the `SearchAnnotationSets` method.
+message SearchAnnotationSetsRequest {
+  // Required. The dataset IDs to search within. Caller must have `READ` access
+  // to these datasets.
+  repeated string dataset_ids = 1;
+
+  // If specified, only annotation sets associated with the given reference set
+  // are returned.
+  string reference_set_id = 2;
+
+  // Only return annotation sets for which a substring of the name matches this
+  // string (case insensitive).
+  string name = 3;
+
+  // If specified, only annotation sets that have any of these types are
+  // returned.
+  repeated AnnotationType types = 4;
+
+  // The continuation token, which is used to page through large result sets.
+  // To get the next page of results, set this parameter to the value of
+  // `nextPageToken` from the previous response.
+  string page_token = 5;
+
+  // The maximum number of results to return in a single page. If unspecified,
+  // defaults to 128. The maximum value is 1024.
+  int32 page_size = 6;
+}
+// Response message for the `SearchAnnotationSets` method.
+message SearchAnnotationSetsResponse {
+  // The matching annotation sets.
+  repeated AnnotationSet annotation_sets = 1;
+
+  // The continuation token, which is used to page through large result sets.
+  // Provide this value in a subsequent request to return the next page of
+  // results. This field will be empty if there aren't any additional results.
+  string next_page_token = 2;
+}
+// Request message for the `CreateAnnotation` method.
+message CreateAnnotationRequest {
+  // The annotation to be created.
+  Annotation annotation = 1;
+}
+// Request message for the `BatchCreateAnnotations` method.
+message BatchCreateAnnotationsRequest {
+  // The annotations to be created. At most 4096 can be specified in a single
+  // request.
+  repeated Annotation annotations = 1;
+
+  // A unique request ID which enables the server to detect duplicated requests.
+  // If provided, duplicated requests will result in the same response; if not
+  // provided, duplicated requests may result in duplicated data. For a given
+  // annotation set, callers should not reuse `request_id`s when writing
+  // different batches of annotations - behavior in this case is undefined.
+  // A common approach is to use a UUID. For batch jobs where worker crashes are
+  // a possibility, consider using some unique variant of a worker or run ID.
+  string request_id = 2;
+}
+// Response message for the `BatchCreateAnnotations` method.
+message BatchCreateAnnotationsResponse {
+  // The outcome of creating one annotation from the batch.
+  message Entry {
+    // The creation status.
+    google.rpc.Status status = 1;
+
+    // The created annotation, if creation was successful.
+    Annotation annotation = 2;
+  }
+
+  // The resulting per-annotation entries, ordered consistently with the
+  // original request.
+  repeated Entry entries = 1;
+}
+// Request message for the `GetAnnotation` method.
+message GetAnnotationRequest {
+  // The ID of the annotation to be retrieved.
+  string annotation_id = 1;
+}
+// Request message for the `UpdateAnnotation` method.
+message UpdateAnnotationRequest {
+  // The ID of the annotation to be updated.
+  string annotation_id = 1;
+
+  // The new annotation.
+  Annotation annotation = 2;
+
+  // An optional mask specifying which fields to update. Mutable fields are
+  // [name][google.genomics.v1.Annotation.name],
+  // [variant][google.genomics.v1.Annotation.variant],
+  // [transcript][google.genomics.v1.Annotation.transcript], and
+  // [info][google.genomics.v1.Annotation.info]. If unspecified, all mutable
+  // fields will be updated.
+  google.protobuf.FieldMask update_mask = 3;
+}
+// Request message for the `DeleteAnnotation` method.
+message DeleteAnnotationRequest {
+  // The ID of the annotation to be deleted.
+  string annotation_id = 1;
+}
+// Request message for the `SearchAnnotations` method.
+message SearchAnnotationsRequest {
+  // Required. The annotation sets to search within. The caller must have
+  // `READ` access to these annotation sets.
+  // All queried annotation sets must have the same type.
+  repeated string annotation_set_ids = 1;
+
+  // Required. `reference_id` or `reference_name` must be set.
+  oneof reference {
+    // The ID of the reference to query.
+    string reference_id = 2;
+
+    // The name of the reference to query, within the reference set associated
+    // with this query.
+    string reference_name = 3;
+  }
+
+  // The start position of the range on the reference, 0-based inclusive. If
+  // specified,
+  // [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id] or
+  // [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name]
+  // must be specified. Defaults to 0.
+  int64 start = 4;
+
+  // The end position of the range on the reference, 0-based exclusive. If
+  // specified,
+  // [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id] or
+  // [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name]
+  // must be specified. Defaults to the length of the reference.
+  int64 end = 5;
+
+  // The continuation token, which is used to page through large result sets.
+  // To get the next page of results, set this parameter to the value of
+  // `nextPageToken` from the previous response.
+  string page_token = 6;
+
+  // The maximum number of results to return in a single page. If unspecified,
+  // defaults to 256. The maximum value is 2048.
+  int32 page_size = 7;
+}
+// Response message for the `SearchAnnotations` method.
+message SearchAnnotationsResponse {
+  // The matching annotations.
+  repeated Annotation annotations = 1;
+
+  // The continuation token, which is used to page through large result sets.
+  // Provide this value in a subsequent request to return the next page of
+  // results. This field will be empty if there aren't any additional results.
+  string next_page_token = 2;
+}
+// When an [Annotation][google.genomics.v1.Annotation] or
+// [AnnotationSet][google.genomics.v1.AnnotationSet] is created, if `type` is
+// not specified it will be set to `GENERIC`.
+//
+// NOTE(review): the first enumerator visible here is `GENERIC = 1`, but proto3
+// requires the first enumerator to be zero, and `TRANSCRIPT` is described at
+// the end without an enumerator. Restore the missing enumerators from the
+// published schema before compiling; do not guess their numbers.
+enum AnnotationType {
+  // A `GENERIC` annotation type should be used when no other annotation
+  // type will suffice. This represents an untyped annotation of the reference
+  // genome.
+  GENERIC = 1;
+
+  // A `VARIANT` annotation type.
+  VARIANT = 2;
+
+  // A `GENE` annotation type represents the existence of a gene at the
+  // associated reference coordinates. The start coordinate is typically the
+  // gene's transcription start site and the end is typically the end of the
+  // gene's last exon.
+  GENE = 3;
+
+  // A `TRANSCRIPT` annotation type represents the assertion that a
+  // particular region of the reference genome may be transcribed as RNA.
+}
+syntax = "proto3";
+package google.genomics.v1;
+import "google/api/annotations.proto";
+option cc_enable_arenas = true;
+option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
+option java_multiple_files = true;
+option java_outer_classname = "CigarProto";
+option java_package = "com.google.genomics.v1";
+// A single CIGAR operation.
+message CigarUnit {
+  // Describes the different types of CIGAR alignment operations that exist.
+  // Used wherever CIGAR alignments are used.
+  //
+  // NOTE(review): this view declares only `INSERT` through `PAD` (2-7). The
+  // comments inside also describe an alignment match, a sequence match and a
+  // sequence mismatch operator, but no enumerators follow them, and proto3
+  // requires the first enumerator to be zero. Restore the missing enumerators
+  // from the published schema before compiling; do not guess their numbers.
+  enum Operation {
+    // An alignment match indicates that a sequence can be aligned to the
+    // reference without evidence of an INDEL. Unlike the sequence match and
+    // sequence mismatch operators (SAM's `=` and `X`), the `ALIGNMENT_MATCH`
+    // operator does not indicate whether the reference and read sequences are
+    // an exact match. This operator is equivalent to SAM's `M`.
+
+    // The insert operator indicates that the read contains evidence of bases
+    // being inserted into the reference. This operator is equivalent to SAM's
+    // `I`.
+    INSERT = 2;
+
+    // The delete operator indicates that the read contains evidence of bases
+    // being deleted from the reference. This operator is equivalent to SAM's
+    // `D`.
+    DELETE = 3;
+
+    // The skip operator indicates that this read skips a long segment of the
+    // reference, but the bases have not been deleted. This operator is commonly
+    // used when working with RNA-seq data, where reads may skip long segments
+    // of the reference between exons. This operator is equivalent to SAM's
+    // `N`.
+    SKIP = 4;
+
+    // The soft clip operator indicates that bases at the start/end of a read
+    // have not been considered during alignment. This may occur if the majority
+    // of a read maps, except for low quality bases at the start/end of a read.
+    // This operator is equivalent to SAM's `S`. Bases that are soft
+    // clipped will still be stored in the read.
+    CLIP_SOFT = 5;
+
+    // The hard clip operator indicates that bases at the start/end of a read
+    // have been omitted from this alignment. This may occur if this linear
+    // alignment is part of a chimeric alignment, or if the read has been
+    // trimmed (for example, during error correction or to trim poly-A tails for
+    // RNA-seq). This operator is equivalent to SAM's `H`.
+    CLIP_HARD = 6;
+
+    // The pad operator indicates that there is padding in an alignment. This
+    // operator is equivalent to SAM's `P`.
+    PAD = 7;
+
+    // This operator indicates that this portion of the aligned sequence exactly
+    // matches the reference. This operator is equivalent to SAM's `=`.
+
+    // This operator indicates that this portion of the aligned sequence is an
+    // alignment match to the reference, but a sequence mismatch. This can
+    // indicate a SNP or a read error. This operator is equivalent to SAM's
+    // `X`.
+  }
+
+  // The kind of alignment operation this unit represents.
+  Operation operation = 1;
+
+  // The number of genomic bases that the operation runs for. Required.
+  int64 operation_length = 2;
+
+  // `referenceSequence` is only used at mismatches
+  // (`SEQUENCE_MISMATCH`) and deletions (`DELETE`).
+  // Filling this field replaces SAM's MD tag. If the relevant information is
+  // not available, this field is unset.
+  string reference_sequence = 3;
+}
+syntax = "proto3";
+package google.genomics.v1;
+import "google/api/annotations.proto";
+import "google/iam/v1/iam_policy.proto";
+import "google/iam/v1/policy.proto";
+import "google/protobuf/empty.proto";
+import "google/protobuf/field_mask.proto";
+import "google/protobuf/timestamp.proto";
+option cc_enable_arenas = true;
+option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
+option java_multiple_files = true;
+option java_outer_classname = "DatasetsProto";
+option java_package = "com.google.genomics.v1";
+// This service manages datasets, which are collections of genomic data.
+service DatasetServiceV1 {
+  // Lists datasets within a project.
+  //
+  // For the definitions of datasets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc ListDatasets(ListDatasetsRequest) returns (ListDatasetsResponse) {
+    option (google.api.http) = {
+      get: "/v1/datasets"
+    };
+  }
+
+  // Creates a new dataset.
+  //
+  // For the definitions of datasets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc CreateDataset(CreateDatasetRequest) returns (Dataset) {
+    option (google.api.http) = {
+      post: "/v1/datasets"
+      body: "dataset"
+    };
+  }
+
+  // Gets a dataset by ID.
+  //
+  // For the definitions of datasets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc GetDataset(GetDatasetRequest) returns (Dataset) {
+    option (google.api.http) = {
+      get: "/v1/datasets/{dataset_id}"
+    };
+  }
+
+  // Updates a dataset.
+  //
+  // For the definitions of datasets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  //
+  // This method supports patch semantics.
+  rpc UpdateDataset(UpdateDatasetRequest) returns (Dataset) {
+    option (google.api.http) = {
+      patch: "/v1/datasets/{dataset_id}"
+      body: "dataset"
+    };
+  }
+
+  // Deletes a dataset and all of its contents (all read group sets,
+  // reference sets, variant sets, call sets, annotation sets, etc.)
+  // This is reversible (up to one week after the deletion) via
+  // the
+  // [datasets.undelete][google.genomics.v1.DatasetServiceV1.UndeleteDataset]
+  // operation.
+  //
+  // For the definitions of datasets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc DeleteDataset(DeleteDatasetRequest) returns (google.protobuf.Empty) {
+    option (google.api.http) = {
+      delete: "/v1/datasets/{dataset_id}"
+    };
+  }
+
+  // Undeletes a dataset by restoring a dataset which was deleted via this API.
+  //
+  // For the definitions of datasets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  //
+  // This operation is only possible for a week after the deletion occurred.
+  rpc UndeleteDataset(UndeleteDatasetRequest) returns (Dataset) {
+    option (google.api.http) = {
+      post: "/v1/datasets/{dataset_id}:undelete"
+      body: "*"
+    };
+  }
+
+  // Sets the access control policy on the specified dataset. Replaces any
+  // existing policy.
+  //
+  // For the definitions of datasets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  //
+  // See <a href="/iam/docs/managing-policies#setting_a_policy">Setting a
+  // Policy</a> for more information.
+  rpc SetIamPolicy(google.iam.v1.SetIamPolicyRequest)
+      returns (google.iam.v1.Policy) {
+    option (google.api.http) = {
+      post: "/v1/{resource=datasets/*}:setIamPolicy"
+      body: "*"
+    };
+  }
+
+  // Gets the access control policy for the dataset. This is empty if the
+  // policy or resource does not exist.
+  //
+  // See <a href="/iam/docs/managing-policies#getting_a_policy">Getting a
+  // Policy</a> for more information.
+  //
+  // For the definitions of datasets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc GetIamPolicy(google.iam.v1.GetIamPolicyRequest)
+      returns (google.iam.v1.Policy) {
+    option (google.api.http) = {
+      post: "/v1/{resource=datasets/*}:getIamPolicy"
+      body: "*"
+    };
+  }
+
+  // Returns permissions that a caller has on the specified resource.
+  // See <a href="/iam/docs/managing-policies#testing_permissions">Testing
+  // Permissions</a> for more information.
+  //
+  // For the definitions of datasets and other genomics resources, see
+  // [Fundamentals of Google
+  // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+  rpc TestIamPermissions(google.iam.v1.TestIamPermissionsRequest)
+      returns (google.iam.v1.TestIamPermissionsResponse) {
+    option (google.api.http) = {
+      post: "/v1/{resource=datasets/*}:testIamPermissions"
+      body: "*"
+    };
+  }
+}
+// A Dataset is a collection of genomic data.
+// For more genomics resource definitions, see [Fundamentals of Google
+// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+message Dataset {
+ // The server-generated dataset ID, unique across all datasets.
+ string id = 1;
+ // The Google Cloud project ID that this dataset belongs to.
+ string project_id = 2;
+ // The dataset name.
+ string name = 3;
+ // The time this dataset was created, as a timestamp (seconds and
+ // nanoseconds since the Unix epoch).
+ google.protobuf.Timestamp create_time = 4;
+// The dataset list request.
+message ListDatasetsRequest {
+ // Required. The Google Cloud project ID to list datasets for.
+ string project_id = 1;
+ // The maximum number of results to return in a single page. If unspecified,
+ // defaults to 50. The maximum value is 1024.
+ int32 page_size = 2;
+ // The continuation token, which is used to page through large result sets.
+ // To get the next page of results, set this parameter to the value of
+ // `nextPageToken` from the previous response.
+ string page_token = 3;
+// The dataset list response.
+message ListDatasetsResponse {
+ // The list of matching Datasets.
+ repeated Dataset datasets = 1;
+ // The continuation token, which is used to page through large result sets.
+ // Provide this value in a subsequent request to return the next page of
+ // results. This field will be empty if there aren't any additional results.
+ string next_page_token = 2;
+// The dataset create request.
+message CreateDatasetRequest {
+ // The dataset to be created. Must contain projectId and name.
+ Dataset dataset = 1;
+// The dataset update request. Carries the target dataset ID, the replacement
+// dataset data, and an optional field mask restricting what is updated.
+message UpdateDatasetRequest {
+ // The ID of the dataset to be updated.
+ string dataset_id = 1;
+ // The new dataset data.
+ Dataset dataset = 2;
+ // An optional mask specifying which fields to update. At this time, the only
+ // mutable field is [name][google.genomics.v1.Dataset.name]. The only
+ // acceptable value is "name". If unspecified, all mutable fields will be
+ // updated.
+ google.protobuf.FieldMask update_mask = 3;
+// The dataset delete request, used by the `DeleteDataset` method
+// (`DELETE /v1/datasets/{dataset_id}`).
+message DeleteDatasetRequest {
+ // The ID of the dataset to be deleted.
+ string dataset_id = 1;
+// The dataset undelete request, used by the `UndeleteDataset` method
+// (`POST /v1/datasets/{dataset_id}:undelete`).
+message UndeleteDatasetRequest {
+ // The ID of the dataset to be undeleted.
+ string dataset_id = 1;
+// The dataset get request.
+message GetDatasetRequest {
+ // The ID of the dataset.
+ string dataset_id = 1;
+syntax = "proto3";
+package google.genomics.v1;
+import "google/api/annotations.proto";
+import "google/protobuf/any.proto";
+import "google/protobuf/timestamp.proto";
+option cc_enable_arenas = true;
+option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
+option java_multiple_files = true;
+option java_outer_classname = "OperationsProto";
+option java_package = "com.google.genomics.v1";
+// Metadata describing an [Operation][google.longrunning.Operation].
+message OperationMetadata {
+ // The Google Cloud Project in which the job is scoped.
+ string project_id = 1;
+ // The time at which the job was submitted to the Genomics service.
+ google.protobuf.Timestamp create_time = 2;
+ // The time at which the job began to run.
+ google.protobuf.Timestamp start_time = 3;
+ // The time at which the job stopped running.
+ google.protobuf.Timestamp end_time = 4;
+ // The original request that started the operation. Note that this will be in
+ // the current version of the API. If the operation was started with the
+ // v1beta2 API and a GetOperation is performed on the v1 API, a v1 request
+ // will be returned.
+ google.protobuf.Any request = 5;
+ // Optional event messages that were generated during the job's execution.
+ // This also contains any warnings that were generated during import
+ // or export.
+ repeated OperationEvent events = 6;
+ // This field is deprecated. Use `labels` instead. Optionally provided by the
+ // caller when submitting the request that creates the operation.
+ string client_id = 7;
+ // Runtime metadata on this Operation.
+ google.protobuf.Any runtime_metadata = 8;
+ // Optionally provided by the caller when submitting the request that creates
+ // the operation.
+ map<string, string> labels = 9;
+// An event that occurred during an [Operation][google.longrunning.Operation].
+message OperationEvent {
+ // Optional time of when event started.
+ google.protobuf.Timestamp start_time = 1;
+ // Optional time of when event finished. An event can have a start time and no
+ // finish time. If an event has a finish time, there must be a start time.
+ google.protobuf.Timestamp end_time = 2;
+ // Required description of event.
+ string description = 3;
+syntax = "proto3";
+package google.genomics.v1;
+import "google/api/annotations.proto";
+option cc_enable_arenas = true;
+option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
+option java_multiple_files = true;
+option java_outer_classname = "PositionProto";
+option java_package = "com.google.genomics.v1";
+// An abstraction for referring to a genomic position, in relation to some
+// already known reference. For now, represents a genomic position as a
+// reference name, a base number on that reference (0-based), and a
+// determination of forward or reverse strand.
+message Position {
+ // The name of the reference in whatever reference set is being used.
+ string reference_name = 1;
+ // The 0-based offset from the start of the forward strand for that reference.
+ int64 position = 2;
+ // Whether this position is on the reverse strand, as opposed to the forward
+ // strand.
+ bool reverse_strand = 3;
+syntax = "proto3";
+package google.genomics.v1;
+import "google/api/annotations.proto";
+option cc_enable_arenas = true;
+option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
+option java_multiple_files = true;
+option java_outer_classname = "RangeProto";
+option java_package = "com.google.genomics.v1";
+// A 0-based half-open genomic coordinate range for search requests.
+message Range {
+ // The reference sequence name, for example `chr1`,
+ // `1`, or `chrX`.
+ string reference_name = 1;
+ // The start position of the range on the reference, 0-based inclusive.
+ int64 start = 2;
+ // The end position of the range on the reference, 0-based exclusive.
+ int64 end = 3;
+syntax = "proto3";
+package google.genomics.v1;
+import "google/api/annotations.proto";
+import "google/genomics/v1/cigar.proto";
+import "google/genomics/v1/position.proto";
+import "google/protobuf/struct.proto";
+option cc_enable_arenas = true;
+option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
+option java_multiple_files = true;
+option java_outer_classname = "ReadAlignmentProto";
+option java_package = "com.google.genomics.v1";
+// A linear alignment can be represented by one CIGAR string. Describes the
+// mapped position and local alignment of the read to the reference.
+message LinearAlignment {
+ // The position of this alignment.
+ Position position = 1;
+ // The mapping quality of this alignment. Represents how likely
+ // the read maps to this position as opposed to other locations.
+ //
+ // Specifically, this is -10 log10 Pr(mapping position is wrong), rounded to
+ // the nearest integer.
+ int32 mapping_quality = 2;
+ // Represents the local alignment of this sequence (alignment matches, indels,
+ // etc) against the reference.
+ repeated CigarUnit cigar = 3;
+// A read alignment describes a linear alignment of a string of DNA to a
+// [reference sequence][google.genomics.v1.Reference], in addition to metadata
+// about the fragment (the molecule of DNA sequenced) and the read (the bases
+// which were read by the sequencer). A read is equivalent to a line in a SAM
+// file. A read belongs to exactly one read group and exactly one
+// [read group set][google.genomics.v1.ReadGroupSet].
+// For more genomics resource definitions, see [Fundamentals of Google
+// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+// ### Reverse-stranded reads
+// Mapped reads (reads having a non-null `alignment`) can be aligned to either
+// the forward or the reverse strand of their associated reference. Strandedness
+// of a mapped read is encoded by `alignment.position.reverseStrand`.
+// If we consider the reference to be a forward-stranded coordinate space of
+// `[0, reference.length)` with `0` as the left-most position and
+// `reference.length` as the right-most position, reads are always aligned left
+// to right. That is, `alignment.position.position` always refers to the
+// left-most reference coordinate and `alignment.cigar` describes the alignment
+// of this read to the reference from left to right. All per-base fields such as
+// `alignedSequence` and `alignedQuality` share this same left-to-right
+// orientation; this is true of reads which are aligned to either strand. For
+// reverse-stranded reads, this means that `alignedSequence` is the reverse
+// complement of the bases that were originally reported by the sequencing
+// machine.
+// ### Generating a reference-aligned sequence string
+// When interacting with mapped reads, it's often useful to produce a string
+// representing the local alignment of the read to reference. The following
+// pseudocode demonstrates one way of doing this:
+//
+//     out = ""
+//     offset = 0
+//     for c in read.alignment.cigar {
+//       switch c.operation {
+//       case "ALIGNMENT_MATCH", "SEQUENCE_MATCH", "SEQUENCE_MISMATCH":
+//         out += read.alignedSequence[offset:offset+c.operationLength]
+//         offset += c.operationLength
+//         break
+//       case "CLIP_SOFT", "INSERT":
+//         offset += c.operationLength
+//         break
+//       case "PAD":
+//         out += repeat("*", c.operationLength)
+//         break
+//       case "DELETE":
+//         out += repeat("-", c.operationLength)
+//         break
+//       case "SKIP":
+//         out += repeat(" ", c.operationLength)
+//         break
+//       case "CLIP_HARD":
+//         break
+//       }
+//     }
+//     return out
+//
+// ### Converting to SAM's CIGAR string
+// The following pseudocode generates a SAM CIGAR string from the
+// `cigar` field. Note that this is a lossy conversion
+// (`cigar.referenceSequence` is lost).
+//
+//     cigarMap = {
+//       "ALIGNMENT_MATCH": "M",
+//       "INSERT": "I",
+//       "DELETE": "D",
+//       "SKIP": "N",
+//       "CLIP_SOFT": "S",
+//       "CLIP_HARD": "H",
+//       "PAD": "P",
+//       "SEQUENCE_MATCH": "=",
+//       "SEQUENCE_MISMATCH": "X",
+//     }
+//     cigarStr = ""
+//     for c in read.alignment.cigar {
+//       cigarStr += c.operationLength + cigarMap[c.operation]
+//     }
+//     return cigarStr
+message Read {
+ // The server-generated read ID, unique across all reads. This is different
+ // from the `fragmentName`.
+ string id = 1;
+ // The ID of the read group this read belongs to. A read belongs to exactly
+ // one read group. This is a server-generated ID which is distinct from SAM's
+ // RG tag (for that value, see
+ // [ReadGroup.name][google.genomics.v1.ReadGroup.name]).
+ string read_group_id = 2;
+ // The ID of the read group set this read belongs to. A read belongs to
+ // exactly one read group set.
+ string read_group_set_id = 3;
+ // The fragment name. Equivalent to QNAME (query template name) in SAM.
+ string fragment_name = 4;
+ // The orientation and the distance between reads from the fragment are
+ // consistent with the sequencing protocol (SAM flag 0x2).
+ bool proper_placement = 5;
+ // The fragment is a PCR or optical duplicate (SAM flag 0x400).
+ bool duplicate_fragment = 6;
+ // The observed length of the fragment, equivalent to TLEN in SAM.
+ int32 fragment_length = 7;
+ // The read number in sequencing. 0-based and less than numberReads. This
+ // field replaces SAM flag 0x40 and 0x80.
+ int32 read_number = 8;
+ // The number of reads in the fragment (extension to SAM flag 0x1).
+ int32 number_reads = 9;
+ // Whether this read did not pass filters, such as platform or vendor quality
+ // controls (SAM flag 0x200).
+ bool failed_vendor_quality_checks = 10;
+ // The linear alignment for this alignment record. This field is null for
+ // unmapped reads.
+ LinearAlignment alignment = 11;
+ // Whether this alignment is secondary. Equivalent to SAM flag 0x100.
+ // A secondary alignment represents an alternative to the primary alignment
+ // for this read. Aligners may return secondary alignments if a read can map
+ // ambiguously to multiple coordinates in the genome. By convention, each read
+ // has one and only one alignment where both `secondaryAlignment`
+ // and `supplementaryAlignment` are false.
+ bool secondary_alignment = 12;
+ // Whether this alignment is supplementary. Equivalent to SAM flag 0x800.
+ // Supplementary alignments are used in the representation of a chimeric
+ // alignment. In a chimeric alignment, a read is split into multiple
+ // linear alignments that map to different reference contigs. The first
+ // linear alignment in the read will be designated as the representative
+ // alignment; the remaining linear alignments will be designated as
+ // supplementary alignments. These alignments may have different mapping
+ // quality scores. In each linear alignment in a chimeric alignment, the read
+ // will be hard clipped. The `alignedSequence` and
+ // `alignedQuality` fields in the alignment record will only
+ // represent the bases for its respective linear alignment.
+ bool supplementary_alignment = 13;
+ // The bases of the read sequence contained in this alignment record,
+ // **without CIGAR operations applied** (equivalent to SEQ in SAM).
+ // `alignedSequence` and `alignedQuality` may be
+ // shorter than the full read sequence and quality. This will occur if the
+ // alignment is part of a chimeric alignment, or if the read was trimmed. When
+ // this occurs, the CIGAR for this read will begin/end with a hard clip
+ // operator that will indicate the length of the excised sequence.
+ string aligned_sequence = 14;
+ // The quality of the read sequence contained in this alignment record
+ // (equivalent to QUAL in SAM).
+ // `alignedSequence` and `alignedQuality` may be shorter than the full read
+ // sequence and quality. This will occur if the alignment is part of a
+ // chimeric alignment, or if the read was trimmed. When this occurs, the CIGAR
+ // for this read will begin/end with a hard clip operator that will indicate
+ // the length of the excised sequence.
+ repeated int32 aligned_quality = 15;
+ // The mapping of the primary alignment of the
+ // `(readNumber+1)%numberReads` read in the fragment. It replaces
+ // mate position and mate strand in SAM.
+ Position next_mate_position = 16;
+ // A map of additional read alignment information. This must be of the form
+ // map<string, string[]> (string key mapping to a list of string values).
+ map<string, google.protobuf.ListValue> info = 17;
+syntax = "proto3";
+package google.genomics.v1;
+import "google/api/annotations.proto";
+import "google/protobuf/struct.proto";
+option cc_enable_arenas = true;
+option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
+option java_multiple_files = true;
+option java_outer_classname = "ReadGroupProto";
+option java_package = "com.google.genomics.v1";
+// A read group is all the data that's processed the same way by the sequencer.
+message ReadGroup {
+ message Experiment {
+ // A client-supplied library identifier; a library is a collection of DNA
+ // fragments which have been prepared for sequencing from a sample. This
+ // field is important for quality control as error or bias can be introduced
+ // during sample preparation.
+ string library_id = 1;
+ // The platform unit used as part of this experiment, for example
+ // flowcell-barcode.lane for Illumina or slide for SOLiD. Corresponds to the
+ // @RG PU field in the SAM spec.
+ string platform_unit = 2;
+ // The sequencing center used as part of this experiment.
+ string sequencing_center = 3;
+ // The instrument model used as part of this experiment. This maps to
+ // sequencing technology in the SAM spec.
+ string instrument_model = 4;
+ }
+ message Program {
+ // The command line used to run this program.
+ string command_line = 1;
+ // The user specified locally unique ID of the program. Used along with
+ // `prevProgramId` to define an ordering between programs.
+ string id = 2;
+ // The display name of the program. This is typically the colloquial name of
+ // the tool used, for example 'bwa' or 'picard'.
+ string name = 3;
+ // The ID of the program run before this one.
+ string prev_program_id = 4;
+ // The version of the program run.
+ string version = 5;
+ }
+ // The server-generated read group ID, unique for all read groups.
+ // Note: This is different than the @RG ID field in the SAM spec. For that
+ // value, see [name][google.genomics.v1.ReadGroup.name].
+ string id = 1;
+ // The dataset to which this read group belongs.
+ string dataset_id = 2;
+ // The read group name. This corresponds to the @RG ID field in the SAM spec.
+ string name = 3;
+ // A free-form text description of this read group.
+ string description = 4;
+ // A client-supplied sample identifier for the reads in this read group.
+ string sample_id = 5;
+ // The experiment used to generate this read group.
+ Experiment experiment = 6;
+ // The predicted insert size of this read group. The insert size is the length
+ // of the sequenced DNA fragment from end-to-end, not including the adapters.
+ int32 predicted_insert_size = 7;
+ // The programs used to generate this read group. Programs are always
+ // identical for all read groups within a read group set. For this reason,
+ // only the first read group in a returned set will have this field
+ // populated.
+ repeated Program programs = 10;
+ // The reference set the reads in this read group are aligned to.
+ string reference_set_id = 11;
+ // A map of additional read group information. This must be of the form
+ // map<string, string[]> (string key mapping to a list of string values).
+ map<string, google.protobuf.ListValue> info = 12;
+syntax = "proto3";
+package google.genomics.v1;
+import "google/api/annotations.proto";
+import "google/genomics/v1/readgroup.proto";
+import "google/protobuf/struct.proto";
+option cc_enable_arenas = true;
+option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
+option java_multiple_files = true;
+option java_outer_classname = "ReadGroupSetProto";
+option java_package = "com.google.genomics.v1";
+// A read group set is a logical collection of read groups, which are
+// collections of reads produced by a sequencer. A read group set typically
+// models reads corresponding to one sample, sequenced one way, and aligned one
+// way.
+// * A read group set belongs to one dataset.
+// * A read group belongs to one read group set.
+// * A read belongs to one read group.
+// For more genomics resource definitions, see [Fundamentals of Google
+// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+message ReadGroupSet {
+ // The server-generated read group set ID, unique for all read group sets.
+ string id = 1;
+ // The dataset to which this read group set belongs.
+ string dataset_id = 2;
+ // The reference set to which the reads in this read group set are aligned.
+ string reference_set_id = 3;
+ // The read group set name. By default this will be initialized to the sample
+ // name of the sequenced data contained in this set.
+ string name = 4;
+ // The filename of the original source file for this read group set, if any.
+ string filename = 5;
+ // The read groups in this set. There are typically 1-10 read groups in a read
+ // group set.
+ repeated ReadGroup read_groups = 6;
+ // A map of additional read group set information.
+ map<string, google.protobuf.ListValue> info = 7;
+syntax = "proto3";
+package google.genomics.v1;
+import "google/api/annotations.proto";
+import "google/genomics/v1/range.proto";
+import "google/genomics/v1/readalignment.proto";
+import "google/genomics/v1/readgroupset.proto";
+import "google/longrunning/operations.proto";
+import "google/protobuf/empty.proto";
+import "google/protobuf/field_mask.proto";
+option cc_enable_arenas = true;
+option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
+option java_multiple_files = true;
+option java_outer_classname = "ReadsProto";
+option java_package = "com.google.genomics.v1";
+// Service for streaming reads. Exposes a single server-streaming RPC,
+// `StreamReads`, bound to `POST /v1/reads:stream`.
+service StreamingReadService {
+ // Returns a stream of all the reads matching the search request, ordered
+ // by reference name, position, and ID.
+ rpc StreamReads(StreamReadsRequest) returns (stream StreamReadsResponse) {
+ option (google.api.http) = {
+ post: "/v1/reads:stream"
+ body: "*"
+ };
+ }
+// The Readstore. A data store for DNA sequencing Reads.
+service ReadServiceV1 {
+ // Creates read group sets by asynchronously importing the provided
+ // information.
+ //
+ // For the definitions of read group sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // The caller must have WRITE permissions to the dataset.
+ //
+ // ## Notes on [BAM](https://samtools.github.io/hts-specs/SAMv1.pdf) import
+ //
+ // - Tags will be converted to strings - tag types are not preserved
+ // - Comments (`@CO`) in the input file header will not be preserved
+ // - Original header order of references (`@SQ`) will not be preserved
+ // - Any reverse stranded unmapped reads will be reverse complemented, and
+ // their qualities (also the "BQ" and "OQ" tags, if any) will be reversed
+ // - Unmapped reads will be stripped of positional information (reference name
+ // and position)
+ rpc ImportReadGroupSets(ImportReadGroupSetsRequest)
+ returns (google.longrunning.Operation) {
+ option (google.api.http) = {
+ post: "/v1/readgroupsets:import"
+ body: "*"
+ };
+ }
+ // Exports a read group set to a BAM file in Google Cloud Storage.
+ //
+ // For the definitions of read group sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // Note that currently there may be some differences between exported BAM
+ // files and the original BAM file at the time of import. See
+ // [ImportReadGroupSets][google.genomics.v1.ReadServiceV1.ImportReadGroupSets]
+ // for caveats.
+ rpc ExportReadGroupSet(ExportReadGroupSetRequest)
+ returns (google.longrunning.Operation) {
+ option (google.api.http) = {
+ post: "/v1/readgroupsets/{read_group_set_id}:export"
+ body: "*"
+ };
+ }
+ // Searches for read group sets matching the criteria.
+ //
+ // For the definitions of read group sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // Implements
+ // [GlobalAllianceApi.searchReadGroupSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L135).
+ rpc SearchReadGroupSets(SearchReadGroupSetsRequest)
+ returns (SearchReadGroupSetsResponse) {
+ option (google.api.http) = {
+ post: "/v1/readgroupsets/search"
+ body: "*"
+ };
+ }
+ // Updates a read group set.
+ //
+ // For the definitions of read group sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // This method supports patch semantics.
+ rpc UpdateReadGroupSet(UpdateReadGroupSetRequest) returns (ReadGroupSet) {
+ option (google.api.http) = {
+ patch: "/v1/readgroupsets/{read_group_set_id}"
+ body: "read_group_set"
+ };
+ }
+ // Deletes a read group set.
+ //
+ // For the definitions of read group sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ rpc DeleteReadGroupSet(DeleteReadGroupSetRequest)
+ returns (google.protobuf.Empty) {
+ option (google.api.http) = {
+ delete: "/v1/readgroupsets/{read_group_set_id}"
+ };
+ }
+ // Gets a read group set by ID.
+ //
+ // For the definitions of read group sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ rpc GetReadGroupSet(GetReadGroupSetRequest) returns (ReadGroupSet) {
+ option (google.api.http) = {
+ get: "/v1/readgroupsets/{read_group_set_id}"
+ };
+ }
+ // Lists fixed width coverage buckets for a read group set, each of which
+ // corresponds to a range of a reference sequence. Each bucket summarizes
+ // coverage information across its corresponding genomic range.
+ //
+ // For the definitions of read group sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // Coverage is defined as the number of reads which are aligned to a given
+ // base in the reference sequence. Coverage buckets are available at several
+ // precomputed bucket widths, enabling retrieval of various coverage 'zoom
+ // levels'. The caller must have READ permissions for the target read group
+ // set.
+ rpc ListCoverageBuckets(ListCoverageBucketsRequest)
+ returns (ListCoverageBucketsResponse) {
+ option (google.api.http) = {
+ get: "/v1/readgroupsets/{read_group_set_id}/coveragebuckets"
+ };
+ }
+ // Gets a list of reads for one or more read group sets.
+ //
+ // For the definitions of read group sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // Reads search operates over a genomic coordinate space of reference sequence
+ // & position defined over the reference sequences to which the requested
+ // read group sets are aligned.
+ //
+ // If a target positional range is specified, search returns all reads whose
+ // alignment to the reference genome overlaps the range. A query which
+ // specifies only read group set IDs yields all reads in those read group
+ // sets, including unmapped reads.
+ //
+ // All reads returned (including reads on subsequent pages) are ordered by
+ // genomic coordinate (by reference sequence, then position). Reads with
+ // equivalent genomic coordinates are returned in an unspecified order. This
+ // order is consistent, such that two queries for the same content (regardless
+ // of page size) yield reads in the same order across their respective streams
+ // of paginated responses.
+ //
+ // Implements
+ // [GlobalAllianceApi.searchReads](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/readmethods.avdl#L85).
+ rpc SearchReads(SearchReadsRequest) returns (SearchReadsResponse) {
+ option (google.api.http) = {
+ post: "/v1/reads/search"
+ body: "*"
+ };
+ }
+// The read group set search request.
+message SearchReadGroupSetsRequest {
+ // Restricts this query to read group sets within the given datasets. At least
+ // one ID must be provided.
+ repeated string dataset_ids = 1;
+ // Only return read group sets for which a substring of the name matches this
+ // string.
+ string name = 3;
+ // The continuation token, which is used to page through large result sets.
+ // To get the next page of results, set this parameter to the value of
+ // `nextPageToken` from the previous response.
+ string page_token = 2;
+ // The maximum number of results to return in a single page. If unspecified,
+ // defaults to 256. The maximum value is 1024.
+ int32 page_size = 4;
+// The read group set search response.
+message SearchReadGroupSetsResponse {
+ // The list of matching read group sets.
+ repeated ReadGroupSet read_group_sets = 1;
+ // The continuation token, which is used to page through large result sets.
+ // Provide this value in a subsequent request to return the next page of
+ // results. This field will be empty if there aren't any additional results.
+ string next_page_token = 2;
+// The read group set import request.
+message ImportReadGroupSetsRequest {
+ enum PartitionStrategy {
+ // Unspecified partition strategy. Proto3 requires the first enum value to
+ // be zero, so this is the value of an unset `partition_strategy` field.
+ PARTITION_STRATEGY_UNSPECIFIED = 0;
+ // In most cases, this strategy yields one read group set per file. This is
+ // the default behavior.
+ //
+ // Allocate one read group set per file per sample. For BAM files, read
+ // groups are considered to share a sample if they have identical sample
+ // names. Furthermore, all reads for each file which do not belong to a read
+ // group, if any, will be grouped into a single read group set per-file.
+ PER_FILE_PER_SAMPLE = 1;
+ // Includes all read groups in all imported files into a single read group
+ // set. Requires that the headers for all imported files are equivalent. All
+ // reads which do not belong to a read group, if any, will be grouped into a
+ // separate read group set.
+ MERGE_ALL = 2;
+ }
+ // Required. The ID of the dataset these read group sets will belong to. The
+ // caller must have WRITE permissions to this dataset.
+ string dataset_id = 1;
+ // The reference set to which the imported read group sets are aligned, if
+ // any. The reference names of this reference set must be a superset of those
+ // found in the imported file headers. If no reference set id is provided, a
+ // best effort is made to associate with a matching reference set.
+ string reference_set_id = 4;
+ // A list of URIs pointing at [BAM
+ // files](https://samtools.github.io/hts-specs/SAMv1.pdf)
+ // in Google Cloud Storage.
+ // Those URIs can include wildcards (*), but do not add or remove
+ // matching files before import has completed.
+ //
+ // Note that Google Cloud Storage object listing is only eventually
+ // consistent: files added may not be immediately visible to
+ // everyone. Thus, if using a wildcard it is preferable not to start
+ // the import immediately after the files are created.
+ repeated string source_uris = 2;
+ // The partition strategy describes how read groups are partitioned into read
+ // group sets.
+ PartitionStrategy partition_strategy = 5;
+// The read group set import response.
+message ImportReadGroupSetsResponse {
+ // IDs of the read group sets that were created.
+ repeated string read_group_set_ids = 1;
+// The read group set export request.
+message ExportReadGroupSetRequest {
+ // Required. The Google Cloud project ID that owns this
+ // export. The caller must have WRITE access to this project.
+ string project_id = 1;
+ // Required. A Google Cloud Storage URI for the exported BAM file.
+ // The currently authenticated user must have write access to the new file.
+ // An error will be returned if the URI already contains data.
+ string export_uri = 2;
+ // Required. The ID of the read group set to export. The caller must have
+ // READ access to this read group set.
+ string read_group_set_id = 3;
+ // The reference names to export. If this is not specified, all reference
+ // sequences, including unmapped reads, are exported.
+ // Use `*` to export only unmapped reads.
+ repeated string reference_names = 4;
+// The read group set update request, used by the `UpdateReadGroupSet` method
+// (`PATCH /v1/readgroupsets/{read_group_set_id}`, with `read_group_set` as the
+// HTTP body).
+message UpdateReadGroupSetRequest {
+ // The ID of the read group set to be updated. The caller must have WRITE
+ // permissions to the dataset associated with this read group set.
+ string read_group_set_id = 1;
+ // The new read group set data. See `updateMask` for details on mutability of
+ // fields.
+ ReadGroupSet read_group_set = 2;
+ // An optional mask specifying which fields to update. Supported fields:
+ //
+ // * [name][google.genomics.v1.ReadGroupSet.name].
+ // * [referenceSetId][google.genomics.v1.ReadGroupSet.reference_set_id].
+ //
+ // Leaving `updateMask` unset is equivalent to specifying all mutable
+ // fields.
+ google.protobuf.FieldMask update_mask = 3;
+// The read group set delete request.
+message DeleteReadGroupSetRequest {
+ // The ID of the read group set to be deleted. The caller must have WRITE
+ // permissions to the dataset associated with this read group set.
+ string read_group_set_id = 1;
+// The read group set get request.
+message GetReadGroupSetRequest {
+ // The ID of the read group set.
+ string read_group_set_id = 1;
+// The coverage bucket list request for a single read group set, optionally
+// restricted to a range on one reference.
+message ListCoverageBucketsRequest {
+ // Required. The ID of the read group set over which coverage is requested.
+ string read_group_set_id = 1;
+ // The name of the reference to query, within the reference set associated
+ // with this query. Optional.
+ string reference_name = 3;
+ // The start position of the range on the reference, 0-based inclusive. If
+ // specified, `referenceName` must also be specified. Defaults to 0.
+ int64 start = 4;
+ // The end position of the range on the reference, 0-based exclusive. If
+ // specified, `referenceName` must also be specified. If unset or 0, defaults
+ // to the length of the reference.
+ int64 end = 5;
+ // The desired width of each reported coverage bucket in base pairs. This
+ // will be rounded down to the nearest precomputed bucket width; the value
+ // of which is returned as `bucketWidth` in the response. Defaults
+ // to infinity (each bucket spans an entire reference sequence) or the length
+ // of the target range, if specified. The smallest precomputed
+ // `bucketWidth` is currently 2048 base pairs; this is subject to
+ // change.
+ int64 target_bucket_width = 6;
+ // The continuation token, which is used to page through large result sets.
+ // To get the next page of results, set this parameter to the value of
+ // `nextPageToken` from the previous response.
+ string page_token = 7;
+ // The maximum number of results to return in a single page. If unspecified,
+ // defaults to 1024. The maximum value is 2048.
+ int32 page_size = 8;
+// A bucket over which read coverage has been precomputed. A bucket corresponds
+// to a specific range of the reference sequence.
+message CoverageBucket {
+ // The genomic coordinate range spanned by this bucket.
+ Range range = 1;
+ // The average number of reads that are aligned to each individual
+ // reference base in this bucket.
+ float mean_coverage = 2;
+// The coverage bucket list response, containing one page of coverage buckets.
+message ListCoverageBucketsResponse {
+ // The length of each coverage bucket in base pairs. Note that buckets at the
+ // end of a reference sequence may be shorter. This value is omitted if the
+ // bucket width is infinity (the default behaviour, with no range or
+ // `targetBucketWidth`).
+ int64 bucket_width = 1;
+ // The coverage buckets. The list of buckets is sparse; a bucket with 0
+ // overlapping reads is not returned. A bucket never crosses more than one
+ // reference sequence. Each bucket has width `bucketWidth`, unless
+ // its end is the end of the reference sequence.
+ repeated CoverageBucket coverage_buckets = 2;
+ // The continuation token, which is used to page through large result sets.
+ // Provide this value in a subsequent request to return the next page of
+ // results. This field will be empty if there aren't any additional results.
+ string next_page_token = 3;
+// The read search request.
+message SearchReadsRequest {
+ // The IDs of the read group sets within which to search for reads. All
+ // specified read group sets must be aligned against a common set of reference
+ // sequences; this defines the genomic coordinates for the query. Must specify
+ // one of `readGroupSetIds` or `readGroupIds`.
+ repeated string read_group_set_ids = 1;
+ // The IDs of the read groups within which to search for reads. All specified
+ // read groups must belong to the same read group sets. Must specify one of
+ // `readGroupSetIds` or `readGroupIds`.
+ repeated string read_group_ids = 5;
+ // The reference sequence name, for example `chr1`, `1`, or `chrX`. If set to
+ // `*`, only unmapped reads are returned. If unspecified, all reads (mapped
+ // and unmapped) are returned.
+ string reference_name = 7;
+ // The start position of the range on the reference, 0-based inclusive. If
+ // specified, `referenceName` must also be specified.
+ int64 start = 8;
+ // The end position of the range on the reference, 0-based exclusive. If
+ // specified, `referenceName` must also be specified.
+ int64 end = 9;
+ // The continuation token, which is used to page through large result sets.
+ // To get the next page of results, set this parameter to the value of
+ // `nextPageToken` from the previous response.
+ string page_token = 3;
+ // The maximum number of results to return in a single page. If unspecified,
+ // defaults to 256. The maximum value is 2048.
+ int32 page_size = 4;
+// The read search response, containing one page of matching alignments.
+message SearchReadsResponse {
+ // The list of matching alignments sorted by mapped genomic coordinate,
+ // if any, ascending in position within the same reference. Unmapped reads,
+ // which have no position, are returned contiguously and are sorted in
+ // ascending lexicographic order by fragment name.
+ repeated Read alignments = 1;
+ // The continuation token, which is used to page through large result sets.
+ // Provide this value in a subsequent request to return the next page of
+ // results. This field will be empty if there aren't any additional results.
+ string next_page_token = 2;
+// The stream reads request.
+message StreamReadsRequest {
+ // The Google Cloud project ID which will be billed
+ // for this access. The caller must have WRITE access to this project.
+ // Required.
+ string project_id = 1;
+ // The ID of the read group set from which to stream reads.
+ string read_group_set_id = 2;
+ // The reference sequence name, for example `chr1`,
+ // `1`, or `chrX`. If set to `*`, only unmapped reads are
+ // returned.
+ string reference_name = 3;
+ // The start position of the range on the reference, 0-based inclusive. If
+ // specified, `referenceName` must also be specified.
+ int64 start = 4;
+ // The end position of the range on the reference, 0-based exclusive. If
+ // specified, `referenceName` must also be specified.
+ int64 end = 5;
+ // Restricts results to a shard containing approximately `1/totalShards`
+ // of the normal response payload for this query. Results from a sharded
+ // request are disjoint from those returned by all queries which differ only
+ // in their shard parameter. A shard may yield 0 results; this is especially
+ // likely for large values of `totalShards`.
+ //
+ // Valid values are `[0, totalShards)`.
+ int32 shard = 6;
+ // Specifying `totalShards` causes a disjoint subset of the normal response
+ // payload to be returned for each query with a unique `shard` parameter
+ // specified. A best effort is made to yield equally sized shards. Sharding
+ // can be used to distribute processing amongst workers, where each worker is
+ // assigned a unique `shard` number and all workers specify the same
+ // `totalShards` number. The union of reads returned for all sharded queries
+ // `[0, totalShards)` is equal to those returned by a single unsharded query.
+ //
+ // Queries for different values of `totalShards` with common divisors will
+ // share shard boundaries. For example, streaming `shard` 2 of 5
+ // `totalShards` yields the same results as streaming `shard`s 4 and 5 of 10
+ // `totalShards`. This property can be leveraged for adaptive retries.
+ int32 total_shards = 7;
+// The stream reads response.
+message StreamReadsResponse {
+ // The alignments carried by this response message.
+ repeated Read alignments = 1;
diff --git a/google/genomics/v1/references.proto b/google/genomics/v1/references.proto
new file mode 100644
index 000000000..fb9dc4e63
--- /dev/null
+++ b/google/genomics/v1/references.proto
@@ -0,0 +1,295 @@
+// Copyright 2016 Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+syntax = "proto3";
+package google.genomics.v1;
+import "google/api/annotations.proto";
+option cc_enable_arenas = true;
+option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
+option java_multiple_files = true;
+option java_outer_classname = "ReferencesProto";
+option java_package = "com.google.genomics.v1";
+// Service for searching and retrieving reference sets and references, and for
+// listing the bases of a reference.
+service ReferenceServiceV1 {
+ // Searches for reference sets which match the given criteria.
+ //
+ // For the definitions of references and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // Implements
+ // [GlobalAllianceApi.searchReferenceSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L71).
+ rpc SearchReferenceSets(SearchReferenceSetsRequest)
+ returns (SearchReferenceSetsResponse) {
+ option (google.api.http) = {
+ post: "/v1/referencesets/search"
+ body: "*"
+ };
+ }
+ // Gets a reference set.
+ //
+ // For the definitions of references and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // Implements
+ // [GlobalAllianceApi.getReferenceSet](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L83).
+ rpc GetReferenceSet(GetReferenceSetRequest) returns (ReferenceSet) {
+ option (google.api.http) = {
+ get: "/v1/referencesets/{reference_set_id}"
+ };
+ }
+ // Searches for references which match the given criteria.
+ //
+ // For the definitions of references and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // Implements
+ // [GlobalAllianceApi.searchReferences](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L146).
+ rpc SearchReferences(SearchReferencesRequest)
+ returns (SearchReferencesResponse) {
+ option (google.api.http) = {
+ post: "/v1/references/search"
+ body: "*"
+ };
+ }
+ // Gets a reference.
+ //
+ // For the definitions of references and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // Implements
+ // [GlobalAllianceApi.getReference](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L158).
+ rpc GetReference(GetReferenceRequest) returns (Reference) {
+ option (google.api.http) = {
+ get: "/v1/references/{reference_id}"
+ };
+ }
+ // Lists the bases in a reference, optionally restricted to a range.
+ //
+ // For the definitions of references and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // Implements
+ // [GlobalAllianceApi.getReferenceBases](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/referencemethods.avdl#L221).
+ rpc ListBases(ListBasesRequest) returns (ListBasesResponse) {
+ option (google.api.http) = {
+ get: "/v1/references/{reference_id}/bases"
+ };
+ }
+// A reference is a canonical assembled DNA sequence, intended to act as a
+// reference coordinate space for other genomic annotations. A single reference
+// might represent the human chromosome 1 or mitochondrial DNA, for instance. A
+// reference belongs to one or more reference sets.
+// For more genomics resource definitions, see [Fundamentals of Google
+// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+message Reference {
+ // The server-generated reference ID, unique across all references.
+ string id = 1;
+ // The length of this reference's sequence.
+ int64 length = 2;
+ // MD5 of the upper-case sequence excluding all whitespace characters (this
+ // is equivalent to SQ:M5 in SAM). This value is represented in lower case
+ // hexadecimal format.
+ string md5checksum = 3;
+ // The name of this reference, for example `22`.
+ string name = 4;
+ // The URI from which the sequence was obtained. Typically specifies a FASTA
+ // format file.
+ string source_uri = 5;
+ // All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) ideally
+ // with a version number, for example `GCF_000001405.26`.
+ repeated string source_accessions = 6;
+ // ID from http://www.ncbi.nlm.nih.gov/taxonomy. For example, 9606 for human.
+ int32 ncbi_taxon_id = 7;
+// A reference set is a set of references which typically comprise a reference
+// assembly for a species, such as `GRCh38` which is representative
+// of the human genome. A reference set defines a common coordinate space for
+// comparing reference-aligned experimental data. A reference set contains 1 or
+// more references.
+// For more genomics resource definitions, see [Fundamentals of Google
+// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+message ReferenceSet {
+ // The server-generated reference set ID, unique across all reference sets.
+ string id = 1;
+ // The IDs of the reference objects that are part of this set.
+ // `Reference.md5checksum` must be unique within this set.
+ repeated string reference_ids = 2;
+ // Order-independent MD5 checksum which identifies this reference set. The
+ // checksum is computed by sorting the lower case hexadecimal
+ // `reference.md5checksum` strings (for all references in this set) in
+ // ascending lexicographic order, concatenating, and taking the MD5 of that
+ // value. The resulting value is represented in lower case hexadecimal format.
+ string md5checksum = 3;
+ // ID from http://www.ncbi.nlm.nih.gov/taxonomy (for example, 9606 for human)
+ // indicating the species which this reference set is intended to model. Note
+ // that contained references may specify a different `ncbiTaxonId`, as
+ // assemblies may contain reference sequences which do not belong to the
+ // modeled species, for example EBV in a human reference genome.
+ int32 ncbi_taxon_id = 4;
+ // Free text description of this reference set.
+ string description = 5;
+ // Public id of this reference set, such as `GRCh37`.
+ string assembly_id = 6;
+ // The URI from which the references were obtained.
+ string source_uri = 7;
+ // All known corresponding accession IDs in INSDC (GenBank/ENA/DDBJ) ideally
+ // with a version number, for example `NC_000001.11`.
+ repeated string source_accessions = 8;
+// The reference set search request.
+message SearchReferenceSetsRequest {
+ // If present, return reference sets for which the
+ // [md5checksum][google.genomics.v1.ReferenceSet.md5checksum] matches exactly.
+ repeated string md5checksums = 1;
+ // If present, return reference sets for which a prefix of any of
+ // [sourceAccessions][google.genomics.v1.ReferenceSet.source_accessions]
+ // match any of these strings. Accession numbers typically have a main number
+ // and a version, for example `NC_000001.11`.
+ repeated string accessions = 2;
+ // If present, return reference sets for which a substring of their
+ // `assemblyId` matches this string (case insensitive).
+ string assembly_id = 3;
+ // The continuation token, which is used to page through large result sets.
+ // To get the next page of results, set this parameter to the value of
+ // `nextPageToken` from the previous response.
+ string page_token = 4;
+ // The maximum number of results to return in a single page. If unspecified,
+ // defaults to 1024. The maximum value is 4096.
+ int32 page_size = 5;
+// The reference set search response, containing one page of results.
+message SearchReferenceSetsResponse {
+ // The matching reference sets.
+ repeated ReferenceSet reference_sets = 1;
+ // The continuation token, which is used to page through large result sets.
+ // Provide this value in a subsequent request to return the next page of
+ // results. This field will be empty if there aren't any additional results.
+ string next_page_token = 2;
+// The reference set get request.
+message GetReferenceSetRequest {
+ // The ID of the reference set.
+ string reference_set_id = 1;
+// The reference search request.
+message SearchReferencesRequest {
+ // If present, return references for which the
+ // [md5checksum][google.genomics.v1.Reference.md5checksum] matches exactly.
+ repeated string md5checksums = 1;
+ // If present, return references for which a prefix of any of
+ // [sourceAccessions][google.genomics.v1.Reference.source_accessions] match
+ // any of these strings. Accession numbers typically have a main number and a
+ // version, for example `GCF_000001405.26`.
+ repeated string accessions = 2;
+ // If present, return only references which belong to this reference set.
+ string reference_set_id = 3;
+ // The continuation token, which is used to page through large result sets.
+ // To get the next page of results, set this parameter to the value of
+ // `nextPageToken` from the previous response.
+ string page_token = 4;
+ // The maximum number of results to return in a single page. If unspecified,
+ // defaults to 1024. The maximum value is 4096.
+ int32 page_size = 5;
+// The reference search response, containing one page of results.
+message SearchReferencesResponse {
+ // The matching references.
+ repeated Reference references = 1;
+ // The continuation token, which is used to page through large result sets.
+ // Provide this value in a subsequent request to return the next page of
+ // results. This field will be empty if there aren't any additional results.
+ string next_page_token = 2;
+// The reference get request.
+message GetReferenceRequest {
+ // The ID of the reference.
+ string reference_id = 1;
+// The reference bases list request, optionally restricted to a range of the
+// reference.
+message ListBasesRequest {
+ // The ID of the reference.
+ string reference_id = 1;
+ // The start position (0-based) of this query. Defaults to 0.
+ int64 start = 2;
+ // The end position (0-based, exclusive) of this query. Defaults to the length
+ // of this reference.
+ int64 end = 3;
+ // The continuation token, which is used to page through large result sets.
+ // To get the next page of results, set this parameter to the value of
+ // `nextPageToken` from the previous response.
+ string page_token = 4;
+ // The maximum number of bases to return in a single page. If unspecified,
+ // defaults to 200Kbp (kilo base pairs). The maximum value is 10Mbp (mega base
+ // pairs).
+ int32 page_size = 5;
+// The reference bases list response, containing one page of bases.
+message ListBasesResponse {
+ // The offset position (0-based) of the given `sequence` from the
+ // start of this `Reference`. This value will differ for each page
+ // in a paginated request.
+ int64 offset = 1;
+ // A substring of the bases that make up this reference.
+ string sequence = 2;
+ // The continuation token, which is used to page through large result sets.
+ // Provide this value in a subsequent request to return the next page of
+ // results. This field will be empty if there aren't any additional results.
+ string next_page_token = 3;
diff --git a/google/genomics/v1/variants.proto b/google/genomics/v1/variants.proto
new file mode 100644
index 000000000..472a06f5f
--- /dev/null
+++ b/google/genomics/v1/variants.proto
@@ -0,0 +1,958 @@
+// Copyright 2016 Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+syntax = "proto3";
+package google.genomics.v1;
+import "google/api/annotations.proto";
+import "google/longrunning/operations.proto";
+import "google/protobuf/empty.proto";
+import "google/protobuf/field_mask.proto";
+import "google/protobuf/struct.proto";
+option cc_enable_arenas = true;
+option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
+option java_multiple_files = true;
+option java_outer_classname = "VariantsProto";
+option java_package = "com.google.genomics.v1";
+// Service for streaming the variants that match a search request.
+service StreamingVariantService {
+ // Returns a stream of all the variants matching the search request, ordered
+ // by reference name, position, and ID.
+ rpc StreamVariants(StreamVariantsRequest)
+ returns (stream StreamVariantsResponse) {
+ option (google.api.http) = {
+ post: "/v1/variants:stream"
+ body: "*"
+ };
+ }
+// Service for importing, exporting, and managing variant sets, variants, and
+// call sets.
+service VariantServiceV1 {
+ // Creates variant data by asynchronously importing the provided information.
+ //
+ // For the definitions of variant sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // The variants for import will be merged with any existing variant that
+ // matches its reference sequence, start, end, reference bases, and
+ // alternative bases. If no such variant exists, a new one will be created.
+ //
+ // When variants are merged, the call information from the new variant
+ // is added to the existing variant, and Variant info fields are merged
+ // as specified in
+ // [infoMergeConfig][google.genomics.v1.ImportVariantsRequest.info_merge_config].
+ // As a special case, for single-sample VCF files, QUAL and FILTER fields will
+ // be moved to the call level; these are sometimes interpreted in a
+ // call-specific context.
+ // Imported VCF headers are appended to the metadata already in a variant set.
+ rpc ImportVariants(ImportVariantsRequest)
+ returns (google.longrunning.Operation) {
+ option (google.api.http) = {
+ post: "/v1/variants:import"
+ body: "*"
+ };
+ }
+ // Creates a new variant set.
+ //
+ // For the definitions of variant sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // The provided variant set must have a valid `datasetId` set - all other
+ // fields are optional. Note that the `id` field will be ignored, as this is
+ // assigned by the server.
+ rpc CreateVariantSet(CreateVariantSetRequest) returns (VariantSet) {
+ option (google.api.http) = {
+ post: "/v1/variantsets"
+ body: "variant_set"
+ };
+ }
+ // Exports variant set data to an external destination.
+ //
+ // For the definitions of variant sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ rpc ExportVariantSet(ExportVariantSetRequest)
+ returns (google.longrunning.Operation) {
+ option (google.api.http) = {
+ post: "/v1/variantsets/{variant_set_id}:export"
+ body: "*"
+ };
+ }
+ // Gets a variant set by ID.
+ //
+ // For the definitions of variant sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ rpc GetVariantSet(GetVariantSetRequest) returns (VariantSet) {
+ option (google.api.http) = {
+ get: "/v1/variantsets/{variant_set_id}"
+ };
+ }
+ // Returns a list of all variant sets matching search criteria.
+ //
+ // For the definitions of variant sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // Implements
+ // [GlobalAllianceApi.searchVariantSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L49).
+ rpc SearchVariantSets(SearchVariantSetsRequest)
+ returns (SearchVariantSetsResponse) {
+ option (google.api.http) = {
+ post: "/v1/variantsets/search"
+ body: "*"
+ };
+ }
+ // Deletes a variant set including all variants, call sets, and calls within.
+ // This is not reversible.
+ //
+ // For the definitions of variant sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ rpc DeleteVariantSet(DeleteVariantSetRequest)
+ returns (google.protobuf.Empty) {
+ option (google.api.http) = {
+ delete: "/v1/variantsets/{variant_set_id}"
+ };
+ }
+ // Updates a variant set using patch semantics.
+ //
+ // For the definitions of variant sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ rpc UpdateVariantSet(UpdateVariantSetRequest) returns (VariantSet) {
+ option (google.api.http) = {
+ patch: "/v1/variantsets/{variant_set_id}"
+ body: "variant_set"
+ };
+ }
+ // Gets a list of variants matching the criteria.
+ //
+ // For the definitions of variants and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // Implements
+ // [GlobalAllianceApi.searchVariants](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L126).
+ rpc SearchVariants(SearchVariantsRequest) returns (SearchVariantsResponse) {
+ option (google.api.http) = {
+ post: "/v1/variants/search"
+ body: "*"
+ };
+ }
+ // Creates a new variant.
+ //
+ // For the definitions of variants and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ rpc CreateVariant(CreateVariantRequest) returns (Variant) {
+ option (google.api.http) = {
+ post: "/v1/variants"
+ body: "variant"
+ };
+ }
+ // Updates a variant.
+ //
+ // For the definitions of variants and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // This method supports patch semantics. Returns the modified variant without
+ // its calls.
+ rpc UpdateVariant(UpdateVariantRequest) returns (Variant) {
+ option (google.api.http) = {
+ patch: "/v1/variants/{variant_id}"
+ body: "variant"
+ };
+ }
+ // Deletes a variant.
+ //
+ // For the definitions of variants and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ rpc DeleteVariant(DeleteVariantRequest) returns (google.protobuf.Empty) {
+ option (google.api.http) = {
+ delete: "/v1/variants/{variant_id}"
+ };
+ }
+ // Gets a variant by ID.
+ //
+ // For the definitions of variants and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ rpc GetVariant(GetVariantRequest) returns (Variant) {
+ option (google.api.http) = {
+ get: "/v1/variants/{variant_id}"
+ };
+ }
+ // Merges the given variants with existing variants.
+ //
+ // For the definitions of variants and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // Each variant will be
+ // merged with an existing variant that matches its reference sequence,
+ // start, end, reference bases, and alternative bases. If no such variant
+ // exists, a new one will be created.
+ //
+ // When variants are merged, the call information from the new variant
+ // is added to the existing variant. Variant info fields are merged as
+ // specified in the
+ // [infoMergeConfig][google.genomics.v1.MergeVariantsRequest.info_merge_config]
+ // field of the MergeVariantsRequest.
+ //
+ // Please exercise caution when using this method! It is easy to introduce
+ // mistakes in existing variants and difficult to back out of them. For
+ // example, suppose you were trying to merge a new variant with an existing
+ // one and both variants contain calls that belong to callsets with the same
+ // callset ID.
+ //
+ // // Existing variant - irrelevant fields trimmed for clarity
+ // {
+ // "variantSetId": "10473108253681171589",
+ // "referenceName": "1",
+ // "start": "10582",
+ // "referenceBases": "G",
+ // "alternateBases": [
+ // "A"
+ // ],
+ // "calls": [
+ // {
+ // "callSetId": "10473108253681171589-0",
+ // "callSetName": "CALLSET0",
+ // "genotype": [
+ // 0,
+ // 1
+ // ]
+ // }
+ // ]
+ // }
+ //
+ // // New variant with conflicting call information
+ // {
+ // "variantSetId": "10473108253681171589",
+ // "referenceName": "1",
+ // "start": "10582",
+ // "referenceBases": "G",
+ // "alternateBases": [
+ // "A"
+ // ],
+ // "calls": [
+ // {
+ // "callSetId": "10473108253681171589-0",
+ // "callSetName": "CALLSET0",
+ // "genotype": [
+ // 1,
+ // 1
+ // ]
+ // }
+ // ]
+ // }
+ //
+ // The resulting merged variant would overwrite the existing calls with those
+ // from the new variant:
+ //
+ // {
+ // "variantSetId": "10473108253681171589",
+ // "referenceName": "1",
+ // "start": "10582",
+ // "referenceBases": "G",
+ // "alternateBases": [
+ // "A"
+ // ],
+ // "calls": [
+ // {
+ // "callSetId": "10473108253681171589-0",
+ // "callSetName": "CALLSET0",
+ // "genotype": [
+ // 1,
+ // 1
+ // ]
+ // }
+ // ]
+ // }
+ //
+ // This may be the desired outcome, but it is up to the user to determine
+ // if that is indeed the case.
+ rpc MergeVariants(MergeVariantsRequest) returns (google.protobuf.Empty) {
+ option (google.api.http) = {
+ post: "/v1/variants:merge"
+ body: "*"
+ };
+ }
+ // Gets a list of call sets matching the criteria.
+ //
+ // For the definitions of call sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // Implements
+ // [GlobalAllianceApi.searchCallSets](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variantmethods.avdl#L178).
+ rpc SearchCallSets(SearchCallSetsRequest) returns (SearchCallSetsResponse) {
+ option (google.api.http) = {
+ post: "/v1/callsets/search"
+ body: "*"
+ };
+ }
+ // Creates a new call set.
+ //
+ // For the definitions of call sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ rpc CreateCallSet(CreateCallSetRequest) returns (CallSet) {
+ option (google.api.http) = {
+ post: "/v1/callsets"
+ body: "call_set"
+ };
+ }
+ // Updates a call set.
+ //
+ // For the definitions of call sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ //
+ // This method supports patch semantics.
+ rpc UpdateCallSet(UpdateCallSetRequest) returns (CallSet) {
+ option (google.api.http) = {
+ patch: "/v1/callsets/{call_set_id}"
+ body: "call_set"
+ };
+ }
+ // Deletes a call set.
+ //
+ // For the definitions of call sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ rpc DeleteCallSet(DeleteCallSetRequest) returns (google.protobuf.Empty) {
+ option (google.api.http) = {
+ delete: "/v1/callsets/{call_set_id}"
+ };
+ }
+ // Gets a call set by ID.
+ //
+ // For the definitions of call sets and other genomics resources, see
+ // [Fundamentals of Google
+ // Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+ rpc GetCallSet(GetCallSetRequest) returns (CallSet) {
+ option (google.api.http) = {
+ get: "/v1/callsets/{call_set_id}"
+ };
+ }
+// Metadata describes a single piece of variant call metadata.
+// These data include a top level key and either a single value string (value)
+// or a list of key-value pairs (info).
+// Value and info are mutually exclusive.
+message VariantSetMetadata {
+ // The data type of the values described by a piece of metadata.
+ enum Type {
+ // The type was not specified. proto3 requires the first enum value to be
+ // zero; it is also the default when `type` is left unset.
+ TYPE_UNSPECIFIED = 0;
+ // Integer data.
+ INTEGER = 1;
+ // Floating point data.
+ FLOAT = 2;
+ // Flag data.
+ FLAG = 3;
+ // Character data.
+ CHARACTER = 4;
+ // String data.
+ STRING = 5;
+ }
+ // The top-level key.
+ string key = 1;
+ // The value field for simple metadata.
+ string value = 2;
+ // User-provided ID field, not enforced by this API.
+ // Two or more pieces of structured metadata with identical
+ // id and key fields are considered equivalent.
+ string id = 4;
+ // The type of data. Possible types include: Integer, Float,
+ // Flag, Character, and String.
+ Type type = 5;
+ // The number of values that can be included in a field described by this
+ // metadata.
+ string number = 8;
+ // A textual description of this metadata.
+ string description = 7;
+ // Remaining structured metadata key-value pairs. This must be of the form
+ // map<string, string[]> (string key mapping to a list of string values).
+ map<string, google.protobuf.ListValue> info = 3;
+// A variant set is a collection of call sets and variants. It contains summary
+// statistics of those contents. A variant set belongs to a dataset.
+//
+// For more genomics resource definitions, see [Fundamentals of Google
+// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+message VariantSet {
+  // The dataset to which this variant set belongs.
+  string dataset_id = 1;
+  // The server-generated variant set ID, unique across all variant sets.
+  string id = 2;
+  // The reference set to which the variant set is mapped. The reference set
+  // describes the alignment provenance of the variant set, while the
+  // `referenceBounds` describe the shape of the actual variant data. The
+  // reference set's reference names are a superset of those found in the
+  // `referenceBounds`.
+  //
+  // For example, given a variant set that is mapped to the GRCh38 reference
+  // set and contains a single variant on reference 'X', `referenceBounds`
+  // would contain only an entry for 'X', while the associated reference set
+  // enumerates all possible references: '1', '2', 'X', 'Y', 'MT', etc.
+  string reference_set_id = 6;
+  // A list of all references used by the variants in a variant set
+  // with associated coordinate upper bounds for each one.
+  repeated ReferenceBound reference_bounds = 5;
+  // The metadata associated with this variant set.
+  repeated VariantSetMetadata metadata = 4;
+  // User-specified, mutable name.
+  string name = 7;
+  // A textual description of this variant set.
+  string description = 8;
+}
+// A variant represents a change in DNA sequence relative to a reference
+// sequence. For example, a variant could represent a SNP or an insertion.
+// Variants belong to a variant set.
+//
+// For more genomics resource definitions, see [Fundamentals of Google
+// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+//
+// Each of the calls on a variant represents a determination of genotype with
+// respect to that variant. For example, a call might assign probability of
+// 0.32 to the occurrence of a SNP named rs1234 in a sample named NA12345. A
+// call belongs to a call set, which contains related calls typically from one
+// sample.
+message Variant {
+  // The ID of the variant set this variant belongs to.
+  string variant_set_id = 15;
+  // The server-generated variant ID, unique across all variants.
+  string id = 2;
+  // Names for the variant, for example a RefSNP ID.
+  repeated string names = 3;
+  // The date this variant was created, in milliseconds from the epoch.
+  int64 created = 12;
+  // The reference on which this variant occurs
+  // (such as `chr20` or `X`).
+  string reference_name = 14;
+  // The position at which this variant occurs (0-based).
+  // This corresponds to the first base of the string of reference bases.
+  int64 start = 16;
+  // The end position (0-based) of this variant. This corresponds to the first
+  // base after the last base in the reference allele. So, the length of
+  // the reference allele is (end - start). This is useful for variants
+  // that don't explicitly give alternate bases, for example large deletions.
+  int64 end = 13;
+  // The reference bases for this variant. They start at the given
+  // position.
+  string reference_bases = 6;
+  // The bases that appear instead of the reference bases.
+  repeated string alternate_bases = 7;
+  // A measure of how likely this variant is to be real.
+  // A higher value is better.
+  double quality = 8;
+  // A list of filters (normally quality filters) this variant has failed.
+  // `PASS` indicates this variant has passed all filters.
+  repeated string filter = 9;
+  // A map of additional variant information. This must be of the form
+  // map<string, string[]> (string key mapping to a list of string values).
+  map<string, google.protobuf.ListValue> info = 10;
+  // The variant calls for this particular variant. Each one represents the
+  // determination of genotype with respect to this variant.
+  repeated VariantCall calls = 11;
+}
+// A call represents the determination of genotype with respect to a particular
+// variant. It may include associated information such as quality and phasing.
+// For example, a call might assign a probability of 0.32 to the occurrence of
+// a SNP named rs1234 in a call set with the name NA12345.
+message VariantCall {
+  // The ID of the call set this variant call belongs to.
+  string call_set_id = 8;
+  // The name of the call set this variant call belongs to.
+  string call_set_name = 9;
+  // The genotype of this variant call. Each value represents either the value
+  // of the `referenceBases` field or a 1-based index into
+  // `alternateBases`. If a variant had a `referenceBases`
+  // value of `T` and an `alternateBases`
+  // value of `["A", "C"]`, and the `genotype` was
+  // `[2, 1]`, that would mean the call
+  // represented the heterozygous value `CA` for this variant.
+  // If the `genotype` was instead `[0, 1]`, the
+  // represented value would be `TA`. Ordering of the
+  // genotype values is important if the `phaseset` is present.
+  // If a genotype is not called (that is, a `.` is present in the
+  // GT string), -1 is returned.
+  repeated int32 genotype = 7;
+  // If this field is present, this variant call's genotype ordering implies
+  // the phase of the bases and is consistent with any other variant calls in
+  // the same reference sequence which have the same phaseset value.
+  // When importing data from VCF, if the genotype data was phased but no
+  // phase set was specified, this field will be set to `*`.
+  string phaseset = 5;
+  // The genotype likelihoods for this variant call. Each array entry
+  // represents how likely a specific genotype is for this call. The value
+  // ordering is defined by the GL tag in the VCF spec.
+  // If Phred-scaled genotype likelihood scores (PL) are available and
+  // log10(P) genotype likelihood scores (GL) are not, PL scores are converted
+  // to GL scores. If both are available, PL scores are stored in `info`.
+  repeated double genotype_likelihood = 6;
+  // A map of additional variant call information. This must be of the form
+  // map<string, string[]> (string key mapping to a list of string values).
+  map<string, google.protobuf.ListValue> info = 2;
+}
+// A call set is a collection of variant calls, typically for one sample. It
+// belongs to a variant set.
+//
+// For more genomics resource definitions, see [Fundamentals of Google
+// Genomics](https://cloud.google.com/genomics/fundamentals-of-google-genomics)
+message CallSet {
+  // The server-generated call set ID, unique across all call sets.
+  string id = 1;
+  // The call set name.
+  string name = 2;
+  // The sample ID this call set corresponds to.
+  string sample_id = 7;
+  // The IDs of the variant sets this call set belongs to. This field must
+  // have exactly length one, as a call set belongs to a single variant set.
+  // This field is repeated for compatibility with the
+  // [GA4GH 0.5.1
+  // API](https://github.com/ga4gh/schemas/blob/v0.5.1/src/main/resources/avro/variants.avdl#L76).
+  repeated string variant_set_ids = 6;
+  // The date this call set was created, in milliseconds from the epoch.
+  int64 created = 5;
+  // A map of additional call set information. This must be of the form
+  // map<string, string[]> (string key mapping to a list of string values).
+  map<string, google.protobuf.ListValue> info = 4;
+}
+// ReferenceBound records an upper bound for the starting coordinate of
+// variants in a particular reference.
+message ReferenceBound {
+  // The name of the reference associated with this reference bound.
+  string reference_name = 1;
+  // An upper bound (inclusive) on the starting coordinate of any
+  // variant in the reference sequence.
+  int64 upper_bound = 2;
+}
+// The variant data import request.
+message ImportVariantsRequest {
+  // The file format of the variant data being imported.
+  // NOTE(review): this enum had no values, only their comments. The value
+  // names and numbers below follow the published google.genomics.v1 API -
+  // confirm before release.
+  enum Format {
+    // No format specified. proto3 requires the first enum value to be zero.
+    FORMAT_UNSPECIFIED = 0;
+    // VCF (Variant Call Format). The VCF files may be gzip compressed. gVCF is
+    // also supported.
+    FORMAT_VCF = 1;
+    // Complete Genomics masterVarBeta format. The masterVarBeta files may
+    // be bzip2 compressed.
+    FORMAT_COMPLETE_GENOMICS = 2;
+  }
+  // Required. The variant set to which variant data should be imported.
+  string variant_set_id = 1;
+  // A list of URIs referencing variant files in Google Cloud Storage. URIs can
+  // include wildcards [as described
+  // here](https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames).
+  // Note that recursive wildcards ('**') are not supported.
+  repeated string source_uris = 2;
+  // The format of the variant data being imported. If unspecified, defaults
+  // to `VCF`.
+  Format format = 3;
+  // Convert reference names to the canonical representation.
+  // hg19 haplotypes (those reference names containing "_hap")
+  // are not modified in any way.
+  // All other reference names are modified according to the following rules:
+  //
+  // * The reference name is capitalized.
+  // * The "chr" prefix is dropped for all autosomes and sex chromosomes.
+  //   For example "chr17" becomes "17" and "chrX" becomes "X".
+  // * All mitochondrial chromosomes ("chrM", "chrMT", etc) become "MT".
+  bool normalize_reference_names = 5;
+  // A mapping between info field keys and the InfoMergeOperations to
+  // be performed on them. This is plumbed down to the MergeVariantRequests
+  // generated by the resulting import job.
+  map<string, InfoMergeOperation> info_merge_config = 6;
+}
+// The variant data import response.
+message ImportVariantsResponse {
+  // IDs of the call sets created during the import.
+  repeated string call_set_ids = 1;
+}
+// The CreateVariantSet request.
+message CreateVariantSetRequest {
+  // Required. The variant set to be created. Must have a valid `datasetId`.
+  VariantSet variant_set = 1;
+}
+// The variant data export request.
+message ExportVariantSetRequest {
+  // The destination format for the exported data.
+  // NOTE(review): this enum had no values, only a comment. The value names
+  // and numbers below follow the published google.genomics.v1 API - confirm
+  // before release.
+  enum Format {
+    // No format specified. proto3 requires the first enum value to be zero.
+    FORMAT_UNSPECIFIED = 0;
+    // Export the data to Google BigQuery.
+    FORMAT_BIGQUERY = 1;
+  }
+  // Required. The ID of the variant set that contains variant data which
+  // should be exported. The caller must have READ access to this variant set.
+  string variant_set_id = 1;
+  // If provided, only variant call information from the specified call sets
+  // will be exported. By default all variant calls are exported.
+  repeated string call_set_ids = 2;
+  // Required. The Google Cloud project ID that owns the destination
+  // BigQuery dataset. The caller must have WRITE access to this project. This
+  // project will also own the resulting export job.
+  string project_id = 3;
+  // The format for the exported data.
+  Format format = 4;
+  // Required. The BigQuery dataset to export data to. This dataset must
+  // already exist. Note that this is distinct from the Genomics concept of
+  // "dataset".
+  string bigquery_dataset = 5;
+  // Required. The BigQuery table to export data to.
+  // If the table doesn't exist, it will be created. If it already exists, it
+  // will be overwritten.
+  string bigquery_table = 6;
+}
+// The get variant set request.
+message GetVariantSetRequest {
+  // Required. The ID of the variant set.
+  string variant_set_id = 1;
+}
+// The search variant sets request.
+message SearchVariantSetsRequest {
+  // Exactly one dataset ID must be provided here. Only variant sets which
+  // belong to this dataset will be returned.
+  repeated string dataset_ids = 1;
+  // The continuation token, which is used to page through large result sets.
+  // To get the next page of results, set this parameter to the value of
+  // `nextPageToken` from the previous response.
+  string page_token = 2;
+  // The maximum number of results to return in a single page. If unspecified,
+  // defaults to 1024.
+  int32 page_size = 3;
+}
+// The search variant sets response.
+message SearchVariantSetsResponse {
+  // The variant sets belonging to the requested dataset.
+  repeated VariantSet variant_sets = 1;
+  // The continuation token, which is used to page through large result sets.
+  // Provide this value in a subsequent request to return the next page of
+  // results. This field will be empty if there aren't any additional results.
+  string next_page_token = 2;
+}
+// The delete variant set request.
+message DeleteVariantSetRequest {
+  // The ID of the variant set to be deleted.
+  string variant_set_id = 1;
+}
+// The update variant set request.
+message UpdateVariantSetRequest {
+  // The ID of the variant set to be updated (must already exist).
+  string variant_set_id = 1;
+  // The new variant set data. Only mutable fields are considered for update;
+  // see `update_mask` for the supported fields.
+  VariantSet variant_set = 2;
+  // An optional mask specifying which fields to update. Supported fields:
+  //
+  // * [metadata][google.genomics.v1.VariantSet.metadata].
+  // * [name][google.genomics.v1.VariantSet.name].
+  // * [description][google.genomics.v1.VariantSet.description].
+  //
+  // Leaving `updateMask` unset is equivalent to specifying all mutable
+  // fields.
+  google.protobuf.FieldMask update_mask = 5;
+}
+// The variant search request.
+message SearchVariantsRequest {
+  // At most one variant set ID may be provided. Only variants from this
+  // variant set will be returned. If omitted, a call set ID must be included
+  // in the request.
+  repeated string variant_set_ids = 1;
+  // Only return variants which have exactly this name.
+  string variant_name = 2;
+  // Only return variant calls which belong to call sets with these IDs.
+  // Leaving this blank returns all variant calls. If a variant has no
+  // calls belonging to any of these call sets, it won't be returned at all.
+  repeated string call_set_ids = 3;
+  // Required. Only return variants in this reference sequence.
+  string reference_name = 4;
+  // The beginning of the window (0-based, inclusive) for which
+  // overlapping variants should be returned. If unspecified, defaults to 0.
+  int64 start = 5;
+  // The end of the window (0-based, exclusive). If unspecified or 0, defaults
+  // to the length of the reference.
+  int64 end = 6;
+  // The continuation token, which is used to page through large result sets.
+  // To get the next page of results, set this parameter to the value of
+  // `nextPageToken` from the previous response.
+  string page_token = 7;
+  // The maximum number of variants to return in a single page. If unspecified,
+  // defaults to 5000. The maximum value is 10000.
+  int32 page_size = 8;
+  // The maximum number of calls to return in a single page. Note that this
+  // limit may be exceeded in the event that a matching variant contains more
+  // calls than the requested maximum. If unspecified, defaults to 5000. The
+  // maximum value is 10000.
+  int32 max_calls = 9;
+}
+// The variant search response.
+message SearchVariantsResponse {
+  // The list of matching Variants.
+  repeated Variant variants = 1;
+  // The continuation token, which is used to page through large result sets.
+  // Provide this value in a subsequent request to return the next page of
+  // results. This field will be empty if there aren't any additional results.
+  string next_page_token = 2;
+}
+// The create variant request.
+message CreateVariantRequest {
+  // The variant to be created.
+  Variant variant = 1;
+}
+// The update variant request.
+message UpdateVariantRequest {
+  // The ID of the variant to be updated.
+  string variant_id = 1;
+  // The new variant data.
+  Variant variant = 2;
+  // An optional mask specifying which fields to update. At this time, mutable
+  // fields are [names][google.genomics.v1.Variant.names] and
+  // [info][google.genomics.v1.Variant.info]. Acceptable values are "names" and
+  // "info". If unspecified, all mutable fields will be updated.
+  google.protobuf.FieldMask update_mask = 3;
+}
+// The delete variant request.
+message DeleteVariantRequest {
+  // The ID of the variant to be deleted.
+  string variant_id = 1;
+}
+// The get variant request.
+message GetVariantRequest {
+  // The ID of the variant.
+  string variant_id = 1;
+}
+// The merge variants request.
+message MergeVariantsRequest {
+  // The destination variant set.
+  string variant_set_id = 1;
+  // The variants to be merged with existing variants.
+  repeated Variant variants = 2;
+  // A mapping between info field keys and the InfoMergeOperations to
+  // be performed on them.
+  map<string, InfoMergeOperation> info_merge_config = 3;
+}
+// The call set search request.
+message SearchCallSetsRequest {
+  // Restrict the query to call sets within the given variant sets. At least
+  // one ID must be provided.
+  repeated string variant_set_ids = 1;
+  // Only return call sets for which a substring of the name matches this
+  // string.
+  string name = 2;
+  // The continuation token, which is used to page through large result sets.
+  // To get the next page of results, set this parameter to the value of
+  // `nextPageToken` from the previous response.
+  string page_token = 3;
+  // The maximum number of results to return in a single page. If unspecified,
+  // defaults to 1024.
+  int32 page_size = 4;
+}
+// The call set search response.
+message SearchCallSetsResponse {
+  // The list of matching call sets.
+  repeated CallSet call_sets = 1;
+  // The continuation token, which is used to page through large result sets.
+  // Provide this value in a subsequent request to return the next page of
+  // results. This field will be empty if there aren't any additional results.
+  string next_page_token = 2;
+}
+// The create call set request.
+message CreateCallSetRequest {
+  // The call set to be created.
+  CallSet call_set = 1;
+}
+// The update call set request.
+message UpdateCallSetRequest {
+  // The ID of the call set to be updated.
+  string call_set_id = 1;
+  // The new call set data.
+  CallSet call_set = 2;
+  // An optional mask specifying which fields to update. At this time, the only
+  // mutable field is [name][google.genomics.v1.CallSet.name]. The only
+  // acceptable value is "name". If unspecified, all mutable fields will be
+  // updated.
+  google.protobuf.FieldMask update_mask = 3;
+}
+// The delete call set request.
+message DeleteCallSetRequest {
+  // The ID of the call set to be deleted.
+  string call_set_id = 1;
+}
+// The get call set request.
+message GetCallSetRequest {
+  // The ID of the call set.
+  string call_set_id = 1;
+}
+// The stream variants request.
+message StreamVariantsRequest {
+  // Required. The Google Cloud project ID which will be billed
+  // for this access. The caller must have WRITE access to this project.
+  string project_id = 1;
+  // The variant set ID from which to stream variants.
+  string variant_set_id = 2;
+  // Only return variant calls which belong to call sets with these IDs.
+  // Leaving this blank returns all variant calls.
+  repeated string call_set_ids = 3;
+  // Required. Only return variants in this reference sequence.
+  string reference_name = 4;
+  // The beginning of the window (0-based, inclusive) for which
+  // overlapping variants should be returned.
+  int64 start = 5;
+  // The end of the window (0-based, exclusive) for which overlapping
+  // variants should be returned.
+  int64 end = 6;
+}
+// The stream variants response.
+message StreamVariantsResponse {
+  // The variants carried by this response message.
+  repeated Variant variants = 1;
+}
+// Operations to be performed during import on Variant info fields.
+// These operations are set for each info field in the info_merge_config
+// map of ImportVariantsRequest, which is plumbed down to the
+// MergeVariantRequests generated by the import job.
+enum InfoMergeOperation {
+ // By default, Variant info fields are persisted if the Variant doesn't
+ // already exist in the variantset. If the Variant is equivalent to a
+ // Variant already in the variantset, the incoming Variant's info field
+ // is ignored in favor of that of the already persisted Variant.
+ // This operation removes an info field from the incoming Variant
+ // and persists this info field in each of the incoming Variant's Calls.