-
Notifications
You must be signed in to change notification settings - Fork 674
Expand file tree
/
Copy pathdocument_schema.proto
More file actions
141 lines (116 loc) · 5.14 KB
/
document_schema.proto
File metadata and controls
141 lines (116 loc) · 5.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This file is written in proto3 syntax.
syntax = "proto3";

// Proto package that scopes every definition in this file.
package google.cloud.documentai.v1;

// Per-language code generation options. Each one only affects the code
// generated for the language named in the option.
option csharp_namespace = "Google.Cloud.DocumentAI.V1";
// Go import path, followed after the `;` by the Go package name.
option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1;documentai";
// Generate top-level definitions as separate Java files rather than nesting
// them inside the outer class named below.
option java_multiple_files = true;
option java_outer_classname = "DocumentAiDocumentSchema";
option java_package = "com.google.cloud.documentai.v1";
// The doubled backslashes are string escapes for single backslashes.
option php_namespace = "Google\\Cloud\\DocumentAI\\V1";
option ruby_package = "Google::Cloud::DocumentAI::V1";
// Describes the output that a processor produces for a processed document.
message DocumentSchema {
  // Wraps a label of the corresponding model together with the detailed
  // attributes and limitations that apply to entity-based processors.
  // Several entity types can be combined into a dependency tree in order to
  // represent nested types.
  message EntityType {
    // A list of enum values.
    message EnumValues {
      // Each individual value that this enum values type can include.
      repeated string values = 1;
    }

    // A property that can be part of the entity type.
    message Property {
      // How often the entity type occurs in the document. Note: this counts
      // instances of an entity type, not the mentions of a given entity
      // instance.
      enum OccurrenceType {
        // No occurrence type was specified.
        OCCURRENCE_TYPE_UNSPECIFIED = 0;

        // Zero or one instance of this entity type.
        OPTIONAL_ONCE = 1;

        // Zero or multiple instances of this entity type.
        OPTIONAL_MULTIPLE = 2;

        // Exactly one instance of this entity type.
        REQUIRED_ONCE = 3;

        // One or more instances of this entity type.
        REQUIRED_MULTIPLE = 4;
      }

      // Name of the property. The guidelines for `EntityType.name` apply
      // here as well.
      string name = 1;

      // Reference to the value type of the property. This type follows the
      // same conventions as the `EntityType.base_types` field.
      string value_type = 2;

      // Limits the number of instances of an entity type that appear in the
      // document.
      OccurrenceType occurrence_type = 3;
    }

    oneof value_source {
      // When set, enumerates all the possible values for this entity. Keep
      // this to a handful of values. If there are more than 10 values, or
      // the values could change frequently, use the
      // `EntityType.value_ontology` field instead and list all possible
      // values in a value ontology file.
      EnumValues enum_values = 14;
    }

    // User-defined name for the type.
    string display_name = 13;

    // Name of the type. It must be unique within the schema file and must
    // not be a 'Common Type'. In addition, these naming conventions apply:
    //
    // - *use `snake_casing`*
    // - Name matching is case-sensitive.
    // - Maximum 64 characters.
    // - Must start with a letter.
    // - Allowed characters: ASCII `[a-z0-9_-]`. (For backward
    //   compatibility, internal infrastructure and tooling can handle any
    //   ASCII character.)
    // - The `/` is sometimes used to denote a property of a type, for
    //   example `line_item/amount`. This convention is deprecated, but is
    //   still honored for backward compatibility.
    string name = 1;

    // The entity type that this type is derived from. For now, exactly one
    // should be set.
    repeated string base_types = 2;

    // Describes the nested structure, or composition, of an entity.
    repeated Property properties = 6;
  }

  // Metadata that controls schema-wide behavior.
  message Metadata {
    // If true, a `document` entity type can be applied to a subdocument
    // (splitting). Otherwise, it can only be applied to the entire document
    // (classification).
    bool document_splitter = 1;

    // If true, a given page can be covered by multiple `document`
    // annotations.
    bool document_allow_multiple_labels = 2;

    // If set, all the nested entities must be prefixed with the parents.
    bool prefixed_naming_on_properties = 6;

    // If set, naming format validation is skipped for the schema, so the
    // string values in `DocumentSchema.EntityType.name` and
    // `DocumentSchema.EntityType.Property.name` are not checked.
    bool skip_naming_validation = 7;
  }

  // Display name to show to users.
  string display_name = 1;

  // Description of the schema.
  string description = 2;

  // Entity types of the schema.
  repeated EntityType entity_types = 3;

  // Metadata of the schema.
  Metadata metadata = 4;
}