Skip to content

Commit f9f09ce

Browse files
authored
update redact_image, quickstart samples (#1399)
1 parent 714f1c8 commit f9f09ce

File tree

3 files changed

+67
-48
lines changed

3 files changed

+67
-48
lines changed

dlp/quickstart.py

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,22 @@ def quickstart():
2323

2424
# [START quickstart]
2525
# Import the client library
26-
import google.cloud.dlp_v2beta1
26+
import google.cloud.dlp
27+
28+
# Edit this with your Google Cloud Project ID.
29+
project = 'your-project'
2730

2831
# Instantiate a client.
29-
dlp = google.cloud.dlp_v2beta1.DlpServiceClient()
32+
dlp = google.cloud.dlp.DlpServiceClient()
3033

3134
# The string to inspect
3235
content = 'Robert Frost'
3336

34-
# Construct the list of content items to inspect; in this case, only one.
35-
items = [{'type': 'text/plain', 'value': content}]
37+
# Construct the item to inspect.
38+
item = {'value': content}
3639

37-
# The info types to search for in the content.
38-
info_types = [{'name': 'US_MALE_NAME'}, {'name': 'US_FEMALE_NAME'}]
40+
# The info types to search for in the content. Required.
41+
info_types = [{'name': 'FIRST_NAME'}, {'name': 'LAST_NAME'}]
3942

4043
# The minimum likelihood to constitute a match. Optional.
4144
min_likelihood = 'LIKELIHOOD_UNSPECIFIED'
@@ -51,16 +54,19 @@ def quickstart():
5154
inspect_config = {
5255
'info_types': info_types,
5356
'min_likelihood': min_likelihood,
54-
'max_findings': max_findings,
5557
'include_quote': include_quote,
58+
'limits': {'max_findings_per_request': max_findings},
5659
}
5760

61+
# Convert the project id into a full resource id.
62+
parent = dlp.project_path(project)
63+
5864
# Call the API.
59-
response = dlp.inspect_content(inspect_config, items)
65+
response = dlp.inspect_content(parent, inspect_config, item)
6066

6167
# Print out the results.
62-
if response.results[0].findings:
63-
for finding in response.results[0].findings:
68+
if response.result.findings:
69+
for finding in response.result.findings:
6470
try:
6571
print('Quote: {}'.format(finding.quote))
6672
except AttributeError:

dlp/redact.py

Lines changed: 46 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import argparse
2121
import mimetypes
22+
import os
2223

2324

2425
# [START redact_string]
@@ -83,8 +84,8 @@ def redact_string(item, replace_string, info_types=None, min_likelihood=None):
8384

8485

8586
# [START redact_image]
86-
def redact_image(filename, output_filename,
87-
info_types=None, min_likelihood=None, mime_type=None):
87+
def redact_image(project, filename, output_filename,
88+
info_types, min_likelihood=None, mime_type=None):
8889
"""Uses the Data Loss Prevention API to redact protected data in an image.
8990
Args:
9091
filename: The path to the file to inspect.
@@ -101,17 +102,14 @@ def redact_image(filename, output_filename,
101102
None; the response from the API is printed to the terminal.
102103
"""
103104
# Import the client library
104-
import google.cloud.dlp_v2beta1
105+
import google.cloud.dlp
105106

106107
# Instantiate a client.
107-
dlp = google.cloud.dlp_v2beta1.DlpServiceClient()
108+
dlp = google.cloud.dlp.DlpServiceClient()
108109

109110
# Prepare info_types by converting the list of strings into a list of
110-
# dictionaries (protos are also accepted). The info_types are not submitted
111-
# directly in this example, but are used in the construction of
112-
# image_redaction_configs.
113-
if info_types is not None:
114-
info_types = [{'name': info_type} for info_type in info_types]
111+
# dictionaries (protos are also accepted).
112+
info_types = [{'name': info_type} for info_type in info_types]
115113

116114
# Prepare image_redaction_configs, a list of dictionaries. Each dictionary
117115
# contains an info_type and optionally the color used for the replacement.
@@ -124,39 +122,57 @@ def redact_image(filename, output_filename,
124122

125123
# Construct the configuration dictionary. Keys which are None may
126124
# optionally be omitted entirely.
127-
redact_config = {
125+
inspect_config = {
128126
'min_likelihood': min_likelihood,
127+
'info_types': info_types,
129128
}
130129

131130
# If mime_type is not specified, guess it from the filename.
132131
if mime_type is None:
133132
mime_guess = mimetypes.MimeTypes().guess_type(filename)
134133
mime_type = mime_guess[0] or 'application/octet-stream'
135134

136-
# Construct the items list (in this case, only one item, containing the
137-
# image file's byte data).
135+
# Select the content type index from the list of supported types.
136+
supported_content_types = {
137+
None: 0, # "Unspecified"
138+
'image/jpeg': 1,
139+
'image/bmp': 2,
140+
'image/png': 3,
141+
'image/svg': 4,
142+
'text/plain': 5,
143+
}
144+
content_type_index = supported_content_types.get(mime_type, 0)
145+
146+
# Construct the byte_item, containing the file's byte data.
138147
with open(filename, mode='rb') as f:
139-
items = [{'type': mime_type, 'data': f.read()}]
148+
byte_item = {'type': content_type_index, 'data': f.read()}
149+
150+
# Convert the project id into a full resource id.
151+
parent = dlp.project_path(project)
140152

141153
# Call the API.
142-
response = dlp.redact_content(
143-
redact_config, items, None,
144-
image_redaction_configs=image_redaction_configs)
154+
response = dlp.redact_image(
155+
parent, inspect_config=inspect_config,
156+
image_redaction_configs=image_redaction_configs,
157+
byte_item=byte_item)
145158

146159
# Write out the results.
147160
with open(output_filename, mode='wb') as f:
148-
f.write(response.items[0].data)
161+
f.write(response.redacted_image)
149162
print("Wrote {byte_count} to {filename}".format(
150-
byte_count=len(response.items[0].data), filename=output_filename))
163+
byte_count=len(response.redacted_image), filename=output_filename))
151164
# [END redact_image]
152165

153166

154167
if __name__ == '__main__':
168+
default_project = os.environ.get('GCLOUD_PROJECT')
169+
155170
parser = argparse.ArgumentParser(description=__doc__)
156171
subparsers = parser.add_subparsers(
157172
dest='content', help='Select how to submit content to the API.')
173+
subparsers.required = True
158174

159-
parser_string = subparsers.add_parser('string', help='Inspect a string.')
175+
parser_string = subparsers.add_parser('string', help='Redact a string.')
160176
parser_string.add_argument('item', help='The string to inspect.')
161177
parser_string.add_argument(
162178
'replace_string',
@@ -177,20 +193,23 @@ def redact_image(filename, output_filename,
177193
help='A string representing the minimum likelihood threshold that '
178194
'constitutes a match.')
179195

180-
parser_file = subparsers.add_parser('image', help='Inspect an image file.')
196+
parser_file = subparsers.add_parser('image', help='Redact an image file.')
181197
parser_file.add_argument(
182198
'filename', help='The path to the file to inspect.')
183199
parser_file.add_argument(
184200
'output_filename',
185201
help='The path to which the redacted image will be written.')
202+
parser_file.add_argument(
203+
'--project',
204+
help='The Google Cloud project id to use as a parent resource.',
205+
default=default_project)
186206
parser_file.add_argument(
187207
'--info_types', action='append',
188208
help='Strings representing info types to look for. A full list of '
189209
'info categories and types is available from the API. Examples '
190-
'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", '
191-
'"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, '
192-
'the API will use a limited default set. Specify this flag '
193-
'multiple times to specify multiple info types.')
210+
'include "FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS". '
211+
'If unspecified, the three above examples will be used.',
212+
default=['FIRST_NAME', 'LAST_NAME', 'EMAIL_ADDRESS'])
194213
parser_file.add_argument(
195214
'--min_likelihood',
196215
choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY',
@@ -210,5 +229,6 @@ def redact_image(filename, output_filename,
210229
min_likelihood=args.min_likelihood)
211230
elif args.content == 'image':
212231
redact_image(
213-
args.filename, args.output_filename, info_types=args.info_types,
214-
min_likelihood=args.min_likelihood, mime_type=args.mime_type)
232+
args.project, args.filename, args.output_filename,
233+
args.info_types, min_likelihood=args.min_likelihood,
234+
mime_type=args.mime_type)

dlp/redact_test.py

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import redact
2222

23+
GCLOUD_PROJECT = os.getenv('GCLOUD_PROJECT')
2324
RESOURCE_DIRECTORY = os.path.join(os.path.dirname(__file__), 'resources')
2425

2526

@@ -63,19 +64,11 @@ def test_redact_image_file(tempdir, capsys):
6364
test_filepath = os.path.join(RESOURCE_DIRECTORY, 'test.png')
6465
output_filepath = os.path.join(tempdir, 'redacted.png')
6566

66-
redact.redact_image(test_filepath, output_filepath)
67-
68-
out, _ = capsys.readouterr()
69-
assert output_filepath in out
70-
71-
72-
def test_redact_image_file_with_infotype(tempdir, capsys):
73-
test_filepath = os.path.join(RESOURCE_DIRECTORY, 'test.png')
74-
output_filepath = os.path.join(tempdir, 'redacted_with_infotype.png')
75-
7667
redact.redact_image(
77-
test_filepath, output_filepath,
78-
info_types=['EMAIL_ADDRESS', 'US_MALE_NAME'])
68+
GCLOUD_PROJECT,
69+
test_filepath,
70+
output_filepath,
71+
['FIRST_NAME', 'EMAIL_ADDRESS'])
7972

8073
out, _ = capsys.readouterr()
8174
assert output_filepath in out

0 commit comments

Comments
 (0)