19
19
20
20
import argparse
21
21
import mimetypes
22
+ import os
22
23
23
24
24
25
# [START redact_string]
@@ -83,8 +84,8 @@ def redact_string(item, replace_string, info_types=None, min_likelihood=None):
83
84
84
85
85
86
# [START redact_image]
86
- def redact_image (filename , output_filename ,
87
- info_types = None , min_likelihood = None , mime_type = None ):
87
+ def redact_image (project , filename , output_filename ,
88
+ info_types , min_likelihood = None , mime_type = None ):
88
89
"""Uses the Data Loss Prevention API to redact protected data in an image.
89
90
Args:
90
91
filename: The path to the file to inspect.
@@ -101,17 +102,14 @@ def redact_image(filename, output_filename,
101
102
None; the response from the API is printed to the terminal.
102
103
"""
103
104
# Import the client library
104
- import google .cloud .dlp_v2beta1
105
+ import google .cloud .dlp
105
106
106
107
# Instantiate a client.
107
- dlp = google .cloud .dlp_v2beta1 .DlpServiceClient ()
108
+ dlp = google .cloud .dlp .DlpServiceClient ()
108
109
109
110
# Prepare info_types by converting the list of strings into a list of
110
- # dictionaries (protos are also accepted). The info_types are not submitted
111
- # directly in this example, but are used in the construction of
112
- # image_redaction_configs.
113
- if info_types is not None :
114
- info_types = [{'name' : info_type } for info_type in info_types ]
111
+ # dictionaries (protos are also accepted).
112
+ info_types = [{'name' : info_type } for info_type in info_types ]
115
113
116
114
# Prepare image_redaction_configs, a list of dictionaries. Each dictionary
117
115
# contains an info_type and optionally the color used for the replacement.
@@ -124,39 +122,57 @@ def redact_image(filename, output_filename,
124
122
125
123
# Construct the configuration dictionary. Keys which are None may
126
124
# optionally be omitted entirely.
127
- redact_config = {
125
+ inspect_config = {
128
126
'min_likelihood' : min_likelihood ,
127
+ 'info_types' : info_types ,
129
128
}
130
129
131
130
# If mime_type is not specified, guess it from the filename.
132
131
if mime_type is None :
133
132
mime_guess = mimetypes .MimeTypes ().guess_type (filename )
134
133
mime_type = mime_guess [0 ] or 'application/octet-stream'
135
134
136
- # Construct the items list (in this case, only one item, containing the
137
- # image file's byte data).
135
+ # Select the content type index from the list of supported types.
136
+ supported_content_types = {
137
+ None : 0 , # "Unspecified"
138
+ 'image/jpeg' : 1 ,
139
+ 'image/bmp' : 2 ,
140
+ 'image/png' : 3 ,
141
+ 'image/svg' : 4 ,
142
+ 'text/plain' : 5 ,
143
+ }
144
+ content_type_index = supported_content_types .get (mime_type , 0 )
145
+
146
+ # Construct the byte_item, containing the file's byte data.
138
147
with open (filename , mode = 'rb' ) as f :
139
- items = [{'type' : mime_type , 'data' : f .read ()}]
148
+ byte_item = {'type' : content_type_index , 'data' : f .read ()}
149
+
150
+ # Convert the project id into a full resource id.
151
+ parent = dlp .project_path (project )
140
152
141
153
# Call the API.
142
- response = dlp .redact_content (
143
- redact_config , items , None ,
144
- image_redaction_configs = image_redaction_configs )
154
+ response = dlp .redact_image (
155
+ parent , inspect_config = inspect_config ,
156
+ image_redaction_configs = image_redaction_configs ,
157
+ byte_item = byte_item )
145
158
146
159
# Write out the results.
147
160
with open (output_filename , mode = 'wb' ) as f :
148
- f .write (response .items [ 0 ]. data )
161
+ f .write (response .redacted_image )
149
162
print ("Wrote {byte_count} to {filename}" .format (
150
- byte_count = len (response .items [ 0 ]. data ), filename = output_filename ))
163
+ byte_count = len (response .redacted_image ), filename = output_filename ))
151
164
# [END redact_image]
152
165
153
166
154
167
if __name__ == '__main__' :
168
+ default_project = os .environ .get ('GCLOUD_PROJECT' )
169
+
155
170
parser = argparse .ArgumentParser (description = __doc__ )
156
171
subparsers = parser .add_subparsers (
157
172
dest = 'content' , help = 'Select how to submit content to the API.' )
173
+ subparsers .required = True
158
174
159
- parser_string = subparsers .add_parser ('string' , help = 'Inspect a string.' )
175
+ parser_string = subparsers .add_parser ('string' , help = 'Redact a string.' )
160
176
parser_string .add_argument ('item' , help = 'The string to inspect.' )
161
177
parser_string .add_argument (
162
178
'replace_string' ,
@@ -177,20 +193,23 @@ def redact_image(filename, output_filename,
177
193
help = 'A string representing the minimum likelihood threshold that '
178
194
'constitutes a match.' )
179
195
180
- parser_file = subparsers .add_parser ('image' , help = 'Inspect an image file.' )
196
+ parser_file = subparsers .add_parser ('image' , help = 'Redact an image file.' )
181
197
parser_file .add_argument (
182
198
'filename' , help = 'The path to the file to inspect.' )
183
199
parser_file .add_argument (
184
200
'output_filename' ,
185
201
help = 'The path to which the redacted image will be written.' )
202
+ parser_file .add_argument (
203
+ '--project' ,
204
+ help = 'The Google Cloud project id to use as a parent resource.' ,
205
+ default = default_project )
186
206
parser_file .add_argument (
187
207
'--info_types' , action = 'append' ,
188
208
help = 'Strings representing info types to look for. A full list of '
189
209
'info categories and types is available from the API. Examples '
190
- 'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", '
191
- '"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, '
192
- 'the API will use a limited default set. Specify this flag '
193
- 'multiple times to specify multiple info types.' )
210
+ 'include "FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS". '
211
+ 'If unspecified, the three above examples will be used.' ,
212
+ default = ['FIRST_NAME' , 'LAST_NAME' , 'EMAIL_ADDRESS' ])
194
213
parser_file .add_argument (
195
214
'--min_likelihood' ,
196
215
choices = ['LIKELIHOOD_UNSPECIFIED' , 'VERY_UNLIKELY' , 'UNLIKELY' ,
@@ -210,5 +229,6 @@ def redact_image(filename, output_filename,
210
229
min_likelihood = args .min_likelihood )
211
230
elif args .content == 'image' :
212
231
redact_image (
213
- args .filename , args .output_filename , info_types = args .info_types ,
214
- min_likelihood = args .min_likelihood , mime_type = args .mime_type )
232
+ args .project , args .filename , args .output_filename ,
233
+ args .info_types , min_likelihood = args .min_likelihood ,
234
+ mime_type = args .mime_type )
0 commit comments