update redact_image, quickstart samples (#1399)

andrewsg · web-flow · commit f9f09ce152f6 · 2018-03-13T15:50:11.000-07:00
diff --git a/dlp/quickstart.py b/dlp/quickstart.py
@@ -23,19 +23,22 @@ def quickstart():
 
     # [START quickstart]
     # Import the client library
-    import google.cloud.dlp_v2beta1
+    import google.cloud.dlp
+
+    # Replace this with your Google Cloud project ID.
+    project = 'your-project'
 
     # Instantiate a client.
-    dlp = google.cloud.dlp_v2beta1.DlpServiceClient()
+    dlp = google.cloud.dlp.DlpServiceClient()
 
     # The string to inspect
     content = 'Robert Frost'
 
-    # Construct the list of content items to inspect; in this case, only one.
-    items = [{'type': 'text/plain', 'value': content}]
+    # Construct the item to inspect.
+    item = {'value': content}
 
-    # The info types to search for in the content.
-    info_types = [{'name': 'US_MALE_NAME'}, {'name': 'US_FEMALE_NAME'}]
+    # The info types to search for in the content. Required.
+    info_types = [{'name': 'FIRST_NAME'}, {'name': 'LAST_NAME'}]
 
     # The minimum likelihood to constitute a match. Optional.
     min_likelihood = 'LIKELIHOOD_UNSPECIFIED'
@@ -51,16 +54,19 @@ def quickstart():
     inspect_config = {
         'info_types': info_types,
         'min_likelihood': min_likelihood,
-        'max_findings': max_findings,
         'include_quote': include_quote,
+        'limits': {'max_findings_per_request': max_findings},
     }
 
+    # Convert the project id into a full resource id.
+    parent = dlp.project_path(project)
+
     # Call the API.
-    response = dlp.inspect_content(inspect_config, items)
+    response = dlp.inspect_content(parent, inspect_config, item)
 
     # Print out the results.
-    if response.results[0].findings:
-        for finding in response.results[0].findings:
+    if response.result.findings:
+        for finding in response.result.findings:
             try:
                 print('Quote: {}'.format(finding.quote))
             except AttributeError:
diff --git a/dlp/redact.py b/dlp/redact.py
@@ -19,6 +19,7 @@
 
 import argparse
 import mimetypes
+import os
 
 
 # [START redact_string]
@@ -83,8 +84,8 @@ def redact_string(item, replace_string, info_types=None, min_likelihood=None):
 
 
 # [START redact_image]
-def redact_image(filename, output_filename,
-                 info_types=None, min_likelihood=None, mime_type=None):
+def redact_image(project, filename, output_filename,
+                 info_types, min_likelihood=None, mime_type=None):
     """Uses the Data Loss Prevention API to redact protected data in an image.
     Args:
         filename: The path to the file to inspect.
@@ -101,17 +102,14 @@ def redact_image(filename, output_filename,
         None; the response from the API is printed to the terminal.
     """
     # Import the client library
-    import google.cloud.dlp_v2beta1
+    import google.cloud.dlp
 
     # Instantiate a client.
-    dlp = google.cloud.dlp_v2beta1.DlpServiceClient()
+    dlp = google.cloud.dlp.DlpServiceClient()
 
     # Prepare info_types by converting the list of strings into a list of
-    # dictionaries (protos are also accepted). The info_types are not submitted
-    # directly in this example, but are used in the construction of
-    # image_redaction_configs.
-    if info_types is not None:
-        info_types = [{'name': info_type} for info_type in info_types]
+    # dictionaries (protos are also accepted).
+    info_types = [{'name': info_type} for info_type in info_types]
 
     # Prepare image_redaction_configs, a list of dictionaries. Each dictionary
     # contains an info_type and optionally the color used for the replacement.
@@ -124,39 +122,57 @@ def redact_image(filename, output_filename,
 
     # Construct the configuration dictionary. Keys which are None may
     # optionally be omitted entirely.
-    redact_config = {
+    inspect_config = {
         'min_likelihood': min_likelihood,
+        'info_types': info_types,
     }
 
     # If mime_type is not specified, guess it from the filename.
     if mime_type is None:
         mime_guess = mimetypes.MimeTypes().guess_type(filename)
         mime_type = mime_guess[0] or 'application/octet-stream'
 
-    # Construct the items list (in this case, only one item, containing the
-    # image file's byte data).
+    # Look up the content type index for mime_type (0 if unsupported).
+    supported_content_types = {
+        None: 0,  # "Unspecified"
+        'image/jpeg': 1,
+        'image/bmp': 2,
+        'image/png': 3,
+        'image/svg': 4,
+        'text/plain': 5,
+    }
+    content_type_index = supported_content_types.get(mime_type, 0)
+
+    # Construct the byte_item, containing the file's byte data.
     with open(filename, mode='rb') as f:
-        items = [{'type': mime_type, 'data': f.read()}]
+        byte_item = {'type': content_type_index, 'data': f.read()}
+
+    # Convert the project id into a full resource id.
+    parent = dlp.project_path(project)
 
     # Call the API.
-    response = dlp.redact_content(
-        redact_config, items, None,
-        image_redaction_configs=image_redaction_configs)
+    response = dlp.redact_image(
+        parent, inspect_config=inspect_config,
+        image_redaction_configs=image_redaction_configs,
+        byte_item=byte_item)
 
     # Write out the results.
     with open(output_filename, mode='wb') as f:
-        f.write(response.items[0].data)
+        f.write(response.redacted_image)
     print("Wrote {byte_count} to {filename}".format(
-        byte_count=len(response.items[0].data), filename=output_filename))
+        byte_count=len(response.redacted_image), filename=output_filename))
 # [END redact_string]
 
 
 if __name__ == '__main__':
+    default_project = os.environ.get('GCLOUD_PROJECT')
+
     parser = argparse.ArgumentParser(description=__doc__)
     subparsers = parser.add_subparsers(
         dest='content', help='Select how to submit content to the API.')
+    subparsers.required = True
 
-    parser_string = subparsers.add_parser('string', help='Inspect a string.')
+    parser_string = subparsers.add_parser('string', help='Redact a string.')
     parser_string.add_argument('item', help='The string to inspect.')
     parser_string.add_argument(
         'replace_string',
@@ -177,20 +193,23 @@ def redact_image(filename, output_filename,
         help='A string representing the minimum likelihood threshold that '
              'constitutes a match.')
 
-    parser_file = subparsers.add_parser('image', help='Inspect an image file.')
+    parser_file = subparsers.add_parser('image', help='Redact an image file.')
     parser_file.add_argument(
         'filename', help='The path to the file to inspect.')
     parser_file.add_argument(
         'output_filename',
         help='The path to which the redacted image will be written.')
+    parser_file.add_argument(
+        '--project',
+        help='The Google Cloud project id to use as a parent resource.',
+        default=default_project)
     parser_file.add_argument(
         '--info_types', action='append',
         help='Strings representing info types to look for. A full list of '
              'info categories and types is available from the API. Examples '
-             'include "US_MALE_NAME", "US_FEMALE_NAME", "EMAIL_ADDRESS", '
-             '"CANADA_SOCIAL_INSURANCE_NUMBER", "JAPAN_PASSPORT". If omitted, '
-             'the API will use a limited default set. Specify this flag '
-             'multiple times to specify multiple info types.')
+             'include "FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS". '
+             'If unspecified, the three above examples will be used.',
+        default=['FIRST_NAME', 'LAST_NAME', 'EMAIL_ADDRESS'])
     parser_file.add_argument(
         '--min_likelihood',
         choices=['LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY',
@@ -210,5 +229,6 @@ def redact_image(filename, output_filename,
             min_likelihood=args.min_likelihood)
     elif args.content == 'image':
         redact_image(
-            args.filename, args.output_filename, info_types=args.info_types,
-            min_likelihood=args.min_likelihood, mime_type=args.mime_type)
+            args.project, args.filename, args.output_filename,
+            args.info_types, min_likelihood=args.min_likelihood,
+            mime_type=args.mime_type)
diff --git a/dlp/redact_test.py b/dlp/redact_test.py
@@ -20,6 +20,7 @@
 
 import redact
 
+GCLOUD_PROJECT = os.getenv('GCLOUD_PROJECT')
 RESOURCE_DIRECTORY = os.path.join(os.path.dirname(__file__), 'resources')
 
 
@@ -63,19 +64,11 @@ def test_redact_image_file(tempdir, capsys):
     test_filepath = os.path.join(RESOURCE_DIRECTORY, 'test.png')
     output_filepath = os.path.join(tempdir, 'redacted.png')
 
-    redact.redact_image(test_filepath, output_filepath)
-
-    out, _ = capsys.readouterr()
-    assert output_filepath in out
-
-
-def test_redact_image_file_with_infotype(tempdir, capsys):
-    test_filepath = os.path.join(RESOURCE_DIRECTORY, 'test.png')
-    output_filepath = os.path.join(tempdir, 'redacted_with_infotype.png')
-
     redact.redact_image(
-        test_filepath, output_filepath,
-        info_types=['EMAIL_ADDRESS', 'US_MALE_NAME'])
+        GCLOUD_PROJECT,
+        test_filepath,
+        output_filepath,
+        ['FIRST_NAME', 'EMAIL_ADDRESS'])
 
     out, _ = capsys.readouterr()
     assert output_filepath in out