Skip to content

Commit 3102486

Browse files
author
Takashi Matsuo
authored
[dlp] testing: fix Pub/Sub notifications (#3925)
* re-generated README.rst with some more setup info
* use parent with the global location attached
* re-enabled some tests with Pub/Sub notification
* stop waiting between test retries
1 parent 4b968e8 commit 3102486

File tree

8 files changed

+98
-102
lines changed

8 files changed

+98
-102
lines changed

dlp/README.rst

Lines changed: 18 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,15 @@ This directory contains samples for Google Data Loss Prevention. `Google Data Lo
1414

1515
.. _Google Data Loss Prevention: https://cloud.google.com/dlp/docs/
1616

17+
To run the sample, you need to enable the API at: https://console.cloud.google.com/apis/library/dlp.googleapis.com
18+
19+
20+
To run the sample, you need to have the following roles:
21+
* `DLP Administrator`
22+
* `DLP API Service Agent`
23+
24+
25+
1726
Setup
1827
-------------------------------------------------------------------------------
1928

@@ -58,15 +67,6 @@ Install Dependencies
5867
.. _pip: https://pip.pypa.io/
5968
.. _virtualenv: https://virtualenv.pypa.io/
6069

61-
#. For running *_test.py files, install test dependencies
62-
63-
.. code-block:: bash
64-
65-
$ pip install -r requirements-test.txt
66-
$ pytest inspect_content_test.py
67-
68-
** *_test.py files are demo wrappers and make API calls. You may get rate limited for making high number of requests. **
69-
7070
Samples
7171
-------------------------------------------------------------------------------
7272

@@ -83,7 +83,7 @@ To run this sample:
8383

8484
.. code-block:: bash
8585
86-
$ python quickstart.py <project-id>
86+
$ python quickstart.py
8787
8888
8989
Inspect Content
@@ -101,15 +101,16 @@ To run this sample:
101101
102102
$ python inspect_content.py
103103
104-
usage: inspect_content.py [-h] {string,file,gcs,datastore,bigquery} ...
104+
usage: inspect_content.py [-h] {string,table,file,gcs,datastore,bigquery} ...
105105
106106
Sample app that uses the Data Loss Prevention API to inspect a string, a local
107107
file or a file on Google Cloud Storage.
108108
109109
positional arguments:
110-
{string,file,gcs,datastore,bigquery}
110+
{string,table,file,gcs,datastore,bigquery}
111111
Select how to submit content to the API.
112112
string Inspect a string.
113+
table Inspect a table.
113114
file Inspect a local file.
114115
gcs Inspect files on Google Cloud Storage.
115116
datastore Inspect files on Google Datastore.
@@ -135,13 +136,14 @@ To run this sample:
135136
136137
$ python redact.py
137138
138-
usage: redact.py [-h] [--project PROJECT] [--info_types INFO_TYPES]
139+
usage: redact.py [-h] [--project PROJECT]
140+
[--info_types INFO_TYPES [INFO_TYPES ...]]
139141
[--min_likelihood {LIKELIHOOD_UNSPECIFIED,VERY_UNLIKELY,UNLIKELY,POSSIBLE,LIKELY,VERY_LIKELY}]
140142
[--mime_type MIME_TYPE]
141143
filename output_filename
142144
143-
Sample app that uses the Data Loss Prevent API to redact the contents of a
144-
string or an image file.
145+
Sample app that uses the Data Loss Prevent API to redact the contents of an
146+
image file.
145147
146148
positional arguments:
147149
filename The path to the file to inspect.
@@ -151,7 +153,7 @@ To run this sample:
151153
-h, --help show this help message and exit
152154
--project PROJECT The Google Cloud project id to use as a parent
153155
resource.
154-
--info_types INFO_TYPES
156+
--info_types INFO_TYPES [INFO_TYPES ...]
155157
Strings representing info types to look for. A full
156158
list of info categories and types is available from
157159
the API. Examples include "FIRST_NAME", "LAST_NAME",

dlp/README.rst.in

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ product:
44
name: Google Data Loss Prevention
55
short_name: Data Loss Prevention
66
url: https://cloud.google.com/dlp/docs/
7-
description: >
7+
description: >
88
`Google Data Loss Prevention`_ provides programmatic access to a powerful
99
detection engine for personally identifiable information and other
1010
privacy-sensitive data in unstructured data streams.
@@ -13,6 +13,12 @@ setup:
1313
- auth
1414
- install_deps
1515

16+
required_api_url: https://console.cloud.google.com/apis/library/dlp.googleapis.com
17+
18+
required_roles:
19+
- DLP Administrator
20+
- DLP API Service Agent
21+
1622
samples:
1723
- name: Quickstart
1824
file: quickstart.py

dlp/conftest.py

Lines changed: 0 additions & 20 deletions
This file was deleted.

dlp/inspect_content.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -459,11 +459,12 @@ def inspect_gcs_file(
459459
url = "gs://{}/{}".format(bucket, filename)
460460
storage_config = {"cloud_storage_options": {"file_set": {"url": url}}}
461461

462-
# Convert the project id into a full resource id.
463-
parent = dlp.project_path(project)
462+
# Convert the project id into full resource ids.
463+
topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id)
464+
parent = dlp.location_path(project, 'global')
464465

465466
# Tell the API where to send a notification when the job is complete.
466-
actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}]
467+
actions = [{"pub_sub": {"topic": topic}}]
467468

468469
# Construct the inspect_job, which defines the entire inspect content task.
469470
inspect_job = {
@@ -623,11 +624,12 @@ def inspect_datastore(
623624
}
624625
}
625626

626-
# Convert the project id into a full resource id.
627-
parent = dlp.project_path(project)
627+
# Convert the project id into full resource ids.
628+
topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id)
629+
parent = dlp.location_path(project, 'global')
628630

629631
# Tell the API where to send a notification when the job is complete.
630-
actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}]
632+
actions = [{"pub_sub": {"topic": topic}}]
631633

632634
# Construct the inspect_job, which defines the entire inspect content task.
633635
inspect_job = {
@@ -790,11 +792,12 @@ def inspect_bigquery(
790792
}
791793
}
792794

793-
# Convert the project id into a full resource id.
794-
parent = dlp.project_path(project)
795+
# Convert the project id into full resource ids.
796+
topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id)
797+
parent = dlp.location_path(project, 'global')
795798

796799
# Tell the API where to send a notification when the job is complete.
797-
actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}]
800+
actions = [{"pub_sub": {"topic": topic}}]
798801

799802
# Construct the inspect_job, which defines the entire inspect content task.
800803
inspect_job = {

dlp/inspect_content_test.py

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@
4040
BIGQUERY_DATASET_ID = "dlp_test_dataset" + UNIQUE_STRING
4141
BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING
4242

43+
TIMEOUT = 300 # 5 minutes
44+
4345

4446
@pytest.fixture(scope="module")
4547
def bucket():
@@ -298,6 +300,7 @@ def cancel_operation(out):
298300
client.cancel_dlp_job(operation_id)
299301

300302

303+
@pytest.mark.flaky(max_runs=2, min_passes=1)
301304
def test_inspect_gcs_file(bucket, topic_id, subscription_id, capsys):
302305
try:
303306
inspect_content.inspect_gcs_file(
@@ -307,15 +310,16 @@ def test_inspect_gcs_file(bucket, topic_id, subscription_id, capsys):
307310
topic_id,
308311
subscription_id,
309312
["EMAIL_ADDRESS", "PHONE_NUMBER"],
310-
timeout=1
313+
timeout=TIMEOUT
311314
)
312315

313316
out, _ = capsys.readouterr()
314-
assert "Inspection operation started" in out
317+
assert "Info type: EMAIL_ADDRESS" in out
315318
finally:
316319
cancel_operation(out)
317320

318321

322+
@pytest.mark.flaky(max_runs=2, min_passes=1)
319323
def test_inspect_gcs_file_with_custom_info_types(
320324
bucket, topic_id, subscription_id, capsys):
321325
try:
@@ -331,15 +335,16 @@ def test_inspect_gcs_file_with_custom_info_types(
331335
[],
332336
custom_dictionaries=dictionaries,
333337
custom_regexes=regexes,
334-
timeout=1)
338+
timeout=TIMEOUT)
335339

336340
out, _ = capsys.readouterr()
337341

338-
assert "Inspection operation started" in out
342+
assert "Info type: EMAIL_ADDRESS" in out
339343
finally:
340344
cancel_operation(out)
341345

342346

347+
@pytest.mark.flaky(max_runs=2, min_passes=1)
343348
def test_inspect_gcs_file_no_results(
344349
bucket, topic_id, subscription_id, capsys):
345350
try:
@@ -350,15 +355,16 @@ def test_inspect_gcs_file_no_results(
350355
topic_id,
351356
subscription_id,
352357
["EMAIL_ADDRESS", "PHONE_NUMBER"],
353-
timeout=1)
358+
timeout=TIMEOUT)
354359

355360
out, _ = capsys.readouterr()
356361

357-
assert "Inspection operation started" in out
362+
assert "No findings" in out
358363
finally:
359364
cancel_operation(out)
360365

361366

367+
@pytest.mark.flaky(max_runs=2, min_passes=1)
362368
def test_inspect_gcs_image_file(bucket, topic_id, subscription_id, capsys):
363369
try:
364370
inspect_content.inspect_gcs_file(
@@ -368,14 +374,15 @@ def test_inspect_gcs_image_file(bucket, topic_id, subscription_id, capsys):
368374
topic_id,
369375
subscription_id,
370376
["EMAIL_ADDRESS", "PHONE_NUMBER"],
371-
timeout=1)
377+
timeout=TIMEOUT)
372378

373379
out, _ = capsys.readouterr()
374-
assert "Inspection operation started" in out
380+
assert "Info type: EMAIL_ADDRESS" in out
375381
finally:
376382
cancel_operation(out)
377383

378384

385+
@pytest.mark.flaky(max_runs=2, min_passes=1)
379386
def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys):
380387
try:
381388
inspect_content.inspect_gcs_file(
@@ -385,15 +392,16 @@ def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys):
385392
topic_id,
386393
subscription_id,
387394
["EMAIL_ADDRESS", "PHONE_NUMBER"],
388-
timeout=1)
395+
timeout=TIMEOUT)
389396

390397
out, _ = capsys.readouterr()
391398

392-
assert "Inspection operation started" in out
399+
assert "Info type: EMAIL_ADDRESS" in out
393400
finally:
394401
cancel_operation(out)
395402

396403

404+
@pytest.mark.flaky(max_runs=2, min_passes=1)
397405
def test_inspect_datastore(
398406
datastore_project, topic_id, subscription_id, capsys):
399407
try:
@@ -404,14 +412,15 @@ def test_inspect_datastore(
404412
topic_id,
405413
subscription_id,
406414
["FIRST_NAME", "EMAIL_ADDRESS", "PHONE_NUMBER"],
407-
timeout=1)
415+
timeout=TIMEOUT)
408416

409417
out, _ = capsys.readouterr()
410-
assert "Inspection operation started" in out
418+
assert "Info type: EMAIL_ADDRESS" in out
411419
finally:
412420
cancel_operation(out)
413421

414422

423+
@pytest.mark.flaky(max_runs=2, min_passes=1)
415424
def test_inspect_datastore_no_results(
416425
datastore_project, topic_id, subscription_id, capsys):
417426
try:
@@ -422,10 +431,10 @@ def test_inspect_datastore_no_results(
422431
topic_id,
423432
subscription_id,
424433
["PHONE_NUMBER"],
425-
timeout=1)
434+
timeout=TIMEOUT)
426435

427436
out, _ = capsys.readouterr()
428-
assert "Inspection operation started" in out
437+
assert "No findings" in out
429438
finally:
430439
cancel_operation(out)
431440

0 commit comments

Comments
 (0)