Skip to content

Commit 0965e26

Browse files
author
Takashi Matsuo
authored
[dlp] fix: fix periodic builds timeout (#3420)
* [dlp] fix: remove gcp-devrel-py-tools

  fixes #3375
  fixes #3416
  fixes #3417

* remove wrong usage of `eventually_consistent.call`
* only test if the operation has been started
* shorter timeout for polling
* correct use of `pytest.mark.flaky`
* use try-finally
* use uuid for job_id
* add a filter to allow state = DONE
1 parent b57b562 commit 0965e26

File tree

6 files changed

+298
-294
lines changed

6 files changed

+298
-294
lines changed

dlp/inspect_content.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,7 @@ def inspect_gcs_file(
474474

475475
operation = dlp.create_dlp_job(parent, inspect_job=inspect_job)
476476
print("Inspection operation started: {}".format(operation.name))
477+
477478
# Create a Pub/Sub client and find the subscription. The subscription is
478479
# expected to already be listening to the topic.
479480
subscriber = google.cloud.pubsub.SubscriberClient()
@@ -636,6 +637,7 @@ def inspect_datastore(
636637
}
637638

638639
operation = dlp.create_dlp_job(parent, inspect_job=inspect_job)
640+
print("Inspection operation started: {}".format(operation.name))
639641

640642
# Create a Pub/Sub client and find the subscription. The subscription is
641643
# expected to already be listening to the topic.
@@ -802,6 +804,7 @@ def inspect_bigquery(
802804
}
803805

804806
operation = dlp.create_dlp_job(parent, inspect_job=inspect_job)
807+
print("Inspection operation started: {}".format(operation.name))
805808

806809
# Create a Pub/Sub client and find the subscription. The subscription is
807810
# expected to already be listening to the topic.

dlp/inspect_content_test.py

Lines changed: 113 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,18 @@
1515
import os
1616
import uuid
1717

18-
from gcp_devrel.testing import eventually_consistent
19-
from gcp_devrel.testing.flaky import flaky
2018
import google.api_core.exceptions
2119
import google.cloud.bigquery
2220
import google.cloud.datastore
2321
import google.cloud.dlp_v2
2422
import google.cloud.exceptions
2523
import google.cloud.pubsub
2624
import google.cloud.storage
27-
2825
import pytest
26+
2927
import inspect_content
3028

29+
3130
UNIQUE_STRING = str(uuid.uuid4()).split("-")[0]
3231

3332
GCLOUD_PROJECT = os.getenv("GCLOUD_PROJECT")
@@ -95,7 +94,8 @@ def subscription_id(topic_id):
9594
# Subscribes to a topic.
9695
subscriber = google.cloud.pubsub.SubscriberClient()
9796
topic_path = subscriber.topic_path(GCLOUD_PROJECT, topic_id)
98-
subscription_path = subscriber.subscription_path(GCLOUD_PROJECT, SUBSCRIPTION_ID)
97+
subscription_path = subscriber.subscription_path(
98+
GCLOUD_PROJECT, SUBSCRIPTION_ID)
9999
try:
100100
subscriber.create_subscription(subscription_path, topic_path)
101101
except google.api_core.exceptions.AlreadyExists:
@@ -289,157 +289,160 @@ def test_inspect_image_file(capsys):
289289
assert "Info type: PHONE_NUMBER" in out
290290

291291

292+
def cancel_operation(out):
293+
if "Inspection operation started" in out:
294+
# Cancel the operation
295+
operation_id = out.split(
296+
"Inspection operation started: ")[1].split("\n")[0]
297+
client = google.cloud.dlp_v2.DlpServiceClient()
298+
client.cancel_dlp_job(operation_id)
299+
300+
292301
def test_inspect_gcs_file(bucket, topic_id, subscription_id, capsys):
293-
inspect_content.inspect_gcs_file(
294-
GCLOUD_PROJECT,
295-
bucket.name,
296-
"test.txt",
297-
topic_id,
298-
subscription_id,
299-
["EMAIL_ADDRESS", "PHONE_NUMBER"],
300-
timeout=420,
301-
)
302+
try:
303+
inspect_content.inspect_gcs_file(
304+
GCLOUD_PROJECT,
305+
bucket.name,
306+
"test.txt",
307+
topic_id,
308+
subscription_id,
309+
["EMAIL_ADDRESS", "PHONE_NUMBER"],
310+
timeout=1
311+
)
302312

303-
out, _ = capsys.readouterr()
304-
assert "Inspection operation started" in out
305-
# Cancel the operation
306-
operation_id = out.split("Inspection operation started: ")[1].split("\n")[0]
307-
print(operation_id)
308-
client = google.cloud.dlp_v2.DlpServiceClient()
309-
client.cancel_dlp_job(operation_id)
313+
out, _ = capsys.readouterr()
314+
assert "Inspection operation started" in out
315+
finally:
316+
cancel_operation(out)
310317

311318

312319
def test_inspect_gcs_file_with_custom_info_types(
313-
bucket, topic_id, subscription_id, capsys
314-
):
315-
dictionaries = ["gary@somedomain.com"]
316-
regexes = ["\\(\\d{3}\\) \\d{3}-\\d{4}"]
320+
bucket, topic_id, subscription_id, capsys):
321+
try:
322+
dictionaries = ["gary@somedomain.com"]
323+
regexes = ["\\(\\d{3}\\) \\d{3}-\\d{4}"]
317324

318-
inspect_content.inspect_gcs_file(
319-
GCLOUD_PROJECT,
320-
bucket.name,
321-
"test.txt",
322-
topic_id,
323-
subscription_id,
324-
[],
325-
custom_dictionaries=dictionaries,
326-
custom_regexes=regexes,
327-
timeout=420,
328-
)
325+
inspect_content.inspect_gcs_file(
326+
GCLOUD_PROJECT,
327+
bucket.name,
328+
"test.txt",
329+
topic_id,
330+
subscription_id,
331+
[],
332+
custom_dictionaries=dictionaries,
333+
custom_regexes=regexes,
334+
timeout=1)
329335

330-
out, _ = capsys.readouterr()
336+
out, _ = capsys.readouterr()
331337

332-
assert "Inspection operation started" in out
333-
# Cancel the operation
334-
operation_id = out.split("Inspection operation started: ")[1].split("\n")[0]
335-
print(operation_id)
336-
client = google.cloud.dlp_v2.DlpServiceClient()
337-
client.cancel_dlp_job(operation_id)
338+
assert "Inspection operation started" in out
339+
finally:
340+
cancel_operation(out)
338341

339342

340-
def test_inspect_gcs_file_no_results(bucket, topic_id, subscription_id, capsys):
341-
inspect_content.inspect_gcs_file(
342-
GCLOUD_PROJECT,
343-
bucket.name,
344-
"harmless.txt",
345-
topic_id,
346-
subscription_id,
347-
["EMAIL_ADDRESS", "PHONE_NUMBER"],
348-
timeout=420,
349-
)
343+
def test_inspect_gcs_file_no_results(
344+
bucket, topic_id, subscription_id, capsys):
345+
try:
346+
inspect_content.inspect_gcs_file(
347+
GCLOUD_PROJECT,
348+
bucket.name,
349+
"harmless.txt",
350+
topic_id,
351+
subscription_id,
352+
["EMAIL_ADDRESS", "PHONE_NUMBER"],
353+
timeout=1)
350354

351-
out, _ = capsys.readouterr()
355+
out, _ = capsys.readouterr()
352356

353-
assert "Inspection operation started" in out
354-
# Cancel the operation
355-
operation_id = out.split("Inspection operation started: ")[1].split("\n")[0]
356-
print(operation_id)
357-
client = google.cloud.dlp_v2.DlpServiceClient()
358-
client.cancel_dlp_job(operation_id)
357+
assert "Inspection operation started" in out
358+
finally:
359+
cancel_operation(out)
359360

360361

361-
@pytest.mark.skip(reason="nondeterministically failing")
362362
def test_inspect_gcs_image_file(bucket, topic_id, subscription_id, capsys):
363-
inspect_content.inspect_gcs_file(
364-
GCLOUD_PROJECT,
365-
bucket.name,
366-
"test.png",
367-
topic_id,
368-
subscription_id,
369-
["EMAIL_ADDRESS", "PHONE_NUMBER"],
370-
)
363+
try:
364+
inspect_content.inspect_gcs_file(
365+
GCLOUD_PROJECT,
366+
bucket.name,
367+
"test.png",
368+
topic_id,
369+
subscription_id,
370+
["EMAIL_ADDRESS", "PHONE_NUMBER"],
371+
timeout=1)
371372

372-
out, _ = capsys.readouterr()
373-
assert "Info type: EMAIL_ADDRESS" in out
373+
out, _ = capsys.readouterr()
374+
assert "Inspection operation started" in out
375+
finally:
376+
cancel_operation(out)
374377

375378

376379
def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys):
377-
inspect_content.inspect_gcs_file(
378-
GCLOUD_PROJECT,
379-
bucket.name,
380-
"*",
381-
topic_id,
382-
subscription_id,
383-
["EMAIL_ADDRESS", "PHONE_NUMBER"],
384-
)
380+
try:
381+
inspect_content.inspect_gcs_file(
382+
GCLOUD_PROJECT,
383+
bucket.name,
384+
"*",
385+
topic_id,
386+
subscription_id,
387+
["EMAIL_ADDRESS", "PHONE_NUMBER"],
388+
timeout=1)
385389

386-
out, _ = capsys.readouterr()
390+
out, _ = capsys.readouterr()
387391

388-
assert "Inspection operation started" in out
389-
# Cancel the operation
390-
operation_id = out.split("Inspection operation started: ")[1].split("\n")[0]
391-
print(operation_id)
392-
client = google.cloud.dlp_v2.DlpServiceClient()
393-
client.cancel_dlp_job(operation_id)
392+
assert "Inspection operation started" in out
393+
finally:
394+
cancel_operation(out)
394395

395396

396-
@flaky
397-
def test_inspect_datastore(datastore_project, topic_id, subscription_id, capsys):
398-
@eventually_consistent.call
399-
def _():
397+
def test_inspect_datastore(
398+
datastore_project, topic_id, subscription_id, capsys):
399+
try:
400400
inspect_content.inspect_datastore(
401401
GCLOUD_PROJECT,
402402
datastore_project,
403403
DATASTORE_KIND,
404404
topic_id,
405405
subscription_id,
406406
["FIRST_NAME", "EMAIL_ADDRESS", "PHONE_NUMBER"],
407-
)
407+
timeout=1)
408408

409409
out, _ = capsys.readouterr()
410-
assert "Info type: EMAIL_ADDRESS" in out
410+
assert "Inspection operation started" in out
411+
finally:
412+
cancel_operation(out)
411413

412414

413-
@flaky
414415
def test_inspect_datastore_no_results(
415-
datastore_project, topic_id, subscription_id, capsys
416-
):
417-
@eventually_consistent.call
418-
def _():
416+
datastore_project, topic_id, subscription_id, capsys):
417+
try:
419418
inspect_content.inspect_datastore(
420419
GCLOUD_PROJECT,
421420
datastore_project,
422421
DATASTORE_KIND,
423422
topic_id,
424423
subscription_id,
425424
["PHONE_NUMBER"],
426-
)
425+
timeout=1)
427426

428427
out, _ = capsys.readouterr()
429-
assert "No findings" in out
428+
assert "Inspection operation started" in out
429+
finally:
430+
cancel_operation(out)
430431

431432

432-
@pytest.mark.skip(reason="unknown issue")
433433
def test_inspect_bigquery(bigquery_project, topic_id, subscription_id, capsys):
434-
inspect_content.inspect_bigquery(
435-
GCLOUD_PROJECT,
436-
bigquery_project,
437-
BIGQUERY_DATASET_ID,
438-
BIGQUERY_TABLE_ID,
439-
topic_id,
440-
subscription_id,
441-
["FIRST_NAME", "EMAIL_ADDRESS", "PHONE_NUMBER"],
442-
)
434+
try:
435+
inspect_content.inspect_bigquery(
436+
GCLOUD_PROJECT,
437+
bigquery_project,
438+
BIGQUERY_DATASET_ID,
439+
BIGQUERY_TABLE_ID,
440+
topic_id,
441+
subscription_id,
442+
["FIRST_NAME", "EMAIL_ADDRESS", "PHONE_NUMBER"],
443+
timeout=1)
443444

444-
out, _ = capsys.readouterr()
445-
assert "Info type: FIRST_NAME" in out
445+
out, _ = capsys.readouterr()
446+
assert "Inspection operation started" in out
447+
finally:
448+
cancel_operation(out)

dlp/jobs_test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414

1515
import os
16-
from flaky import flaky
16+
import uuid
1717

1818
import pytest
1919

@@ -24,6 +24,7 @@
2424
TEST_TABLE_PROJECT_ID = "bigquery-public-data"
2525
TEST_DATASET_ID = "san_francisco"
2626
TEST_TABLE_ID = "bikeshare_trips"
27+
test_job_id = "test-job-{}".format(uuid.uuid4())
2728

2829

2930
@pytest.fixture(scope="module")
@@ -46,7 +47,7 @@ def test_job_name():
4647
},
4748
}
4849

49-
response = dlp.create_dlp_job(parent, risk_job=risk_job)
50+
response = dlp.create_dlp_job(parent, risk_job=risk_job, job_id=test_job_id)
5051
full_path = response.name
5152
# API expects only job name, not full project path
5253
job_name = full_path[full_path.rfind("/") + 1:]
@@ -66,11 +67,10 @@ def test_list_dlp_jobs(test_job_name, capsys):
6667
assert test_job_name not in out
6768

6869

69-
@flaky
7070
def test_list_dlp_jobs_with_filter(test_job_name, capsys):
7171
jobs.list_dlp_jobs(
7272
GCLOUD_PROJECT,
73-
filter_string="state=RUNNING",
73+
filter_string="state=RUNNING OR state=DONE",
7474
job_type="RISK_ANALYSIS_JOB",
7575
)
7676

dlp/requirements-test.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
pytest==5.3.2
2-
gcp-devrel-py-tools==0.0.15
32
flaky==3.6.1
43
mock==3.0.5
4+

0 commit comments

Comments
 (0)