Skip to content

Commit 91d30e2

Browse files
author
Takashi Matsuo
committed
correct comparison of the job name
* re-enabled some tests * remove delay between retries * appropriate timeout value
1 parent 4c27b63 commit 91d30e2

File tree

4 files changed

+76
-51
lines changed

4 files changed

+76
-51
lines changed

dlp/inspect_content.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,11 @@ def inspect_gcs_file(
486486

487487
def callback(message):
488488
try:
489-
if message.attributes["DlpJobName"] == operation.name:
489+
# The DlpJobName in the Pub/Sub message has the location indicator
490+
# and we need to remove that part for comparison.
491+
dlp_job_name = message.attributes["DlpJobName"].replace(
492+
'/locations/global', '')
493+
if dlp_job_name == operation.name:
490494
# This is the message we're looking for, so acknowledge it.
491495
message.ack()
492496

@@ -650,7 +654,11 @@ def inspect_datastore(
650654

651655
def callback(message):
652656
try:
653-
if message.attributes["DlpJobName"] == operation.name:
657+
# The DlpJobName in the Pub/Sub message has the location indicator
658+
# and we need to remove that part for comparison.
659+
dlp_job_name = message.attributes["DlpJobName"].replace(
660+
'/locations/global', '')
661+
if dlp_job_name == operation.name:
654662
# This is the message we're looking for, so acknowledge it.
655663
message.ack()
656664

@@ -817,7 +825,11 @@ def inspect_bigquery(
817825

818826
def callback(message):
819827
try:
820-
if message.attributes["DlpJobName"] == operation.name:
828+
# The DlpJobName in the Pub/Sub message has the location indicator
829+
# and we need to remove that part for comparison.
830+
dlp_job_name = message.attributes["DlpJobName"].replace(
831+
'/locations/global', '')
832+
if dlp_job_name == operation.name:
821833
# This is the message we're looking for, so acknowledge it.
822834
message.ack()
823835

dlp/inspect_content_test.py

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@
4040
BIGQUERY_DATASET_ID = "dlp_test_dataset" + UNIQUE_STRING
4141
BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING
4242

43+
TIMEOUT = 180 # 3 minutes
44+
4345

4446
@pytest.fixture(scope="module")
4547
def bucket():
@@ -298,6 +300,7 @@ def cancel_operation(out):
298300
client.cancel_dlp_job(operation_id)
299301

300302

303+
@pytest.mark.flaky(max_runs=2, min_passes=1)
301304
def test_inspect_gcs_file(bucket, topic_id, subscription_id, capsys):
302305
try:
303306
inspect_content.inspect_gcs_file(
@@ -307,15 +310,16 @@ def test_inspect_gcs_file(bucket, topic_id, subscription_id, capsys):
307310
topic_id,
308311
subscription_id,
309312
["EMAIL_ADDRESS", "PHONE_NUMBER"],
310-
timeout=1
313+
timeout=TIMEOUT
311314
)
312315

313316
out, _ = capsys.readouterr()
314-
assert "Inspection operation started" in out
317+
assert "Info type: EMAIL_ADDRESS" in out
315318
finally:
316319
cancel_operation(out)
317320

318321

322+
@pytest.mark.flaky(max_runs=2, min_passes=1)
319323
def test_inspect_gcs_file_with_custom_info_types(
320324
bucket, topic_id, subscription_id, capsys):
321325
try:
@@ -331,15 +335,16 @@ def test_inspect_gcs_file_with_custom_info_types(
331335
[],
332336
custom_dictionaries=dictionaries,
333337
custom_regexes=regexes,
334-
timeout=1)
338+
timeout=TIMEOUT)
335339

336340
out, _ = capsys.readouterr()
337341

338-
assert "Inspection operation started" in out
342+
assert "Info type: EMAIL_ADDRESS" in out
339343
finally:
340344
cancel_operation(out)
341345

342346

347+
@pytest.mark.flaky(max_runs=2, min_passes=1)
343348
def test_inspect_gcs_file_no_results(
344349
bucket, topic_id, subscription_id, capsys):
345350
try:
@@ -350,15 +355,16 @@ def test_inspect_gcs_file_no_results(
350355
topic_id,
351356
subscription_id,
352357
["EMAIL_ADDRESS", "PHONE_NUMBER"],
353-
timeout=1)
358+
timeout=TIMEOUT)
354359

355360
out, _ = capsys.readouterr()
356361

357-
assert "Inspection operation started" in out
362+
assert "No findings" in out
358363
finally:
359364
cancel_operation(out)
360365

361366

367+
@pytest.mark.flaky(max_runs=2, min_passes=1)
362368
def test_inspect_gcs_image_file(bucket, topic_id, subscription_id, capsys):
363369
try:
364370
inspect_content.inspect_gcs_file(
@@ -368,14 +374,15 @@ def test_inspect_gcs_image_file(bucket, topic_id, subscription_id, capsys):
368374
topic_id,
369375
subscription_id,
370376
["EMAIL_ADDRESS", "PHONE_NUMBER"],
371-
timeout=1)
377+
timeout=TIMEOUT)
372378

373379
out, _ = capsys.readouterr()
374-
assert "Inspection operation started" in out
380+
assert "Info type: EMAIL_ADDRESS" in out
375381
finally:
376382
cancel_operation(out)
377383

378384

385+
@pytest.mark.flaky(max_runs=2, min_passes=1)
379386
def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys):
380387
try:
381388
inspect_content.inspect_gcs_file(
@@ -385,15 +392,16 @@ def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys):
385392
topic_id,
386393
subscription_id,
387394
["EMAIL_ADDRESS", "PHONE_NUMBER"],
388-
timeout=1)
395+
timeout=TIMEOUT)
389396

390397
out, _ = capsys.readouterr()
391398

392-
assert "Inspection operation started" in out
399+
assert "Info type: EMAIL_ADDRESS" in out
393400
finally:
394401
cancel_operation(out)
395402

396403

404+
@pytest.mark.flaky(max_runs=2, min_passes=1)
397405
def test_inspect_datastore(
398406
datastore_project, topic_id, subscription_id, capsys):
399407
try:
@@ -404,14 +412,15 @@ def test_inspect_datastore(
404412
topic_id,
405413
subscription_id,
406414
["FIRST_NAME", "EMAIL_ADDRESS", "PHONE_NUMBER"],
407-
timeout=1)
415+
timeout=TIMEOUT)
408416

409417
out, _ = capsys.readouterr()
410-
assert "Inspection operation started" in out
418+
assert "Info type: EMAIL_ADDRESS" in out
411419
finally:
412420
cancel_operation(out)
413421

414422

423+
@pytest.mark.flaky(max_runs=2, min_passes=1)
415424
def test_inspect_datastore_no_results(
416425
datastore_project, topic_id, subscription_id, capsys):
417426
try:
@@ -422,10 +431,10 @@ def test_inspect_datastore_no_results(
422431
topic_id,
423432
subscription_id,
424433
["PHONE_NUMBER"],
425-
timeout=1)
434+
timeout=TIMEOUT)
426435

427436
out, _ = capsys.readouterr()
428-
assert "Inspection operation started" in out
437+
assert "No findings" in out
429438
finally:
430439
cancel_operation(out)
431440

dlp/risk.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,11 @@ def numerical_risk_analysis(
8686
operation = dlp.create_dlp_job(parent, risk_job=risk_job)
8787

8888
def callback(message):
89-
if message.attributes["DlpJobName"] == operation.name:
89+
# The DlpJobName in the Pub/Sub message has the location indicator
90+
# and we need to remove that part for comparison.
91+
dlp_job_name = message.attributes["DlpJobName"].replace(
92+
'/locations/global', '')
93+
if dlp_job_name == operation.name:
9094
# This is the message we're looking for, so acknowledge it.
9195
message.ack()
9296

@@ -196,7 +200,11 @@ def categorical_risk_analysis(
196200
operation = dlp.create_dlp_job(parent, risk_job=risk_job)
197201

198202
def callback(message):
199-
if message.attributes["DlpJobName"] == operation.name:
203+
# The DlpJobName in the Pub/Sub message has the location indicator
204+
# and we need to remove that part for comparison.
205+
dlp_job_name = message.attributes["DlpJobName"].replace(
206+
'/locations/global', '')
207+
if dlp_job_name == operation.name:
200208
# This is the message we're looking for, so acknowledge it.
201209
message.ack()
202210

@@ -324,7 +332,11 @@ def map_fields(field):
324332
operation = dlp.create_dlp_job(parent, risk_job=risk_job)
325333

326334
def callback(message):
327-
if message.attributes["DlpJobName"] == operation.name:
335+
# The DlpJobName in the Pub/Sub message has the location indicator
336+
# and we need to remove that part for comparison.
337+
dlp_job_name = message.attributes["DlpJobName"].replace(
338+
'/locations/global', '')
339+
if dlp_job_name == operation.name:
328340
# This is the message we're looking for, so acknowledge it.
329341
message.ack()
330342

@@ -460,7 +472,11 @@ def map_fields(field):
460472
operation = dlp.create_dlp_job(parent, risk_job=risk_job)
461473

462474
def callback(message):
463-
if message.attributes["DlpJobName"] == operation.name:
475+
# The DlpJobName in the Pub/Sub message has the location indicator
476+
# and we need to remove that part for comparison.
477+
dlp_job_name = message.attributes["DlpJobName"].replace(
478+
'/locations/global', '')
479+
if dlp_job_name == operation.name:
464480
# This is the message we're looking for, so acknowledge it.
465481
message.ack()
466482

@@ -617,7 +633,11 @@ def map_fields(quasi_id, info_type):
617633
operation = dlp.create_dlp_job(parent, risk_job=risk_job)
618634

619635
def callback(message):
620-
if message.attributes["DlpJobName"] == operation.name:
636+
# The DlpJobName in the Pub/Sub message has the location indicator
637+
# and we need to remove that part for comparison.
638+
dlp_job_name = message.attributes["DlpJobName"].replace(
639+
'/locations/global', '')
640+
if dlp_job_name == operation.name:
621641
# This is the message we're looking for, so acknowledge it.
622642
message.ack()
623643

dlp/risk_test.py

Lines changed: 13 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
# limitations under the License.
1414

1515
import os
16-
import time
1716
import uuid
1817

1918
import google.cloud.bigquery
@@ -37,14 +36,14 @@
3736
BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING
3837
BIGQUERY_HARMFUL_TABLE_ID = "harmful" + UNIQUE_STRING
3938

40-
TIMEOUT = 180 # 3 minutes
39+
TIMEOUT = 60 # 1 minute
4140

4241

4342
# Create new custom topic/subscription
4443
# We observe sometimes all the tests in this file fail. In a
4544
# hypothesis where DLP service somehow loses the connection to the
4645
# topic, now we use function scope for Pub/Sub fixtures.
47-
@pytest.fixture(scope="function")
46+
@pytest.fixture(scope="module")
4847
def topic_id():
4948
# Creates a pubsub topic, and tears it down.
5049
publisher = google.cloud.pubsub.PublisherClient()
@@ -59,7 +58,7 @@ def topic_id():
5958
publisher.delete_topic(topic_path)
6059

6160

62-
@pytest.fixture(scope="function")
61+
@pytest.fixture(scope="module")
6362
def subscription_id(topic_id):
6463
# Subscribes to a topic.
6564
subscriber = google.cloud.pubsub.SubscriberClient()
@@ -166,22 +165,7 @@ def bigquery_project():
166165
bigquery_client.delete_dataset(dataset_ref, delete_contents=True)
167166

168167

169-
def delay(err, *args):
170-
# 20 mins of delay. This sounds like too long a delay, but we
171-
# occasionally observe consequtive time block where operations are
172-
# slow which leads to the test failures. These situations tend to
173-
# get self-healed in 20 minutes or so, so I'm trying this strategy.
174-
#
175-
# There are 10 tests, so we don't want the retry delay happening
176-
# for all the tests. When we exhaust the MAX_FLAKY_WAIT, we retry
177-
# the test immediately.
178-
wait_time = min(pytest.MAX_FLAKY_WAIT, 60*20)
179-
pytest.MAX_FLAKY_WAIT -= wait_time
180-
time.sleep(wait_time)
181-
return True
182-
183-
184-
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
168+
@pytest.mark.flaky(max_runs=2, min_passes=1)
185169
def test_numerical_risk_analysis(
186170
topic_id, subscription_id, bigquery_project, capsys
187171
):
@@ -200,7 +184,7 @@ def test_numerical_risk_analysis(
200184
assert "Value Range:" in out
201185

202186

203-
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
187+
@pytest.mark.flaky(max_runs=2, min_passes=1)
204188
def test_categorical_risk_analysis_on_string_field(
205189
topic_id, subscription_id, bigquery_project, capsys
206190
):
@@ -219,7 +203,7 @@ def test_categorical_risk_analysis_on_string_field(
219203
assert "Most common value occurs" in out
220204

221205

222-
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
206+
@pytest.mark.flaky(max_runs=2, min_passes=1)
223207
def test_categorical_risk_analysis_on_number_field(
224208
topic_id, subscription_id, bigquery_project, capsys
225209
):
@@ -238,7 +222,7 @@ def test_categorical_risk_analysis_on_number_field(
238222
assert "Most common value occurs" in out
239223

240224

241-
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
225+
@pytest.mark.flaky(max_runs=2, min_passes=1)
242226
def test_k_anonymity_analysis_single_field(
243227
topic_id, subscription_id, bigquery_project, capsys
244228
):
@@ -258,7 +242,7 @@ def test_k_anonymity_analysis_single_field(
258242
assert "Class size:" in out
259243

260244

261-
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
245+
@pytest.mark.flaky(max_runs=2, min_passes=1)
262246
def test_k_anonymity_analysis_multiple_fields(
263247
topic_id, subscription_id, bigquery_project, capsys
264248
):
@@ -278,7 +262,7 @@ def test_k_anonymity_analysis_multiple_fields(
278262
assert "Class size:" in out
279263

280264

281-
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
265+
@pytest.mark.flaky(max_runs=2, min_passes=1)
282266
def test_l_diversity_analysis_single_field(
283267
topic_id, subscription_id, bigquery_project, capsys
284268
):
@@ -300,7 +284,7 @@ def test_l_diversity_analysis_single_field(
300284
assert "Sensitive value" in out
301285

302286

303-
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
287+
@pytest.mark.flaky(max_runs=2, min_passes=1)
304288
def test_l_diversity_analysis_multiple_field(
305289
topic_id, subscription_id, bigquery_project, capsys
306290
):
@@ -322,7 +306,7 @@ def test_l_diversity_analysis_multiple_field(
322306
assert "Sensitive value" in out
323307

324308

325-
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
309+
@pytest.mark.flaky(max_runs=2, min_passes=1)
326310
def test_k_map_estimate_analysis_single_field(
327311
topic_id, subscription_id, bigquery_project, capsys
328312
):
@@ -344,7 +328,7 @@ def test_k_map_estimate_analysis_single_field(
344328
assert "Values" in out
345329

346330

347-
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
331+
@pytest.mark.flaky(max_runs=2, min_passes=1)
348332
def test_k_map_estimate_analysis_multiple_field(
349333
topic_id, subscription_id, bigquery_project, capsys
350334
):
@@ -366,7 +350,7 @@ def test_k_map_estimate_analysis_multiple_field(
366350
assert "Values" in out
367351

368352

369-
@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
353+
@pytest.mark.flaky(max_runs=2, min_passes=1)
370354
def test_k_map_estimate_analysis_quasi_ids_info_types_equal(
371355
topic_id, subscription_id, bigquery_project
372356
):

0 commit comments

Comments
 (0)