21
21
22
22
# [START dlp_deidentify_masking]
23
23
def deidentify_with_mask (
24
- project , string , info_types , masking_character = None , number_to_mask = 0
24
+ project , input_str , info_types , masking_character = None , number_to_mask = 0
25
25
):
26
26
"""Uses the Data Loss Prevention API to deidentify sensitive data in a
27
27
string by masking it with a character.
28
28
Args:
29
29
project: The Google Cloud project id to use as a parent resource.
30
- item : The string to deidentify (will be treated as text).
30
+ input_str : The string to deidentify (will be treated as text).
31
31
masking_character: The character to mask matching sensitive data with.
32
32
number_to_mask: The maximum number of sensitive characters to mask in
33
33
a match. If omitted or set to zero, the API will default to no
@@ -67,7 +67,7 @@ def deidentify_with_mask(
67
67
}
68
68
69
69
# Construct item
70
- item = {"value" : string }
70
+ item = {"value" : input_str }
71
71
72
72
# Call the API
73
73
response = dlp .deidentify_content (
@@ -83,11 +83,76 @@ def deidentify_with_mask(
83
83
84
84
# [END dlp_deidentify_masking]
85
85
86
+ # [START dlp_deidentify_replace]
87
def deidentify_with_replace(
    project,
    input_str,
    info_types,
    replacement_str="REPLACEMENT_STR",
):
    """Deidentify a string by swapping each sensitive match for fixed text.

    Sends `input_str` to the Data Loss Prevention API's content
    deidentification call, configured with a replace-value transformation,
    and prints the text carried by the response.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        input_str: The string to deidentify (will be treated as text).
        info_types: A list of strings representing info types to look for.
        replacement_str: The string substituted for every value that
            matches one of the given info types.
    Returns:
        None; the value of the item in the API response is printed to
        the terminal.
    """
    import google.cloud.dlp

    # Create the DLP service client.
    client = google.cloud.dlp_v2.DlpServiceClient()

    # Expand the bare project id into the full parent resource path.
    parent_path = client.project_path(project)

    # Inspection settings: one {"name": ...} entry per requested info type.
    wanted_types = []
    for type_name in info_types:
        wanted_types.append({"name": type_name})
    inspect_settings = {"info_types": wanted_types}

    # Deidentification settings, assembled from the innermost piece outward.
    replace_config = {"new_value": {"string_value": replacement_str}}
    transformation = {
        "primitive_transformation": {"replace_config": replace_config}
    }
    deidentify_settings = {
        "info_type_transformations": {"transformations": [transformation]}
    }

    # Wrap the raw text in the content item structure sent to the API.
    content_item = {"value": input_str}

    # Issue the request.
    result = client.deidentify_content(
        parent_path,
        inspect_config=inspect_settings,
        deidentify_config=deidentify_settings,
        item=content_item,
    )

    # Show the text returned in the response item.
    print(result.item.value)
147
+
148
+ # [END dlp_deidentify_replace]
86
149
87
150
# [START dlp_deidentify_fpe]
151
+
152
+
88
153
def deidentify_with_fpe (
89
154
project ,
90
- string ,
155
+ input_str ,
91
156
info_types ,
92
157
alphabet = None ,
93
158
surrogate_type = None ,
@@ -98,7 +163,7 @@ def deidentify_with_fpe(
98
163
string using Format Preserving Encryption (FPE).
99
164
Args:
100
165
project: The Google Cloud project id to use as a parent resource.
101
- item : The string to deidentify (will be treated as text).
166
+ input_str : The string to deidentify (will be treated as text).
102
167
alphabet: The set of characters to replace sensitive ones with. For
103
168
more information, see https://cloud.google.com/dlp/docs/reference/
104
169
rest/v2beta2/organizations.deidentifyTemplates#ffxcommonnativealphabet
@@ -166,7 +231,7 @@ def deidentify_with_fpe(
166
231
}
167
232
168
233
# Convert string to item
169
- item = {"value" : string }
234
+ item = {"value" : input_str }
170
235
171
236
# Call the API
172
237
response = dlp .deidentify_content (
@@ -186,7 +251,7 @@ def deidentify_with_fpe(
186
251
# [START dlp_reidentify_fpe]
187
252
def reidentify_with_fpe (
188
253
project ,
189
- string ,
254
+ input_str ,
190
255
alphabet = None ,
191
256
surrogate_type = None ,
192
257
key_name = None ,
@@ -196,7 +261,7 @@ def reidentify_with_fpe(
196
261
string that was encrypted by Format Preserving Encryption (FPE).
197
262
Args:
198
263
project: The Google Cloud project id to use as a parent resource.
199
- item : The string to deidentify (will be treated as text).
264
+ input_str : The string to deidentify (will be treated as text).
200
265
alphabet: The set of characters to replace sensitive ones with. For
201
266
more information, see https://cloud.google.com/dlp/docs/reference/
202
267
rest/v2beta2/organizations.deidentifyTemplates#ffxcommonnativealphabet
@@ -255,7 +320,7 @@ def reidentify_with_fpe(
255
320
}
256
321
257
322
# Convert string to item
258
- item = {"value" : string }
323
+ item = {"value" : input_str }
259
324
260
325
# Call the API
261
326
response = dlp .reidentify_content (
@@ -531,6 +596,28 @@ def redact_sensitive_data(project, item, info_types):
531
596
help = "The character to mask matching sensitive data with." ,
532
597
)
533
598
599
+ replace_parser = subparsers .add_parser (
600
+ "deid_replace" ,
601
+ help = "Deidentify sensitive data in a string by replacing it with "
602
+ "another string." ,
603
+ )
604
+ replace_parser .add_argument (
605
+ "--info_types" ,
606
+ nargs = "+" ,
607
+ help = "Strings representing info types to look for. A full list of "
608
+ "info categories and types is available from the API. Examples "
609
+ 'include "FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS". '
610
+ "If unspecified, the three above examples will be used." ,
611
+ default = ["FIRST_NAME" , "LAST_NAME" , "EMAIL_ADDRESS" ],
612
+ )
613
+ replace_parser .add_argument (
614
+ "project" ,
615
+ help = "The Google Cloud project id to use as a parent resource." ,
616
+ )
617
+ replace_parser .add_argument ("item" , help = "The string to deidentify." )
618
+ replace_parser .add_argument ("replacement_str" , help = "The string to "
619
+ "replace all matched values with." )
620
+
534
621
fpe_parser = subparsers .add_parser (
535
622
"deid_fpe" ,
536
623
help = "Deidentify sensitive data in a string using Format Preserving "
@@ -715,6 +802,13 @@ def redact_sensitive_data(project, item, info_types):
715
802
masking_character = args .masking_character ,
716
803
number_to_mask = args .number_to_mask ,
717
804
)
805
+ elif args .content == "deid_replace" :
806
+ deidentify_with_replace (
807
+ args .project ,
808
+ args .item ,
809
+ args .info_types ,
810
+ replacement_str = args .replacement_str ,
811
+ )
718
812
elif args .content == "deid_fpe" :
719
813
deidentify_with_fpe (
720
814
args .project ,
0 commit comments