Skip to content

Commit 66e457f

Browse files
billyjacobsonleahecolecrwilcox
authored
bigtable: read and filter snippets (#2707)
* Bigtable write samples * Cleaning up test * Fixing lint issues * Fixing imports in test * Cleaning up samples and showing error handling * removing note about the row commit bug * Add fixture to write test * Read snippets WIP * Cleanup bigtable python: Use new row types for mutations Update bigtable version in requirements Delete table after tests * Change bigtable cluster variable to bigtable instance for consistency Create and delete quickstart table during test * Fixing step size for metric scaler Create unique tables for quickstart tests * Creating fixtures for quickstart tests Fixing hb quickstart test output * Fix quickstart extra delete table Update happybase to use direct row * Use clearer instance names for tests Create unique instances for metric scaler tests * Linting * get session issue in test sorted out * Read snippets with tests working * Filter snippets with tests working * Lint * Update module import * Fix bigtable instance env var * Change scope to module * Don't print empty parens * sort cols * sort by cfs too * Make requirements more specific to samples. LInt fixes Co-authored-by: Leah E. Cole <[email protected]> Co-authored-by: Christopher Wilcox <[email protected]>
1 parent de5b359 commit 66e457f

File tree

10 files changed

+1523
-0
lines changed

10 files changed

+1523
-0
lines changed
Lines changed: 360 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,360 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2020, Google LLC
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
# [START bigtable_filters_limit_row_sample]
17+
# [START bigtable_filters_limit_row_regex]
18+
# [START bigtable_filters_limit_cells_per_col]
19+
# [START bigtable_filters_limit_cells_per_row]
20+
# [START bigtable_filters_limit_cells_per_row_offset]
21+
# [START bigtable_filters_limit_col_family_regex]
22+
# [START bigtable_filters_limit_col_qualifier_regex]
23+
# [START bigtable_filters_limit_col_range]
24+
# [START bigtable_filters_limit_value_range]
25+
# [START bigtable_filters_limit_value_regex]
26+
# [START bigtable_filters_limit_timestamp_range]
27+
# [START bigtable_filters_limit_block_all]
28+
# [START bigtable_filters_limit_pass_all]
29+
# [START bigtable_filters_modify_strip_value]
30+
# [START bigtable_filters_modify_apply_label]
31+
# [START bigtable_filters_composing_chain]
32+
# [START bigtable_filters_composing_interleave]
33+
# [START bigtable_filters_composing_condition]
34+
from google.cloud import bigtable
35+
import google.cloud.bigtable.row_filters as row_filters
36+
37+
# [END bigtable_filters_limit_row_sample]
38+
# [END bigtable_filters_limit_row_regex]
39+
# [END bigtable_filters_limit_cells_per_col]
40+
# [END bigtable_filters_limit_cells_per_row]
41+
# [END bigtable_filters_limit_cells_per_row_offset]
42+
# [END bigtable_filters_limit_col_family_regex]
43+
# [END bigtable_filters_limit_col_qualifier_regex]
44+
# [END bigtable_filters_limit_col_range]
45+
# [END bigtable_filters_limit_value_range]
46+
# [END bigtable_filters_limit_value_regex]
47+
# [END bigtable_filters_limit_timestamp_range]
48+
# [END bigtable_filters_limit_block_all]
49+
# [END bigtable_filters_limit_pass_all]
50+
# [END bigtable_filters_modify_strip_value]
51+
# [END bigtable_filters_modify_apply_label]
52+
# [END bigtable_filters_composing_chain]
53+
# [END bigtable_filters_composing_interleave]
54+
# [END bigtable_filters_composing_condition]
55+
56+
# [START bigtable_filters_limit_timestamp_range]
57+
import datetime
58+
59+
60+
# [END bigtable_filters_limit_timestamp_range]
61+
62+
# [START bigtable_filters_limit_row_sample]
63+
def filter_limit_row_sample(project_id, instance_id, table_id):
    """Print every row read through a ``RowSampleFilter`` built with .75.

    Args:
        project_id: Project handed to ``bigtable.Client``.
        instance_id: ID used to look up the instance on the client.
        table_id: ID used to look up the table on the instance.
    """
    table = (
        bigtable.Client(project=project_id, admin=True)
        .instance(instance_id)
        .table(table_id)
    )

    sample_filter = row_filters.RowSampleFilter(.75)
    for sampled_row in table.read_rows(filter_=sample_filter):
        print_row(sampled_row)
71+
72+
73+
# [END bigtable_filters_limit_row_sample]
74+
# [START bigtable_filters_limit_row_regex]
75+
def filter_limit_row_regex(project_id, instance_id, table_id):
    """Print every row read through a ``RowKeyRegexFilter``.

    The filter is built from the UTF-8 encoded pattern ``.*#20190501$``.

    Args:
        project_id: Project handed to ``bigtable.Client``.
        instance_id: ID used to look up the instance on the client.
        table_id: ID used to look up the table on the instance.
    """
    table = (
        bigtable.Client(project=project_id, admin=True)
        .instance(instance_id)
        .table(table_id)
    )

    key_pattern = ".*#20190501$".encode("utf-8")
    key_filter = row_filters.RowKeyRegexFilter(key_pattern)
    for matched_row in table.read_rows(filter_=key_filter):
        print_row(matched_row)
84+
85+
86+
# [END bigtable_filters_limit_row_regex]
87+
# [START bigtable_filters_limit_cells_per_col]
88+
def filter_limit_cells_per_col(project_id, instance_id, table_id):
    """Print every row read through a ``CellsColumnLimitFilter`` of 2.

    Args:
        project_id: Project handed to ``bigtable.Client``.
        instance_id: ID used to look up the instance on the client.
        table_id: ID used to look up the table on the instance.
    """
    table = (
        bigtable.Client(project=project_id, admin=True)
        .instance(instance_id)
        .table(table_id)
    )

    per_column_limit = row_filters.CellsColumnLimitFilter(2)
    for limited_row in table.read_rows(filter_=per_column_limit):
        print_row(limited_row)
96+
97+
98+
# [END bigtable_filters_limit_cells_per_col]
99+
# [START bigtable_filters_limit_cells_per_row]
100+
def filter_limit_cells_per_row(project_id, instance_id, table_id):
    """Print every row read through a ``CellsRowLimitFilter`` of 2.

    Args:
        project_id: Project handed to ``bigtable.Client``.
        instance_id: ID used to look up the instance on the client.
        table_id: ID used to look up the table on the instance.
    """
    table = (
        bigtable.Client(project=project_id, admin=True)
        .instance(instance_id)
        .table(table_id)
    )

    per_row_limit = row_filters.CellsRowLimitFilter(2)
    for limited_row in table.read_rows(filter_=per_row_limit):
        print_row(limited_row)
108+
109+
110+
# [END bigtable_filters_limit_cells_per_row]
111+
# [START bigtable_filters_limit_cells_per_row_offset]
112+
def filter_limit_cells_per_row_offset(project_id, instance_id, table_id):
    """Print every row read through a ``CellsRowOffsetFilter`` of 2.

    Args:
        project_id: Project handed to ``bigtable.Client``.
        instance_id: ID used to look up the instance on the client.
        table_id: ID used to look up the table on the instance.
    """
    table = (
        bigtable.Client(project=project_id, admin=True)
        .instance(instance_id)
        .table(table_id)
    )

    per_row_offset = row_filters.CellsRowOffsetFilter(2)
    for offset_row in table.read_rows(filter_=per_row_offset):
        print_row(offset_row)
120+
121+
122+
# [END bigtable_filters_limit_cells_per_row_offset]
123+
# [START bigtable_filters_limit_col_family_regex]
124+
def filter_limit_col_family_regex(project_id, instance_id, table_id):
    """Print every row read through a ``FamilyNameRegexFilter``.

    The filter is built from the UTF-8 encoded pattern ``stats_.*$``.

    Args:
        project_id: Project handed to ``bigtable.Client``.
        instance_id: ID used to look up the instance on the client.
        table_id: ID used to look up the table on the instance.
    """
    table = (
        bigtable.Client(project=project_id, admin=True)
        .instance(instance_id)
        .table(table_id)
    )

    family_pattern = "stats_.*$".encode("utf-8")
    family_filter = row_filters.FamilyNameRegexFilter(family_pattern)
    for matched_row in table.read_rows(filter_=family_filter):
        print_row(matched_row)
133+
134+
135+
# [END bigtable_filters_limit_col_family_regex]
136+
# [START bigtable_filters_limit_col_qualifier_regex]
137+
def filter_limit_col_qualifier_regex(project_id, instance_id, table_id):
    """Print every row read through a ``ColumnQualifierRegexFilter``.

    The filter is built from the UTF-8 encoded pattern ``connected_.*$``.

    Args:
        project_id: Project handed to ``bigtable.Client``.
        instance_id: ID used to look up the instance on the client.
        table_id: ID used to look up the table on the instance.
    """
    table = (
        bigtable.Client(project=project_id, admin=True)
        .instance(instance_id)
        .table(table_id)
    )

    qualifier_pattern = "connected_.*$".encode("utf-8")
    qualifier_filter = row_filters.ColumnQualifierRegexFilter(
        qualifier_pattern)
    for matched_row in table.read_rows(filter_=qualifier_filter):
        print_row(matched_row)
147+
148+
149+
# [END bigtable_filters_limit_col_qualifier_regex]
150+
# [START bigtable_filters_limit_col_range]
151+
def filter_limit_col_range(project_id, instance_id, table_id):
    """Print every row read through a ``ColumnRangeFilter``.

    The filter is built for the ``cell_plan`` family with the bounds
    ``data_plan_01gb`` and ``data_plan_10gb`` and ``inclusive_end=False``.

    Args:
        project_id: Project handed to ``bigtable.Client``.
        instance_id: ID used to look up the instance on the client.
        table_id: ID used to look up the table on the instance.
    """
    table = (
        bigtable.Client(project=project_id, admin=True)
        .instance(instance_id)
        .table(table_id)
    )

    column_range = row_filters.ColumnRangeFilter(
        "cell_plan",
        b"data_plan_01gb",
        b"data_plan_10gb",
        inclusive_end=False,
    )
    for matched_row in table.read_rows(filter_=column_range):
        print_row(matched_row)
163+
164+
165+
# [END bigtable_filters_limit_col_range]
166+
# [START bigtable_filters_limit_value_range]
167+
def filter_limit_value_range(project_id, instance_id, table_id):
    """Print every row read through a ``ValueRangeFilter``.

    The filter is built with the bounds ``PQ2A.190405`` and ``PQ2A.190406``.

    Args:
        project_id: Project handed to ``bigtable.Client``.
        instance_id: ID used to look up the instance on the client.
        table_id: ID used to look up the table on the instance.
    """
    table = (
        bigtable.Client(project=project_id, admin=True)
        .instance(instance_id)
        .table(table_id)
    )

    value_range = row_filters.ValueRangeFilter(
        b"PQ2A.190405", b"PQ2A.190406")
    for matched_row in table.read_rows(filter_=value_range):
        print_row(matched_row)
177+
178+
179+
# [END bigtable_filters_limit_value_range]
180+
# [START bigtable_filters_limit_value_regex]
181+
182+
183+
def filter_limit_value_regex(project_id, instance_id, table_id):
    """Print every row read through a ``ValueRegexFilter``.

    The filter is built from the UTF-8 encoded pattern ``PQ2A.*$``.

    Args:
        project_id: Project handed to ``bigtable.Client``.
        instance_id: ID used to look up the instance on the client.
        table_id: ID used to look up the table on the instance.
    """
    table = (
        bigtable.Client(project=project_id, admin=True)
        .instance(instance_id)
        .table(table_id)
    )

    value_pattern = "PQ2A.*$".encode("utf-8")
    value_filter = row_filters.ValueRegexFilter(value_pattern)
    for matched_row in table.read_rows(filter_=value_filter):
        print_row(matched_row)
192+
193+
194+
# [END bigtable_filters_limit_value_regex]
195+
# [START bigtable_filters_limit_timestamp_range]
196+
def filter_limit_timestamp_range(project_id, instance_id, table_id):
    """Print every row read through a ``TimestampRangeFilter``.

    The range is built with only an ``end`` bound,
    ``datetime.datetime(2019, 5, 1)``.

    Args:
        project_id: Project handed to ``bigtable.Client``.
        instance_id: ID used to look up the instance on the client.
        table_id: ID used to look up the table on the instance.
    """
    table = (
        bigtable.Client(project=project_id, admin=True)
        .instance(instance_id)
        .table(table_id)
    )

    cutoff = datetime.datetime(2019, 5, 1)
    time_window = row_filters.TimestampRange(end=cutoff)
    time_filter = row_filters.TimestampRangeFilter(time_window)
    for matched_row in table.read_rows(filter_=time_filter):
        print_row(matched_row)
208+
209+
210+
# [END bigtable_filters_limit_timestamp_range]
211+
# [START bigtable_filters_limit_block_all]
212+
def filter_limit_block_all(project_id, instance_id, table_id):
    """Print every row read through ``BlockAllFilter(True)``.

    Args:
        project_id: Project handed to ``bigtable.Client``.
        instance_id: ID used to look up the instance on the client.
        table_id: ID used to look up the table on the instance.
    """
    table = (
        bigtable.Client(project=project_id, admin=True)
        .instance(instance_id)
        .table(table_id)
    )

    block_all = row_filters.BlockAllFilter(True)
    for remaining_row in table.read_rows(filter_=block_all):
        print_row(remaining_row)
220+
221+
222+
# [END bigtable_filters_limit_block_all]
223+
# [START bigtable_filters_limit_pass_all]
224+
def filter_limit_pass_all(project_id, instance_id, table_id):
    """Print every row read through ``PassAllFilter(True)``.

    Args:
        project_id: Project handed to ``bigtable.Client``.
        instance_id: ID used to look up the instance on the client.
        table_id: ID used to look up the table on the instance.
    """
    table = (
        bigtable.Client(project=project_id, admin=True)
        .instance(instance_id)
        .table(table_id)
    )

    pass_all = row_filters.PassAllFilter(True)
    for passed_row in table.read_rows(filter_=pass_all):
        print_row(passed_row)
232+
233+
234+
# [END bigtable_filters_limit_pass_all]
235+
# [START bigtable_filters_modify_strip_value]
236+
def filter_modify_strip_value(project_id, instance_id, table_id):
    """Print every row read through ``StripValueTransformerFilter(True)``.

    Args:
        project_id: Project handed to ``bigtable.Client``.
        instance_id: ID used to look up the instance on the client.
        table_id: ID used to look up the table on the instance.
    """
    table = (
        bigtable.Client(project=project_id, admin=True)
        .instance(instance_id)
        .table(table_id)
    )

    strip_values = row_filters.StripValueTransformerFilter(True)
    for stripped_row in table.read_rows(filter_=strip_values):
        print_row(stripped_row)
245+
246+
247+
# [END bigtable_filters_modify_strip_value]
248+
# [START bigtable_filters_modify_apply_label]
249+
def filter_modify_apply_label(project_id, instance_id, table_id):
    """Print every row read through an ``ApplyLabelFilter``.

    The filter is built with the label ``labelled``.

    Args:
        project_id: Project handed to ``bigtable.Client``.
        instance_id: ID used to look up the instance on the client.
        table_id: ID used to look up the table on the instance.
    """
    table = (
        bigtable.Client(project=project_id, admin=True)
        .instance(instance_id)
        .table(table_id)
    )

    label_filter = row_filters.ApplyLabelFilter(label="labelled")
    for labelled_row in table.read_rows(filter_=label_filter):
        print_row(labelled_row)
258+
259+
260+
# [END bigtable_filters_modify_apply_label]
261+
# [START bigtable_filters_composing_chain]
262+
def filter_composing_chain(project_id, instance_id, table_id):
    """Print every row read through a two-step ``RowFilterChain``.

    The chain lists, in order, ``CellsColumnLimitFilter(1)`` and
    ``FamilyNameRegexFilter("cell_plan")``.

    Args:
        project_id: Project handed to ``bigtable.Client``.
        instance_id: ID used to look up the instance on the client.
        table_id: ID used to look up the table on the instance.
    """
    table = (
        bigtable.Client(project=project_id, admin=True)
        .instance(instance_id)
        .table(table_id)
    )

    chain_steps = [
        row_filters.CellsColumnLimitFilter(1),
        row_filters.FamilyNameRegexFilter("cell_plan"),
    ]
    chained = row_filters.RowFilterChain(filters=chain_steps)
    for matched_row in table.read_rows(filter_=chained):
        print_row(matched_row)
272+
273+
274+
# [END bigtable_filters_composing_chain]
275+
# [START bigtable_filters_composing_interleave]
276+
def filter_composing_interleave(project_id, instance_id, table_id):
    """Print every row read through a two-branch ``RowFilterUnion``.

    The union lists, in order, ``ValueRegexFilter("true")`` and
    ``ColumnQualifierRegexFilter("os_build")``.

    Args:
        project_id: Project handed to ``bigtable.Client``.
        instance_id: ID used to look up the instance on the client.
        table_id: ID used to look up the table on the instance.
    """
    table = (
        bigtable.Client(project=project_id, admin=True)
        .instance(instance_id)
        .table(table_id)
    )

    union_branches = [
        row_filters.ValueRegexFilter("true"),
        row_filters.ColumnQualifierRegexFilter("os_build"),
    ]
    interleaved = row_filters.RowFilterUnion(filters=union_branches)
    for matched_row in table.read_rows(filter_=interleaved):
        print_row(matched_row)
286+
287+
288+
# [END bigtable_filters_composing_interleave]
289+
# [START bigtable_filters_composing_condition]
290+
def filter_composing_condition(project_id, instance_id, table_id):
    """Print every row read through a ``ConditionalRowFilter``.

    The base filter is a ``RowFilterChain`` of
    ``ColumnQualifierRegexFilter("data_plan_10gb")`` followed by
    ``ValueRegexFilter("true")``. The ``true_filter`` applies the label
    ``passed-filter`` and the ``false_filter`` applies ``filtered-out``.

    Args:
        project_id: Project handed to ``bigtable.Client``.
        instance_id: ID used to look up the instance on the client.
        table_id: ID used to look up the table on the instance.
    """
    table = (
        bigtable.Client(project=project_id, admin=True)
        .instance(instance_id)
        .table(table_id)
    )

    # Predicate: the qualifier regex and the value regex, chained.
    predicate = row_filters.RowFilterChain(filters=[
        row_filters.ColumnQualifierRegexFilter("data_plan_10gb"),
        row_filters.ValueRegexFilter("true"),
    ])
    on_match = row_filters.ApplyLabelFilter(label="passed-filter")
    on_miss = row_filters.ApplyLabelFilter(label="filtered-out")

    conditional = row_filters.ConditionalRowFilter(
        base_filter=predicate,
        true_filter=on_match,
        false_filter=on_miss,
    )
    for labelled_row in table.read_rows(filter_=conditional):
        print_row(labelled_row)
307+
308+
309+
# [END bigtable_filters_composing_condition]
310+
311+
312+
# [START bigtable_filters_limit_row_sample]
313+
# [START bigtable_filters_limit_row_regex]
314+
# [START bigtable_filters_limit_cells_per_col]
315+
# [START bigtable_filters_limit_cells_per_row]
316+
# [START bigtable_filters_limit_cells_per_row_offset]
317+
# [START bigtable_filters_limit_col_family_regex]
318+
# [START bigtable_filters_limit_col_qualifier_regex]
319+
# [START bigtable_filters_limit_col_range]
320+
# [START bigtable_filters_limit_value_range]
321+
# [START bigtable_filters_limit_value_regex]
322+
# [START bigtable_filters_limit_timestamp_range]
323+
# [START bigtable_filters_limit_block_all]
324+
# [START bigtable_filters_limit_pass_all]
325+
# [START bigtable_filters_modify_strip_value]
326+
# [START bigtable_filters_modify_apply_label]
327+
# [START bigtable_filters_composing_chain]
328+
# [START bigtable_filters_composing_interleave]
329+
# [START bigtable_filters_composing_condition]
330+
def print_row(row):
    """Print a row's key followed by its cells, grouped by column family.

    Column families, and the columns inside each family, are visited in
    sorted order. Each cell goes on its own tab-indented line as
    ``qualifier: value @timestamp``; when the cell has labels they are
    appended as a bracketed, comma-joined list. A blank line closes the
    row's output.

    Args:
        row: Object with ``row_key`` (bytes) and ``cells``, a mapping of
            column family to a mapping of qualifier bytes to a list of
            cells. Each cell provides ``value`` (bytes), ``timestamp``
            and ``labels``.
    """
    decoded_key = row.row_key.decode('utf-8')
    print("Reading data for {}:".format(decoded_key))

    for family, columns in sorted(row.cells.items()):
        print("Column Family {}".format(family))
        for qualifier, column_cells in sorted(columns.items()):
            for cell in column_cells:
                # Only cells that carry labels get the bracketed suffix.
                if len(cell.labels):
                    label_suffix = " [{}]".format(",".join(cell.labels))
                else:
                    label_suffix = ""
                line = "\t{}: {} @{}{}".format(
                    qualifier.decode('utf-8'),
                    cell.value.decode('utf-8'),
                    cell.timestamp,
                    label_suffix,
                )
                print(line)
    print("")
343+
# [END bigtable_filters_limit_row_sample]
344+
# [END bigtable_filters_limit_row_regex]
345+
# [END bigtable_filters_limit_cells_per_col]
346+
# [END bigtable_filters_limit_cells_per_row]
347+
# [END bigtable_filters_limit_cells_per_row_offset]
348+
# [END bigtable_filters_limit_col_family_regex]
349+
# [END bigtable_filters_limit_col_qualifier_regex]
350+
# [END bigtable_filters_limit_col_range]
351+
# [END bigtable_filters_limit_value_range]
352+
# [END bigtable_filters_limit_value_regex]
353+
# [END bigtable_filters_limit_timestamp_range]
354+
# [END bigtable_filters_limit_block_all]
355+
# [END bigtable_filters_limit_pass_all]
356+
# [END bigtable_filters_modify_strip_value]
357+
# [END bigtable_filters_modify_apply_label]
358+
# [END bigtable_filters_composing_chain]
359+
# [END bigtable_filters_composing_interleave]
360+
# [END bigtable_filters_composing_condition]

0 commit comments

Comments
 (0)