@@ -1423,17 +1423,78 @@ def udf(
1423
1423
packages : Optional [Sequence [str ]] = None ,
1424
1424
):
1425
1425
"""Decorator to turn a Python user defined function (udf) into a
1426
- BigQuery managed function.
1426
+ [BigQuery managed user-defined function](https://cloud.google.com/bigquery/docs/user-defined-functions-python).
1427
1427
1428
1428
.. note::
1429
1429
The udf must be self-contained, i.e. it must not contain any
1430
1430
references to an import or variable defined outside the function
1431
1431
body.
1432
1432
1433
1433
.. note::
1434
- Please have following IAM roles enabled for you:
1434
+ Please make sure you have been granted the BigQuery Data Editor
1435
+ (roles/bigquery.dataEditor) IAM role.
1435
1436
1436
- * BigQuery Data Editor (roles/bigquery.dataEditor)
1437
+ **Examples:**
1438
+
1439
+ >>> import bigframes.pandas as bpd
1440
+ >>> import datetime
1441
+ >>> bpd.options.display.progress_bar = None
1442
+
1443
+ Turning an arbitrary Python function into a BigQuery managed Python UDF:
1444
+
1445
+ >>> bq_name = datetime.datetime.now().strftime("bigframes_%Y%m%d%H%M%S%f")
1446
+ >>> @bpd.udf(dataset="bigframes_testing", name=bq_name)
1447
+ ... def minutes_to_hours(x: int) -> float:
1448
+ ... return x/60
1449
+
1450
+ >>> minutes = bpd.Series([0, 30, 60, 90, 120])
1451
+ >>> minutes
1452
+ 0 0
1453
+ 1 30
1454
+ 2 60
1455
+ 3 90
1456
+ 4 120
1457
+ dtype: Int64
1458
+
1459
+ >>> hours = minutes.apply(minutes_to_hours)
1460
+ >>> hours
1461
+ 0 0.0
1462
+ 1 0.5
1463
+ 2 1.0
1464
+ 3 1.5
1465
+ 4 2.0
1466
+ dtype: Float64
1467
+
1468
+ To turn a user defined function with external package dependencies into
1469
+ a BigQuery managed Python UDF, provide the names of the packages
1470
+ (optionally with the package version) via the `packages` parameter.
1471
+
1472
+ >>> bq_name = datetime.datetime.now().strftime("bigframes_%Y%m%d%H%M%S%f")
1473
+ >>> @bpd.udf(
1474
+ ... dataset="bigframes_testing",
1475
+ ... name=bq_name,
1476
+ ... packages=["cryptography"]
1477
+ ... )
1478
+ ... def get_hash(input: str) -> str:
1479
+ ... from cryptography.fernet import Fernet
1480
+ ...
1481
+ ... # handle missing value
1482
+ ... if input is None:
1483
+ ... input = ""
1484
+ ...
1485
+ ... key = Fernet.generate_key()
1486
+ ... f = Fernet(key)
1487
+ ... return f.encrypt(input.encode()).decode()
1488
+
1489
+ >>> names = bpd.Series(["Alice", "Bob"])
1490
+ >>> hashes = names.apply(get_hash)
1491
+
1492
+ You can clean up the BigQuery functions created above using the BigQuery
1493
+ client from the BigQuery DataFrames session:
1494
+
1495
+ >>> session = bpd.get_global_session()
1496
+ >>> session.bqclient.delete_routine(minutes_to_hours.bigframes_bigquery_function)
1497
+ >>> session.bqclient.delete_routine(get_hash.bigframes_bigquery_function)
1437
1498
1438
1499
Args:
1439
1500
input_types (type or sequence(type), Optional):
0 commit comments