@@ -1663,3 +1663,54 @@ def api_fetch(page_num):
16631663 load_info = pipeline .run (product ())
16641664 assert_load_info (load_info )
16651665 assert pipeline .last_trace .last_normalize_info .row_counts ["product" ] == 12
1666+
1667+
def test_run_with_pua_payload() -> None:
    """Check that text around ``PUA_START`` survives a full duckdb load unchanged.

    Ten records are loaded whose ``text`` column contains code points at,
    just below, just above and past ``PUA_START + PUA_CHARACTER_MAX``, placed
    alone, inside, and at the start of a string. The test then reads the
    column back and compares it with the literal expected values.
    """
    # prepare some data and complete load with run
    os.environ["COMPLETED_PROB"] = "1.0"
    pipeline_name = "pipe_" + uniq_id()
    p = dlt.pipeline(pipeline_name=pipeline_name, destination="duckdb")
    print(pipeline_name)
    from dlt.common.json import PUA_START, PUA_CHARACTER_MAX

    # NOTE(review): presumably dlt's json module reserves code points in
    # [PUA_START, PUA_START + PUA_CHARACTER_MAX] as type markers - confirm.
    records = [
        # text is only PUA
        {"id": 1, "text": chr(PUA_START)},
        {"id": 2, "text": chr(PUA_START - 1)},
        {"id": 3, "text": chr(PUA_START + 1)},
        {"id": 4, "text": chr(PUA_START + PUA_CHARACTER_MAX + 1)},
        # PUA inside text
        {"id": 5, "text": f"a{chr(PUA_START)} b"},
        {"id": 6, "text": f"a{chr(PUA_START - 1)} b"},
        {"id": 7, "text": f"a{chr(PUA_START + 1)} b"},
        # text starts with PUA
        {"id": 8, "text": f"{chr(PUA_START)} a"},
        {"id": 9, "text": f"{chr(PUA_START - 1)} a"},
        {"id": 10, "text": f"{chr(PUA_START + 1)} a"},
    ]

    def some_data():
        yield from records

    @dlt.source
    def source():
        return dlt.resource(some_data(), name="pua_data")

    load_info = p.run(source())
    # Derive the expected count from the data itself so the two cannot drift
    # apart (the previous hard-coded 11 did not match the 10 yielded records).
    assert p.last_trace.last_normalize_info.row_counts["pua_data"] == len(records)

    with p.sql_client() as client:
        rows = client.execute_sql("SELECT text FROM pua_data ORDER BY id")

    values = [r[0] for r in rows]
    assert values == [
        # ids 1-4 are a single character produced by chr(); no trailing space
        "\uf026",
        "\uf025",
        "\uf027",
        "\uf02f",
        "a\uf026 b",
        "a\uf025 b",
        "a\uf027 b",
        "\uf026 a",
        "\uf025 a",
        "\uf027 a",
    ]
    assert len(load_info.loads_ids) == 1
0 commit comments