From 63fb3aff45b8b2e3ab00605c99cc3dd46146370a Mon Sep 17 00:00:00 2001 From: jordan-m-young Date: Thu, 4 Apr 2024 11:14:05 -0500 Subject: [PATCH 1/5] test semicolon support for single str query --- connectorx-python/connectorx/tests/test_postgres.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/connectorx-python/connectorx/tests/test_postgres.py b/connectorx-python/connectorx/tests/test_postgres.py index 4f636fb020..7bb9d44147 100644 --- a/connectorx-python/connectorx/tests/test_postgres.py +++ b/connectorx-python/connectorx/tests/test_postgres.py @@ -1138,4 +1138,16 @@ def test_postgres_name_type(postgres_url: str) -> None: "test_name": pd.Series(["0", "21", "someName", "101203203-1212323-22131235"]), }, ) + assert_frame_equal(df, expected, check_names=True) + + + +def test_postgres_semicolon_support_str_query(postgres_url: str) -> None: + query = "SELECT test_name FROM test_types;" + df = read_sql(postgres_url, query) + expected = pd.DataFrame( + data={ + "test_name": pd.Series(["0", "21", "someName", "101203203-1212323-22131235"]), + }, + ) assert_frame_equal(df, expected, check_names=True) \ No newline at end of file From 13808f83b0e7a6904c55cb355b0068cf45159931 Mon Sep 17 00:00:00 2001 From: jordan-m-young Date: Thu, 4 Apr 2024 11:14:37 -0500 Subject: [PATCH 2/5] replace semicolon of query with null string "" --- connectorx-python/connectorx/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/connectorx-python/connectorx/__init__.py b/connectorx-python/connectorx/__init__.py index 64e82d66e2..145d4587d7 100644 --- a/connectorx-python/connectorx/__init__.py +++ b/connectorx-python/connectorx/__init__.py @@ -206,6 +206,7 @@ def read_sql( """ if isinstance(query, list) and len(query) == 1: query = query[0] + query = query.replace(";","") if isinstance(conn, dict): assert partition_on is None and isinstance( @@ -214,6 +215,8 @@ def read_sql( assert ( protocol is None ), "Federated query does not support specifying 
protocol for now" + + query = query.replace(";","") result = _read_sql2(query, conn) df = reconstruct_arrow(result) if return_type == "pandas": @@ -232,6 +235,7 @@ def read_sql( return df if isinstance(query, str): + query = query.replace(";","") if partition_on is None: queries = [query] partition_query = None @@ -245,7 +249,7 @@ def read_sql( } queries = None elif isinstance(query, list): - queries = query + queries = [subquery.replace(";","") for subquery in query] partition_query = None if partition_on is not None: From 039f8ab25832322bb61a2cbd80dc70da377ada42 Mon Sep 17 00:00:00 2001 From: jordan-m-young Date: Thu, 4 Apr 2024 11:50:08 -0500 Subject: [PATCH 3/5] test for list of queries --- .../connectorx/tests/test_postgres.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/connectorx-python/connectorx/tests/test_postgres.py b/connectorx-python/connectorx/tests/test_postgres.py index 7bb9d44147..ba3b2a6ba4 100644 --- a/connectorx-python/connectorx/tests/test_postgres.py +++ b/connectorx-python/connectorx/tests/test_postgres.py @@ -1150,4 +1150,30 @@ def test_postgres_semicolon_support_str_query(postgres_url: str) -> None: "test_name": pd.Series(["0", "21", "someName", "101203203-1212323-22131235"]), }, ) + assert_frame_equal(df, expected, check_names=True) + + +def test_postgres_semicolon_list_queries(postgres_url: str) -> None: + queries = [ + "SELECT * FROM test_table WHERE test_int < 2;", + "SELECT * FROM test_table WHERE test_int >= 2;", + ] + + df = read_sql(postgres_url, query=queries) + + expected = pd.DataFrame( + index=range(6), + data={ + "test_int": pd.Series([0, 1, 2, 3, 4, 1314], dtype="Int64"), + "test_nullint": pd.Series([5, 3, None, 7, 9, 2], dtype="Int64"), + "test_str": pd.Series( + ["a", "str1", "str2", "b", "c", None], dtype="object" + ), + "test_float": pd.Series([3.1, None, 2.2, 3, 7.8, -10], dtype="float64"), + "test_bool": pd.Series( + [None, True, False, False, None, True], dtype="boolean" + ), + }, + ) + 
df.sort_values(by="test_int", inplace=True, ignore_index=True) assert_frame_equal(df, expected, check_names=True) \ No newline at end of file From 40a18a1521a30b3318337468cabe07bda95a88d5 Mon Sep 17 00:00:00 2001 From: jordan-m-young Date: Thu, 4 Apr 2024 12:22:20 -0500 Subject: [PATCH 4/5] formatting, sorry. --- connectorx-python/connectorx/tests/test_postgres.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/connectorx-python/connectorx/tests/test_postgres.py b/connectorx-python/connectorx/tests/test_postgres.py index ba3b2a6ba4..620c5333b5 100644 --- a/connectorx-python/connectorx/tests/test_postgres.py +++ b/connectorx-python/connectorx/tests/test_postgres.py @@ -1154,7 +1154,7 @@ def test_postgres_semicolon_support_str_query(postgres_url: str) -> None: def test_postgres_semicolon_list_queries(postgres_url: str) -> None: - queries = [ + queries = [ "SELECT * FROM test_table WHERE test_int < 2;", "SELECT * FROM test_table WHERE test_int >= 2;", ] From f20a2c32cef14805a76a6f64d420dc4e2c0876fb Mon Sep 17 00:00:00 2001 From: Jordan-M-Young Date: Mon, 8 Apr 2024 20:14:33 -0500 Subject: [PATCH 5/5] only remove ; if last char --- connectorx-python/connectorx/__init__.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/connectorx-python/connectorx/__init__.py b/connectorx-python/connectorx/__init__.py index 145d4587d7..b58590e80d 100644 --- a/connectorx-python/connectorx/__init__.py +++ b/connectorx-python/connectorx/__init__.py @@ -206,7 +206,8 @@ def read_sql( """ if isinstance(query, list) and len(query) == 1: query = query[0] - query = query.replace(";","") + query = remove_ending_semicolon(query) + if isinstance(conn, dict): assert partition_on is None and isinstance( @@ -216,7 +217,8 @@ def read_sql( protocol is None ), "Federated query does not support specifying protocol for now" - query = query.replace(";","") + query = remove_ending_semicolon(query) + result = _read_sql2(query, conn) df = 
def remove_ending_semicolon(query: str) -> str:
    """Return *query* without its terminating semicolon, if it has one.

    Only a single statement terminator at the very end of the query is
    removed; semicolons elsewhere (for example inside string literals) are
    left untouched. Whitespace that follows the terminating semicolon is
    ignored when looking for it and is dropped together with it, so
    ``"SELECT 1;\\n"`` is handled the same way as ``"SELECT 1;"``.

    Args:
        query: SQL text, possibly ending in ``;``.

    Returns:
        The query without the trailing semicolon. A query that has no
        trailing semicolon (including the empty string) is returned
        unchanged.
    """
    # Look past trailing whitespace so a semicolon followed by a newline
    # (typical for triple-quoted SQL) is still recognised as the terminator.
    trimmed = query.rstrip()
    # ``endswith`` is safe on an empty string, unlike indexing with [-1].
    if trimmed.endswith(";"):
        return trimmed[:-1]
    return query