88import sqlfluff
99
1010from dlt .common .utils import uniq_id
11- from dlt .common .schema import Schema , utils
11+ from dlt .common .schema import Schema
12+ from dlt .common .schema .utils import new_table
1213from dlt .destinations import snowflake
1314from dlt .destinations .impl .snowflake .snowflake import SnowflakeClient , SUPPORTED_HINTS
1415from dlt .destinations .impl .snowflake .configuration import (
@@ -119,46 +120,103 @@ def test_create_table_with_hints(snowflake_client: SnowflakeClient) -> None:
119120 assert 'CONSTRAINT "PK_EVENT_TEST_TABLE_' in sql
120121 assert 'PRIMARY KEY ("COL1", "COL6")' in sql
121122
122- # generate alter
123- mod_update = deepcopy (TABLE_UPDATE [11 :])
124- mod_update [0 ]["primary_key" ] = True
125- mod_update [1 ]["unique" ] = True
126-
127- sql = ";" .join (snowflake_client ._get_table_update_sql ("event_test_table" , mod_update , True ))
128- # PK constraint ignored for alter
129- assert "PRIMARY KEY" not in sql
130- assert '"COL2_NULL" FLOAT UNIQUE' in sql
131-
132123
133124def test_alter_table (snowflake_client : SnowflakeClient ) -> None :
134- statements = snowflake_client ._get_table_update_sql ("event_test_table" , TABLE_UPDATE , True )
135- assert len (statements ) == 1
136- sql = statements [0 ]
125+ new_columns = deepcopy (TABLE_UPDATE [1 :10 ])
126+ statements = snowflake_client ._get_table_update_sql ("event_test_table" , new_columns , True )
137127
138- # TODO: sqlfluff doesn't parse snowflake multi ADD COLUMN clause correctly
139- # sqlfluff.parse(sql, dialect='snowflake')
128+ assert len ( statements ) == 2 , "Should have one ADD COLUMN and one DROP CLUSTERING KEY statement"
129+ add_column_sql = statements [ 0 ]
140130
141- assert sql .startswith ("ALTER TABLE" )
142- assert sql .count ("ALTER TABLE" ) == 1
143- assert sql .count ("ADD COLUMN" ) == 1
144- assert '"EVENT_TEST_TABLE"' in sql
145- assert '"COL1" NUMBER(19,0) NOT NULL' in sql
146- assert '"COL2" FLOAT NOT NULL' in sql
147- assert '"COL3" BOOLEAN NOT NULL' in sql
148- assert '"COL4" TIMESTAMP_TZ NOT NULL' in sql
149- assert '"COL5" VARCHAR' in sql
150- assert '"COL6" NUMBER(38,9) NOT NULL' in sql
151- assert '"COL7" BINARY' in sql
152- assert '"COL8" NUMBER(38,0)' in sql
153- assert '"COL9" VARIANT NOT NULL' in sql
154- assert '"COL10" DATE' in sql
131+ # TODO: sqlfluff doesn't parse snowflake multi ADD COLUMN clause correctly
132+ # sqlfluff.parse(add_column_sql, dialect='snowflake')
133+
134+ assert add_column_sql .startswith ("ALTER TABLE" )
135+ assert add_column_sql .count ("ALTER TABLE" ) == 1
136+ assert add_column_sql .count ("ADD COLUMN" ) == 1
137+ assert '"EVENT_TEST_TABLE"' in add_column_sql
138+ assert '"COL1"' not in add_column_sql
139+ assert '"COL2" FLOAT NOT NULL' in add_column_sql
140+ assert '"COL3" BOOLEAN NOT NULL' in add_column_sql
141+ assert '"COL4" TIMESTAMP_TZ NOT NULL' in add_column_sql
142+ assert '"COL5" VARCHAR' in add_column_sql
143+ assert '"COL6" NUMBER(38,9) NOT NULL' in add_column_sql
144+ assert '"COL7" BINARY' in add_column_sql
145+ assert '"COL8" NUMBER(38,0)' in add_column_sql
146+ assert '"COL9" VARIANT NOT NULL' in add_column_sql
147+ assert '"COL10" DATE' in add_column_sql
148+
149+
150+ def test_alter_table_with_hints (snowflake_client : SnowflakeClient ) -> None :
151+ table_name = "event_test_table"
155152
156- mod_table = deepcopy (TABLE_UPDATE )
157- mod_table .pop (0 )
158- sql = snowflake_client ._get_table_update_sql ("event_test_table" , mod_table , True )[0 ]
153+ # mock hints
154+ snowflake_client .active_hints = SUPPORTED_HINTS
159155
160- assert '"COL1"' not in sql
161- assert '"COL2" FLOAT NOT NULL' in sql
156+ # test primary key and unique hints
157+ new_columns = deepcopy (TABLE_UPDATE [11 :])
158+ new_columns [0 ]["primary_key" ] = True
159+ new_columns [1 ]["unique" ] = True
160+ statements = snowflake_client ._get_table_update_sql (table_name , new_columns , True )
161+
162+ assert len (statements ) == 2 , "Should have one ADD COLUMN and one DROP CLUSTERING KEY statement"
163+ add_column_sql = statements [0 ]
164+ assert "PRIMARY KEY" not in add_column_sql # PK constraint ignored for alter
165+ assert '"COL2_NULL" FLOAT UNIQUE' in add_column_sql
166+
167+ # test cluster hint
168+
169+ # case: drop clustering (always run if no cluster hints present in table schema)
170+ cluster_by_sql = statements [1 ]
171+
172+ assert cluster_by_sql .startswith ("ALTER TABLE" )
173+ assert f'"{ table_name .upper ()} "' in cluster_by_sql
174+ assert cluster_by_sql .endswith ("DROP CLUSTERING KEY" )
175+
176+ # case: add clustering (without clustering -> with clustering)
177+ old_columns = deepcopy (TABLE_UPDATE [:1 ])
178+ new_columns = deepcopy (TABLE_UPDATE [1 :2 ])
179+ new_columns [0 ]["cluster" ] = True # COL2
180+ all_columns = deepcopy (old_columns + new_columns )
181+ snowflake_client .schema .update_table (new_table (table_name , columns = deepcopy (all_columns )))
182+ statements = snowflake_client ._get_table_update_sql (table_name , new_columns , True )
183+
184+ assert len (statements ) == 2 , "Should have one ADD COLUMN and one CLUSTER BY statement"
185+ cluster_by_sql = statements [1 ]
186+ assert cluster_by_sql .startswith ("ALTER TABLE" )
187+ assert f'"{ table_name .upper ()} "' in cluster_by_sql
188+ assert 'CLUSTER BY ("COL2")' in cluster_by_sql
189+
190+ # case: modify clustering (extend cluster columns)
191+ old_columns = deepcopy (TABLE_UPDATE [:2 ])
192+ old_columns [1 ]["cluster" ] = True # COL2
193+ new_columns = deepcopy (TABLE_UPDATE [2 :5 ])
194+ new_columns [2 ]["cluster" ] = True # COL5
195+ all_columns = deepcopy (old_columns + new_columns )
196+ snowflake_client .schema .update_table (new_table (table_name , columns = all_columns ))
197+ statements = snowflake_client ._get_table_update_sql (table_name , new_columns , True )
198+
199+ assert len (statements ) == 2 , "Should have one ADD COLUMN and one CLUSTER BY statement"
200+ cluster_by_sql = statements [1 ]
201+ assert cluster_by_sql .count ("ALTER TABLE" ) == 1
202+ assert cluster_by_sql .count ("CLUSTER BY" ) == 1
203+ assert 'CLUSTER BY ("COL2","COL5")' in cluster_by_sql
204+
205+ # case: modify clustering (reorder cluster columns)
206+ old_columns = deepcopy (TABLE_UPDATE [:5 ])
207+ old_columns [1 ]["cluster" ] = True # COL2
208+ old_columns [4 ]["cluster" ] = True # COL5
209+ old_columns [1 ], old_columns [4 ] = old_columns [4 ], old_columns [1 ] # swap order
210+ new_columns = deepcopy (TABLE_UPDATE [5 :6 ])
211+ all_columns = deepcopy (old_columns + new_columns )
212+ # cannot change column order in existing table schema, so we drop and recreate
213+ snowflake_client .schema .drop_tables ([table_name ])
214+ snowflake_client .schema .update_table (new_table (table_name , columns = all_columns ))
215+ statements = snowflake_client ._get_table_update_sql (table_name , new_columns , True )
216+
217+ assert len (statements ) == 2 , "Should have one ADD COLUMN and one CLUSTER BY statement"
218+ cluster_by_sql = statements [1 ]
219+ assert 'CLUSTER BY ("COL5","COL2")' in cluster_by_sql # reordered (COL5 first)
162220
163221
164222def test_create_table_case_sensitive (cs_client : SnowflakeClient ) -> None :
@@ -170,9 +228,7 @@ def test_create_table_case_sensitive(cs_client: SnowflakeClient) -> None:
170228 assert cs_client .sql_client .dataset_name .endswith ("staginG" )
171229 assert cs_client .sql_client .staging_dataset_name .endswith ("staginG" )
172230 # check tables
173- cs_client .schema .update_table (
174- utils .new_table ("event_test_table" , columns = deepcopy (TABLE_UPDATE ))
175- )
231+ cs_client .schema .update_table (new_table ("event_test_table" , columns = deepcopy (TABLE_UPDATE )))
176232 sql = cs_client ._get_table_update_sql (
177233 "Event_test_tablE" ,
178234 list (cs_client .schema .get_table_columns ("Event_test_tablE" ).values ()),
@@ -192,7 +248,9 @@ def test_create_table_with_partition_and_cluster(snowflake_client: SnowflakeClie
192248 mod_update [3 ]["partition" ] = True
193249 mod_update [4 ]["cluster" ] = True
194250 mod_update [1 ]["cluster" ] = True
195- statements = snowflake_client ._get_table_update_sql ("event_test_table" , mod_update , False )
251+ table_name = "event_test_table"
252+ snowflake_client .schema .update_table (new_table (table_name , columns = deepcopy (mod_update )))
253+ statements = snowflake_client ._get_table_update_sql (table_name , mod_update , False )
196254 assert len (statements ) == 1
197255 sql = statements [0 ]
198256
0 commit comments