@@ -1212,7 +1212,7 @@ def test_map(table, map):
1212
1212
1213
1213
1214
1214
@pytest .mark .parametrize ("storage_format" , ["csv" , "parquet" ])
1215
- def test_hash ( tmpdir , storage_format ) :
1215
+ class TestHash :
1216
1216
r"""Test if PARQUET file hash changes with table.
1217
1217
1218
1218
We store a MD5 sum associated with the dataframe,
@@ -1229,82 +1229,98 @@ def test_hash(tmpdir, storage_format):
1229
1229
1230
1230
"""
1231
1231
1232
- def get_md5 (path : str ) -> str :
1232
def db(self, tmpdir, storage_format):
    r"""Build a minimal database, save it, and return it.

    The database holds one scheme (``"int"``)
    and one segmented table (``"table"``)
    with a single column (``"column"``).
    As a side effect the attributes
    ``db_root``, ``storage_format`` and ``table_file``
    are stored on the instance,
    so that the other methods know
    where the database was saved
    and which table file to inspect.

    Args:
        tmpdir: folder under which the ``"db"`` folder is created
        storage_format: storage format handed to ``Database.save()``

    Returns:
        the database object that was saved

    """
    root = audeer.path(tmpdir, "db")
    self.db_root = root
    self.storage_format = storage_format
    self.table_file = audeer.path(root, f"db.table.{storage_format}")

    database = audformat.Database("mydb")
    database.schemes["int"] = audformat.Scheme("int")
    # Attach the table to the database first,
    # then add and fill its column through the database
    database["table"] = audformat.Table(
        audformat.segmented_index(["f1", "f2"], [0, 1], [1, 2])
    )
    database["table"]["column"] = audformat.Column(scheme_id="int")
    database["table"]["column"].set([0, 1])
    database.save(root, storage_format=storage_format)
    return database
1245
+
1246
def md5(self) -> str:
    r"""Get MD5 sum for table file.

    Reads ``self.storage_format`` and ``self.table_file``,
    which are set by ``db()``.
    For ``"csv"`` the result of ``audeer.md5()``
    on the table file is returned.
    For ``"parquet"`` the value stored under the ``b"hash"`` key
    of the file's schema metadata is decoded and returned.

    Returns:
        MD5 sum associated with the table file

    Raises:
        ValueError: if the storage format
            is neither ``"csv"`` nor ``"parquet"``

    """
    if self.storage_format == "csv":
        return audeer.md5(self.table_file)
    if self.storage_format == "parquet":
        metadata = parquet.read_schema(self.table_file).metadata
        return metadata[b"hash"].decode()
    # Fail loudly instead of returning None,
    # otherwise a comparison of two None values
    # could make a hash test pass or fail for the wrong reason
    raise ValueError(
        f"Unsupported storage format: {self.storage_format!r}"
    )
1252
+
1253
def test_change_index(self, tmpdir, storage_format):
    r"""Hash differs after the table index was changed.

    The table is rebuilt with file ``"f1"`` used for both segments
    instead of ``"f1"`` and ``"f2"``,
    while column name and values stay as created by ``db()``.

    Args:
        tmpdir: tmpdir fixture
        storage_format: storage format of the table file

    """
    database = self.db(tmpdir, storage_format)
    reference = self.md5()

    database["table"] = audformat.Table(
        audformat.segmented_index(["f1", "f1"], [0, 1], [1, 2])
    )
    database["table"]["column"] = audformat.Column(scheme_id="int")
    database["table"]["column"].set([0, 1])
    database.save(self.db_root, storage_format=self.storage_format)

    assert reference != self.md5()
1263
+
1264
def test_change_column_name(self, tmpdir, storage_format):
    r"""Hash differs after the table column was renamed.

    The table is rebuilt with the same index and values
    as created by ``db()``,
    but the column is called ``"col"`` instead of ``"column"``.

    Args:
        tmpdir: tmpdir fixture
        storage_format: storage format of the table file

    """
    database = self.db(tmpdir, storage_format)
    reference = self.md5()

    database["table"] = audformat.Table(
        audformat.segmented_index(["f1", "f2"], [0, 1], [1, 2])
    )
    database["table"]["col"] = audformat.Column(scheme_id="int")
    database["table"]["col"].set([0, 1])
    database.save(self.db_root, storage_format=self.storage_format)

    assert reference != self.md5()
1274
+
1275
def test_change_column_order(self, tmpdir, storage_format):
    r"""Hash differs after the order of table columns was changed.

    The reference hash is taken from a table
    with the columns ``"col1"``, ``"col2"``.
    The table is then rebuilt with the same index and values,
    but with the columns added as ``"col2"``, ``"col1"``.

    Args:
        tmpdir: tmpdir fixture
        storage_format: storage format of the table file

    """
    database = self.db(tmpdir, storage_format)
    segments = audformat.segmented_index(["f1", "f2"], [0, 1], [1, 2])

    # Reference: columns in the order col1, col2
    database["table"] = audformat.Table(segments)
    for name in ("col1", "col2"):
        database["table"][name] = audformat.Column(scheme_id="int")
        database["table"][name].set([0, 1])
    database.save(self.db_root, storage_format=self.storage_format)
    reference = self.md5()

    # Same content, columns in the order col2, col1
    database["table"] = audformat.Table(segments)
    for name in ("col2", "col1"):
        database["table"][name] = audformat.Column(scheme_id="int")
        database["table"][name].set([0, 1])
    database.save(self.db_root, storage_format=self.storage_format)

    assert reference != self.md5()
1293
+
1294
def test_change_row_order(self, tmpdir, storage_format):
    r"""Change order of table rows.

    The table is rebuilt with the same segments and values
    as created by ``db()``,
    but with the two rows swapped.
    The hash of the stored table file
    is expected to differ afterwards.

    Args:
        tmpdir: tmpdir fixture
        storage_format: storage format of the table file

    """
    db = self.db(tmpdir, storage_format)
    md5 = self.md5()
    index = audformat.segmented_index(["f2", "f1"], [1, 0], [2, 1])
    db["table"] = audformat.Table(index)
    db["table"]["column"] = audformat.Column(scheme_id="int")
    db["table"]["column"].set([1, 0])
    # Use the format stored by db(), as all other tests of this class do
    db.save(self.db_root, storage_format=self.storage_format)
    assert self.md5() != md5
1304
+
1305
def test_change_values(self, tmpdir, storage_format):
    r"""Hash differs after the table values were changed.

    The table is rebuilt with the same index and column name
    as created by ``db()``,
    but the column holds ``[1, 0]`` instead of ``[0, 1]``.

    Args:
        tmpdir: tmpdir fixture
        storage_format: storage format of the table file

    """
    database = self.db(tmpdir, storage_format)
    reference = self.md5()

    database["table"] = audformat.Table(
        audformat.segmented_index(["f1", "f2"], [0, 1], [1, 2])
    )
    database["table"]["column"] = audformat.Column(scheme_id="int")
    database["table"]["column"].set([1, 0])
    database.save(self.db_root, storage_format=self.storage_format)

    assert reference != self.md5()
1315
+
1316
def test_copy_table(self, tmpdir, storage_format):
    r"""Hash stays the same when the table is replaced by its copy.

    The table created by ``db()`` is copied,
    the copy is assigned back under the same table ID,
    and the database is saved again.

    Args:
        tmpdir: tmpdir fixture
        storage_format: storage format of the table file

    """
    database = self.db(tmpdir, storage_format)
    reference = self.md5()

    database["table"] = database["table"].copy()
    database.save(self.db_root, storage_format=self.storage_format)

    assert reference == self.md5()
1308
1324
1309
1325
1310
1326
@pytest .mark .parametrize (
0 commit comments