Skip to content

Commit fe29490

Browse files
committed
review comment changes
1 parent 796669d commit fe29490

File tree

2 files changed

+96
-52
lines changed

2 files changed

+96
-52
lines changed

athena-datalakegen2/src/main/java/com/amazonaws/athena/connectors/datalakegen2/DataLakeGen2MetadataHandler.java

Lines changed: 59 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -218,15 +218,18 @@ protected Schema getSchema(Connection jdbcConnection, TableName tableName, Schem
218218
{
219219
LOGGER.info("Inside getSchema");
220220

221-
String dataTypeQuery = "SELECT C.NAME AS COLUMN_NAME, TYPE_NAME(C.USER_TYPE_ID) AS DATA_TYPE " +
221+
String dataTypeQuery = "SELECT C.NAME AS COLUMN_NAME, TYPE_NAME(C.USER_TYPE_ID) AS DATA_TYPE, " +
222+
"C.PRECISION, C.SCALE " +
222223
"FROM sys.columns C " +
223224
"JOIN sys.types T " +
224225
"ON C.USER_TYPE_ID=T.USER_TYPE_ID " +
225226
"WHERE C.OBJECT_ID=OBJECT_ID(?)";
226227

227228
String dataType;
228229
String columnName;
229-
HashMap<String, String> hashMap = new HashMap<>();
230+
int precision;
231+
int scale;
232+
HashMap<String, ColumnInfo> hashMap = new HashMap<>();
230233

231234
SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
232235
try (Connection connection = getJdbcConnectionFactory().getConnection(getCredentialProvider());
@@ -237,7 +240,9 @@ protected Schema getSchema(Connection jdbcConnection, TableName tableName, Schem
237240
while (dataTypeResultSet.next()) {
238241
dataType = dataTypeResultSet.getString("DATA_TYPE");
239242
columnName = dataTypeResultSet.getString("COLUMN_NAME");
240-
hashMap.put(columnName.trim(), dataType.trim());
243+
precision = dataTypeResultSet.getInt("PRECISION");
244+
scale = dataTypeResultSet.getInt("SCALE");
245+
hashMap.put(columnName.trim(), new ColumnInfo(dataType.trim(), precision, scale));
241246
}
242247
}
243248
}
@@ -257,26 +262,35 @@ protected Schema getSchema(Connection jdbcConnection, TableName tableName, Schem
257262
return schemaBuilder.build();
258263
}
259264

260-
private SchemaBuilder doDataTypeConversion(HashMap<String, String> columnNameAndDataTypeMap)
265+
private SchemaBuilder doDataTypeConversion(HashMap<String, ColumnInfo> columnNameAndDataTypeMap)
261266
{
262267
SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
263268

264-
for (Map.Entry<String, String> entry : columnNameAndDataTypeMap.entrySet()) {
269+
for (Map.Entry<String, ColumnInfo> entry : columnNameAndDataTypeMap.entrySet()) {
265270
String columnName = entry.getKey();
266-
String dataType = entry.getValue();
271+
ColumnInfo columnInfo = entry.getValue();
272+
String dataType = columnInfo.getDataType();
267273
ArrowType columnType = Types.MinorType.VARCHAR.getType();
268274

269-
if ("char".equalsIgnoreCase(dataType) || "varchar".equalsIgnoreCase(dataType) || "binary".equalsIgnoreCase(dataType) ||
270-
"nchar".equalsIgnoreCase(dataType) || "nvarchar".equalsIgnoreCase(dataType) || "varbinary".equalsIgnoreCase(dataType)
275+
if ("char".equalsIgnoreCase(dataType) || "varchar".equalsIgnoreCase(dataType) ||
276+
"nchar".equalsIgnoreCase(dataType) || "nvarchar".equalsIgnoreCase(dataType)
271277
|| "time".equalsIgnoreCase(dataType) || "uniqueidentifier".equalsIgnoreCase(dataType)) {
272278
columnType = Types.MinorType.VARCHAR.getType();
273279
}
274280

281+
if ("binary".equalsIgnoreCase(dataType) || "varbinary".equalsIgnoreCase(dataType)) {
282+
columnType = Types.MinorType.VARBINARY.getType();
283+
}
284+
275285
if ("bit".equalsIgnoreCase(dataType)) {
286+
columnType = Types.MinorType.BIT.getType();
287+
}
288+
289+
if ("tinyint".equalsIgnoreCase(dataType)) {
276290
columnType = Types.MinorType.TINYINT.getType();
277291
}
278292

279-
if ("tinyint".equalsIgnoreCase(dataType) || "smallint".equalsIgnoreCase(dataType)) {
293+
if ("smallint".equalsIgnoreCase(dataType)) {
280294
columnType = Types.MinorType.SMALLINT.getType();
281295
}
282296

@@ -288,11 +302,11 @@ private SchemaBuilder doDataTypeConversion(HashMap<String, String> columnNameAnd
288302
columnType = Types.MinorType.BIGINT.getType();
289303
}
290304

291-
if ("decimal".equalsIgnoreCase(dataType) || "money".equalsIgnoreCase(dataType)) {
292-
columnType = Types.MinorType.FLOAT8.getType();
305+
if ("decimal".equalsIgnoreCase(dataType)) {
306+
columnType = new ArrowType.Decimal(columnInfo.getPrecision(), columnInfo.getScale(), 128);
293307
}
294308

295-
if ("numeric".equalsIgnoreCase(dataType) || "float".equalsIgnoreCase(dataType) || "smallmoney".equalsIgnoreCase(dataType)) {
309+
if ("numeric".equalsIgnoreCase(dataType) || "float".equalsIgnoreCase(dataType) || "smallmoney".equalsIgnoreCase(dataType) || "money".equalsIgnoreCase(dataType)) {
296310
columnType = Types.MinorType.FLOAT8.getType();
297311
}
298312

@@ -314,7 +328,7 @@ private SchemaBuilder doDataTypeConversion(HashMap<String, String> columnNameAnd
314328
return schemaBuilder;
315329
}
316330

317-
private SchemaBuilder doDataTypeConversionForNonCompatible(Connection jdbcConnection, TableName tableName, HashMap<String, String> columnNameAndDataTypeMap) throws SQLException
331+
private SchemaBuilder doDataTypeConversionForNonCompatible(Connection jdbcConnection, TableName tableName, HashMap<String, ColumnInfo> columnNameAndDataTypeMap) throws SQLException
318332
{
319333
SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();
320334

@@ -327,10 +341,10 @@ private SchemaBuilder doDataTypeConversionForNonCompatible(Connection jdbcConnec
327341
resultSet.getInt("DECIMAL_DIGITS"),
328342
configOptions);
329343
String columnName = resultSet.getString("COLUMN_NAME");
330-
String dataType = columnNameAndDataTypeMap.get(columnName);
344+
ColumnInfo columnInfo = columnNameAndDataTypeMap.get(columnName);
331345

332-
if (dataType != null && DataLakeGen2DataType.isSupported(dataType)) {
333-
columnType = Optional.of(DataLakeGen2DataType.fromType(dataType));
346+
if (columnInfo != null && DataLakeGen2DataType.isSupported(columnInfo.getDataType())) {
347+
columnType = Optional.of(DataLakeGen2DataType.fromType(columnInfo.getDataType()));
334348
}
335349

336350
/**
@@ -356,3 +370,32 @@ private SchemaBuilder doDataTypeConversionForNonCompatible(Connection jdbcConnec
356370
return schemaBuilder;
357371
}
358372
}
373+
374+
class ColumnInfo
375+
{
376+
private final String dataType;
377+
private final int precision;
378+
private final int scale;
379+
380+
public ColumnInfo(String dataType, int precision, int scale)
381+
{
382+
this.dataType = dataType;
383+
this.precision = precision;
384+
this.scale = scale;
385+
}
386+
387+
public String getDataType()
388+
{
389+
return dataType;
390+
}
391+
392+
public int getPrecision()
393+
{
394+
return precision;
395+
}
396+
397+
public int getScale()
398+
{
399+
return scale;
400+
}
401+
}

athena-datalakegen2/src/test/java/com/amazonaws/athena/connectors/datalakegen2/DataLakeGen2MetadataHandlerTest.java

Lines changed: 37 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,8 @@ public void doGetTable()
217217
when(connection.getAutoCommit()).thenReturn(true);
218218

219219
// Mock the data type query result set
220-
String[] dataTypeSchema = {"COLUMN_NAME", "DATA_TYPE"};
221-
Object[][] dataTypeValues = {{"testCol1", "int"}, {"testCol2", "varchar"}, {"testCol3", "datetime"}, {"testCol4", "datetimeoffset"}};
220+
String[] dataTypeSchema = {"COLUMN_NAME", "DATA_TYPE", "PRECISION", "SCALE"};
221+
Object[][] dataTypeValues = {{"testCol1", "int", 10, 0}, {"testCol2", "varchar", 255, 0}, {"testCol3", "datetime", 23, 3}, {"testCol4", "datetimeoffset", 34, 7}};
222222
AtomicInteger dataTypeRowNumber = new AtomicInteger(-1);
223223
ResultSet dataTypeResultSet = mockResultSet(dataTypeSchema, dataTypeValues, dataTypeRowNumber);
224224

@@ -241,49 +241,49 @@ public void testGetSchemaWithAzureServerlessEnvironment()
241241
BlockAllocator blockAllocator = new BlockAllocatorImpl();
242242

243243
// Mock the data type query result set for Azure serverless with all supported data types
244-
String[] dataTypeSchema = {"COLUMN_NAME", "DATA_TYPE"};
244+
String[] dataTypeSchema = {"COLUMN_NAME", "DATA_TYPE", "PRECISION", "SCALE"};
245245
Object[][] dataTypeValues = {
246246
// Primary Key
247-
{"id", "int"},
247+
{"id", "int", 10, 0},
248248

249249
// Integer Types
250-
{"small_int_col", "smallint"},
251-
{"tiny_int_col", "tinyint"},
252-
{"big_int_col", "bigint"},
250+
{"small_int_col", "smallint", 5, 0},
251+
{"tiny_int_col", "tinyint", 3, 0},
252+
{"big_int_col", "bigint", 19, 0},
253253

254254
// Decimal/Numeric Types
255-
{"decimal_col", "decimal"},
256-
{"numeric_col", "numeric"},
257-
{"money_col", "money"},
258-
{"small_money_col", "smallmoney"},
255+
{"decimal_col", "decimal", 18, 2},
256+
{"numeric_col", "numeric", 18, 2},
257+
{"money_col", "money", 19, 4},
258+
{"small_money_col", "smallmoney", 10, 4},
259259

260260
// Floating Point Types
261-
{"float_col", "float"},
262-
{"real_col", "real"},
261+
{"float_col", "float", 53, 0},
262+
{"real_col", "real", 24, 0},
263263

264264
// Character Types
265-
{"char_col", "char"},
266-
{"varchar_col", "varchar"},
267-
{"nchar_col", "nchar"},
268-
{"nvarchar_col", "nvarchar"},
265+
{"char_col", "char", 10, 0},
266+
{"varchar_col", "varchar", 255, 0},
267+
{"nchar_col", "nchar", 10, 0},
268+
{"nvarchar_col", "nvarchar", 255, 0},
269269

270270
// Binary Types
271-
{"binary_col", "binary"},
272-
{"varbinary_col", "varbinary"},
271+
{"binary_col", "binary", 16, 0},
272+
{"varbinary_col", "varbinary", 255, 0},
273273

274274
// Date and Time Types
275-
{"date_col", "date"},
276-
{"time_col", "time"},
277-
{"datetime_col", "datetime"},
278-
{"datetime2_col", "datetime2"},
279-
{"smalldatetime_col", "smalldatetime"},
280-
{"datetimeoffset_col", "datetimeoffset"},
275+
{"date_col", "date", 10, 0},
276+
{"time_col", "time", 16, 7},
277+
{"datetime_col", "datetime", 23, 3},
278+
{"datetime2_col", "datetime2", 27, 7},
279+
{"smalldatetime_col", "smalldatetime", 16, 0},
280+
{"datetimeoffset_col", "datetimeoffset", 34, 7},
281281

282282
// Special Types
283-
{"uniqueidentifier_col", "uniqueidentifier"},
283+
{"uniqueidentifier_col", "uniqueidentifier", 36, 0},
284284

285285
// Boolean
286-
{"bit_col", "bit"}
286+
{"bit_col", "bit", 1, 0}
287287
};
288288
AtomicInteger dataTypeRowNumber = new AtomicInteger(-1);
289289
ResultSet dataTypeResultSet = mockResultSet(dataTypeSchema, dataTypeValues, dataTypeRowNumber);
@@ -317,11 +317,11 @@ public void testGetSchemaWithAzureServerlessEnvironment()
317317
// Verify integer types (based on actual DataLakeGen2MetadataHandler mappings)
318318
assertTrue("Should contain INT field", fields.stream().anyMatch(f -> f.getName().equals("id") && f.getType().equals(org.apache.arrow.vector.types.Types.MinorType.INT.getType())));
319319
assertTrue("Should contain SMALLINT field", fields.stream().anyMatch(f -> f.getName().equals("small_int_col") && f.getType().equals(org.apache.arrow.vector.types.Types.MinorType.SMALLINT.getType())));
320-
assertTrue("Should contain TINYINT field (mapped to SMALLINT)", fields.stream().anyMatch(f -> f.getName().equals("tiny_int_col") && f.getType().equals(org.apache.arrow.vector.types.Types.MinorType.SMALLINT.getType())));
320+
assertTrue("Should contain TINYINT field", fields.stream().anyMatch(f -> f.getName().equals("tiny_int_col") && f.getType().equals(org.apache.arrow.vector.types.Types.MinorType.TINYINT.getType())));
321321
assertTrue("Should contain BIGINT field", fields.stream().anyMatch(f -> f.getName().equals("big_int_col") && f.getType().equals(org.apache.arrow.vector.types.Types.MinorType.BIGINT.getType())));
322322

323-
// Verify decimal types (mapped to FLOAT8 in DataLakeGen2MetadataHandler)
324-
assertTrue("Should contain DECIMAL field (mapped to FLOAT8)", fields.stream().anyMatch(f -> f.getName().equals("decimal_col") && f.getType().equals(org.apache.arrow.vector.types.Types.MinorType.FLOAT8.getType())));
323+
// Verify decimal types (mapped to DECIMAL in DataLakeGen2MetadataHandler)
324+
assertTrue("Should contain DECIMAL field", fields.stream().anyMatch(f -> f.getName().equals("decimal_col") && f.getType() instanceof org.apache.arrow.vector.types.pojo.ArrowType.Decimal));
325325
assertTrue("Should contain NUMERIC field (mapped to FLOAT8)", fields.stream().anyMatch(f -> f.getName().equals("numeric_col") && f.getType().equals(org.apache.arrow.vector.types.Types.MinorType.FLOAT8.getType())));
326326

327327
// Verify floating point types
@@ -334,9 +334,10 @@ public void testGetSchemaWithAzureServerlessEnvironment()
334334
assertTrue("Should contain NCHAR field (mapped to VARCHAR)", fields.stream().anyMatch(f -> f.getName().equals("nchar_col") && f.getType().equals(org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType())));
335335
assertTrue("Should contain NVARCHAR field (mapped to VARCHAR)", fields.stream().anyMatch(f -> f.getName().equals("nvarchar_col") && f.getType().equals(org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType())));
336336

337-
// Verify binary types (mapped to VARCHAR in DataLakeGen2MetadataHandler)
338-
assertTrue("Should contain BINARY field (mapped to VARCHAR)", fields.stream().anyMatch(f -> f.getName().equals("binary_col") && f.getType().equals(org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType())));
339-
assertTrue("Should contain VARBINARY field (mapped to VARCHAR)", fields.stream().anyMatch(f -> f.getName().equals("varbinary_col") && f.getType().equals(org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType())));
337+
// Verify binary types (mapped to VARBINARY in DataLakeGen2MetadataHandler)
338+
assertTrue("Should contain BINARY field (mapped to VARBINARY)", fields.stream().anyMatch(f -> f.getName().equals("binary_col") && f.getType().equals(org.apache.arrow.vector.types.Types.MinorType.VARBINARY.getType())));
339+
assertTrue("Should contain VARBINARY field", fields.stream().anyMatch(f -> f.getName().equals("varbinary_col") && f.getType().equals(org.apache.arrow.vector.types.Types.MinorType.VARBINARY.getType())));
340+
340341

341342
// Verify date/time types (based on actual mappings)
342343
assertTrue("Should contain DATE field", fields.stream().anyMatch(f -> f.getName().equals("date_col") && f.getType().equals(org.apache.arrow.vector.types.Types.MinorType.DATEDAY.getType())));
@@ -348,7 +349,7 @@ public void testGetSchemaWithAzureServerlessEnvironment()
348349

349350
// Verify special types
350351
assertTrue("Should contain UNIQUEIDENTIFIER field (mapped to VARCHAR)", fields.stream().anyMatch(f -> f.getName().equals("uniqueidentifier_col") && f.getType().equals(org.apache.arrow.vector.types.Types.MinorType.VARCHAR.getType())));
351-
assertTrue("Should contain BIT field (mapped to TINYINT)", fields.stream().anyMatch(f -> f.getName().equals("bit_col") && f.getType().equals(org.apache.arrow.vector.types.Types.MinorType.TINYINT.getType())));
352+
assertTrue("Should contain BIT field (mapped to BIT)", fields.stream().anyMatch(f -> f.getName().equals("bit_col") && f.getType().equals(org.apache.arrow.vector.types.Types.MinorType.BIT.getType())));
352353

353354
// Verify money types (mapped to FLOAT8 in DataLakeGen2MetadataHandler)
354355
assertTrue("Should contain MONEY field (mapped to FLOAT8)", fields.stream().anyMatch(f -> f.getName().equals("money_col") && f.getType().equals(org.apache.arrow.vector.types.Types.MinorType.FLOAT8.getType())));
@@ -362,8 +363,8 @@ public void testGetSchemaWithStandardEnvironment()
362363
BlockAllocator blockAllocator = new BlockAllocatorImpl();
363364

364365
// Mock the data type query result set
365-
String[] dataTypeSchema = {"COLUMN_NAME", "DATA_TYPE"};
366-
Object[][] dataTypeValues = {{"testCol1", "int"}, {"testCol2", "varchar"}};
366+
String[] dataTypeSchema = {"COLUMN_NAME", "DATA_TYPE", "PRECISION", "SCALE"};
367+
Object[][] dataTypeValues = {{"testCol1", "int", 10, 0}, {"testCol2", "varchar", 255, 0}};
367368
AtomicInteger dataTypeRowNumber = new AtomicInteger(-1);
368369
ResultSet dataTypeResultSet = mockResultSet(dataTypeSchema, dataTypeValues, dataTypeRowNumber);
369370

0 commit comments

Comments
 (0)