1919 */
2020package com .amazonaws .athena .connectors .msk ;
2121
22+ import com .amazonaws .athena .connector .lambda .exceptions .AthenaConnectorException ;
23+ import com .amazonaws .athena .connectors .msk .dto .MSKField ;
2224import com .fasterxml .jackson .databind .DeserializationFeature ;
2325import com .fasterxml .jackson .databind .ObjectMapper ;
26+ import com .github .os72 .protocjar .Protoc ;
27+ import com .google .protobuf .DescriptorProtos .DescriptorProto ;
28+ import com .google .protobuf .DescriptorProtos .FieldDescriptorProto ;
29+ import com .google .protobuf .DescriptorProtos .FileDescriptorSet ;
30+ import org .slf4j .Logger ;
31+ import org .slf4j .LoggerFactory ;
2432import software .amazon .awssdk .services .glue .GlueClient ;
33+ import software .amazon .awssdk .services .glue .model .ErrorDetails ;
34+ import software .amazon .awssdk .services .glue .model .FederationSourceErrorCode ;
2535import software .amazon .awssdk .services .glue .model .GetSchemaRequest ;
2636import software .amazon .awssdk .services .glue .model .GetSchemaResponse ;
2737import software .amazon .awssdk .services .glue .model .GetSchemaVersionRequest ;
2838import software .amazon .awssdk .services .glue .model .GetSchemaVersionResponse ;
2939import software .amazon .awssdk .services .glue .model .SchemaId ;
3040import software .amazon .awssdk .services .glue .model .SchemaVersionNumber ;
3141
42+ import java .io .FileInputStream ;
43+ import java .io .IOException ;
44+ import java .nio .file .Files ;
45+ import java .nio .file .Path ;
46+ import java .nio .file .Paths ;
47+ import java .util .ArrayList ;
48+ import java .util .List ;
49+ import java .util .UUID ;
50+
3251public class GlueRegistryReader
3352{
53+ private static final Logger logger = LoggerFactory .getLogger (GlueRegistryReader .class );
3454 private static final ObjectMapper objectMapper ;
55+ private static final String PROTO_FILE = "schema.proto" ;
56+ private static final String DESC_FILE = "schema.desc" ;
3557
static {
    // Shared mapper for schema JSON: empty strings deserialize as null objects and
    // properties unknown to the target class are ignored instead of failing.
    objectMapper = new ObjectMapper()
            .enable(DeserializationFeature.ACCEPT_EMPTY_STRING_AS_NULL_OBJECT)
            .disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);
}
4163
64+ /**
65+ * Parse protobuf schema definition from Glue Schema Registry using protoc compiler
66+ * @param glueRegistryName Registry name
67+ * @param glueSchemaName Schema name
68+ * @return List of MSKField objects containing field information
69+ * @throws AthenaConnectorException if schema parsing fails
70+ */
71+ public List <MSKField > getProtobufFields (String glueRegistryName , String glueSchemaName )
72+ {
73+ // Get schema from Glue
74+ GetSchemaVersionResponse schemaVersionResponse = getSchemaVersionResult (glueRegistryName , glueSchemaName );
75+ String schemaDef = schemaVersionResponse .schemaDefinition ();
76+
77+ // Create a unique temp directory using UUID
78+ Path protoDir = Paths .get ("/tmp" , "proto_" + UUID .randomUUID ());
79+ Path protoFile = protoDir .resolve (PROTO_FILE );
80+ Path descFile = protoDir .resolve (DESC_FILE );
81+
82+ try {
83+ Files .createDirectories (protoDir );
84+ Files .writeString (protoFile , schemaDef );
85+ // Compile using protoc-jar
86+ int exitCode = Protoc .runProtoc (new String []{
87+ "--descriptor_set_out=" + descFile .toAbsolutePath (),
88+ "--proto_path=" + protoDir .toAbsolutePath (),
89+ protoFile .getFileName ().toString ()
90+ });
91+
92+ if (exitCode != 0 || !Files .exists (descFile )) {
93+ throw new AthenaConnectorException (
94+ "Failed to generate descriptor set with protoc" ,
95+ ErrorDetails .builder ()
96+ .errorCode (FederationSourceErrorCode .INTERNAL_SERVICE_EXCEPTION .toString ())
97+ .build ()
98+ );
99+ }
100+
101+ try (FileInputStream fis = new FileInputStream (descFile .toFile ())) {
102+ FileDescriptorSet descriptorSet = FileDescriptorSet .parseFrom (fis );
103+
104+ if (descriptorSet .getFileList ().isEmpty () ||
105+ descriptorSet .getFile (0 ).getMessageTypeList ().isEmpty ()) {
106+ throw new AthenaConnectorException (
107+ "No message types found in compiled schema" ,
108+ ErrorDetails .builder ()
109+ .errorCode (FederationSourceErrorCode .INVALID_RESPONSE_EXCEPTION .toString ())
110+ .build ()
111+ );
112+ }
113+
114+ List <MSKField > fields = new ArrayList <>();
115+ DescriptorProto messageType = descriptorSet .getFile (0 ).getMessageType (0 );
116+ for (FieldDescriptorProto field : messageType .getFieldList ()) {
117+ String fieldType = getFieldTypeString (field );
118+ fields .add (new MSKField (field .getName (), fieldType ));
119+ }
120+
121+ return fields ;
122+ }
123+ }
124+ catch (IOException | InterruptedException e ) {
125+ throw new AthenaConnectorException (
126+ "Error while handling schema files or protoc execution" ,
127+ ErrorDetails .builder ()
128+ .errorCode (FederationSourceErrorCode .INTERNAL_SERVICE_EXCEPTION .toString ())
129+ .build ()
130+ );
131+ }
132+ finally {
133+ // Clean up temporary files
134+ try {
135+ Files .deleteIfExists (protoFile );
136+ Files .deleteIfExists (descFile );
137+ Files .deleteIfExists (protoDir );
138+ }
139+ catch (IOException e ) {
140+ logger .warn ("Failed to clean up temporary proto directory: {}" , protoDir .toAbsolutePath (), e );
141+ }
142+ }
143+ }
144+
145+ /**
146+ * Convert protobuf field type to string representation
147+ */
148+ private String getFieldTypeString (FieldDescriptorProto field )
149+ {
150+ String baseType = field .getType ().toString ().toLowerCase ().replace ("type_" , "" );
151+ return field .getLabel () == FieldDescriptorProto .Label .LABEL_REPEATED ?
152+ "repeated " + baseType : baseType ;
153+ }
154+
42155 /**
43156 * Fetch glue schema content for latest version
44157 * @param glueRegistryName
@@ -62,6 +175,7 @@ public GetSchemaVersionResponse getSchemaVersionResult(String glueRegistryName,
62175 .build ()
63176 );
64177 }
178+
65179 /**
66180 * fetch schema file content from glue schema.
67181 *
@@ -77,14 +191,10 @@ public <T> T getGlueSchema(String glueRegistryName, String glueSchemaName, Class
77191 GetSchemaVersionResponse result = getSchemaVersionResult (glueRegistryName , glueSchemaName );
78192 return objectMapper .readValue (result .schemaDefinition (), clazz );
79193 }
194+
80195 public String getGlueSchemaType (String glueRegistryName , String glueSchemaName )
81196 {
82197 GetSchemaVersionResponse result = getSchemaVersionResult (glueRegistryName , glueSchemaName );
83198 return result .dataFormatAsString ();
84199 }
85- public String getSchemaDef (String glueRegistryName , String glueSchemaName )
86- {
87- GetSchemaVersionResponse result = getSchemaVersionResult (glueRegistryName , glueSchemaName );
88- return result .schemaDefinition ();
89- }
90200}
0 commit comments