Skip to content

Commit 4438533

Browse files
Feature / Full support for STRUCT data type (#591)
* Text file producers * Test file consumers * Text file reader / writer * Arrow VSR and schema mapping * Common text codec * Move JSON codec to common text codec base * Move CSV codec to common text codec base * Update common codec API * Make arrow codecs use the new base API * Java test updates * Use trac schemas in data storage framework instead of arrow * Schema mapping fixes * Schema validator * Add a test case for schema mapping * Test data for JSON structs * Proto updates * Runtime struct updates * Runtime framework updates * Test case fixes * Test case fixes * Handle dict encoded fields in data comparison test util * Record named enums in define_struct * Allow qualified identifiers for named types in schema validator * Throw a more meaningful error for unrecognized fields in json struct parser * Fix sample data for structured run config * Fix test schema for runtime structs * Support aliases for format types in struct processor * Special case the storage layout for struct objects in the runtime * Handle storage layout for structs in the data service * Add an end-to-end test for a struct model * Add a test case for chained structs in the examples * Handle different cases for alt struct formats * In context impl, allow that structs may already be decoded * Add a test case for running struct models through a flow * Remove debug logging * Update job consistency validator to handle nested types * Bump versions for compliance * Remove previous version forcing for Azure * Force version on Nimbus JOSE in Azure plugin
1 parent 9a934ef commit 4438533

File tree

120 files changed

+8092
-1771
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

120 files changed

+8092
-1771
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
2+
job:
3+
runFlow:
4+
5+
flow: ./chaining_struct_flow.yaml
6+
7+
parameters:
8+
t0_date: "2025-07-01"
9+
projection_period: 365
10+
11+
inputs:
12+
run_config: "inputs/structured_run_config.yaml"
13+
14+
outputs:
15+
modified_config: "outputs/chaining/modified_run_config.yaml"
16+
17+
models:
18+
model_1: tutorial.structured_objects.StructModel
19+
model_2: tutorial.structured_objects.StructModel
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
2+
nodes:
3+
4+
run_config:
5+
nodeType: "INPUT_NODE"
6+
7+
model_1:
8+
nodeType: "MODEL_NODE"
9+
inputs: [run_config]
10+
outputs: [modified_config]
11+
12+
model_2:
13+
nodeType: "MODEL_NODE"
14+
inputs: [run_config]
15+
outputs: [modified_config]
16+
17+
modified_config:
18+
nodeType: "OUTPUT_NODE"
19+
20+
edges:
21+
22+
- source: { node: run_config }
23+
target: { node: model_1, socket: run_config }
24+
25+
- source: { node: model_1, socket: modified_config }
26+
target: { node: model_2, socket: run_config }
27+
28+
- source: { node: model_2, socket: modified_config }
29+
target: { node: modified_config }

examples/models/python/data/inputs/structured_run_config.json

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,20 @@
44
"base_scenario": {
55
"scenario_name": "base",
66
"default_weight": 1.0,
7-
"evolution_model": "perturb",
8-
"apply_smoothing": true,
9-
"correlated_scenario": {
10-
"scenario_name": "correlated",
11-
"default_weight": 1.0,
12-
"evolution_model": "perturb_smooth",
13-
"apply_smoothing": false
14-
}
7+
"evolution_model": "PERTURB",
8+
"apply_smoothing": true
159
},
1610
"stress_scenarios": {
1711
"downturn": {
1812
"scenario_name": "downturn",
1913
"default_weight": 0.37,
20-
"evolution_model": "perturb",
14+
"evolution_model": "PERTURB",
2115
"apply_smoothing": false
2216
},
2317
"high_interest": {
2418
"scenario_name": "high_interest",
2519
"default_weight": 0.66,
26-
"evolution_model": "perturb",
20+
"evolution_model": "PERTURB",
2721
"apply_smoothing": false
2822
}
2923
}

examples/models/python/data/inputs/structured_run_config.yaml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,6 @@ base_scenario:
44
default_weight: 1.0
55
evolution_model: "perturb"
66
apply_smoothing: true
7-
correlated_scenario:
8-
scenario_name: "correlated"
9-
default_weight: 1.0
10-
evolution_model: "perturb_smooth"
11-
apply_smoothing: false
127

138
stress_scenarios:
149
downturn:

gradle/versions.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ ext {
6363

6464
aws_sdk_version = '2.31.59'
6565
gcp_sdk_version = '26.61.0'
66-
azure_sdk_version = '1.2.35'
66+
azure_sdk_version = '1.2.36'
6767

6868
apache_sshd_version = "2.12.1"
6969

tracdap-api/tracdap-metadata/src/main/proto/tracdap/metadata/data.proto

Lines changed: 23 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,16 @@ import "tracdap/metadata/object_id.proto";
3434
*/
3535
enum SchemaType {
3636

37+
option allow_alias = true;
38+
3739
SCHEMA_TYPE_NOT_SET = 0;
3840

3941
/// Tabular data
4042
TABLE = 1;
43+
TABLE_SCHEMA = 1;
4144

4245
/// Structured objects
43-
STRUCT = 2;
46+
STRUCT_SCHEMA = 2;
4447
}
4548

4649
/**
@@ -63,8 +66,18 @@ message FieldSchema {
6366
optional bool notNull = 8;
6467

6568
optional string formatCode = 7;
69+
70+
optional Value defaultValue = 9;
71+
optional string namedType = 10;
72+
optional string namedEnum = 11;
73+
74+
repeated FieldSchema children = 12;
6675
}
6776

77+
message EnumValues {
78+
79+
repeated string values = 1;
80+
}
6881

6982
/**
7083
* Schema for a tabular dataset
@@ -75,38 +88,6 @@ message TableSchema {
7588
}
7689

7790

78-
/**
79-
* Schema for an individual field in a structured object dataset
80-
*/
81-
message StructField {
82-
83-
TypeDescriptor fieldType = 3;
84-
85-
string label = 4;
86-
87-
bool businessKey = 5;
88-
bool categorical = 6;
89-
90-
// This could become mandatory with the next metadata update
91-
optional bool notNull = 8;
92-
93-
optional string formatCode = 7;
94-
95-
Value defaultValue = 9;
96-
97-
optional string structType = 10;
98-
}
99-
100-
101-
/**
102-
* Schema for a structured object dataset
103-
*/
104-
message StructSchema {
105-
106-
map<string, StructField> fields = 1;
107-
108-
map<string, StructSchema> namedTypes = 2;
109-
}
11091

11192

11293
/**
@@ -129,8 +110,13 @@ message SchemaDefinition {
129110
oneof schemaDetails {
130111

131112
TableSchema table = 3;
132-
StructSchema struct = 4;
133113
}
114+
115+
repeated FieldSchema fields = 5;
116+
117+
map<string, SchemaDefinition> namedTypes = 6;
118+
119+
map<string, EnumValues> namedEnums = 7;
134120
}
135121

136122

@@ -139,8 +125,11 @@ message SchemaDefinition {
139125
*/
140126
enum PartType {
141127

128+
option allow_alias = true;
129+
142130
/// Dataset has a single partition called the root partition (this is the default)
143131
PART_ROOT = 0;
132+
NOT_PARTITIONED = 0;
144133

145134
/// Partition by range over an ordered variable (not available yet)
146135
PART_BY_RANGE = 1;

tracdap-api/tracdap-metadata/src/main/proto/tracdap/metadata/type.proto

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -103,15 +103,16 @@ enum BasicType {
103103
/**
104104
* An key-value map with string keys, values may be primitive or composite values.
105105
*
106-
* Maps may be uniform, in which case all the values are of the same type, or non-
107-
* uniform in which case values can be of any type. For uniform maps the type
108-
* descriptor will specify the type contained in the map. For non-uniform maps the
109-
* type descriptor can only specify that the map is non-uniform, values must be
110-
* examined at run time to determine their type.
106+
* All values in a map must have the same type (i.e. the same type descriptor).
111107
*
112108
* @see TypeDescriptor
113109
*/
114110
MAP = 9;
111+
112+
/**
113+
* A nested structure with a named set of fields, which may be primitive or composite values.
114+
*/
115+
STRUCT = 10;
115116
}
116117

117118

@@ -140,6 +141,13 @@ message TypeDescriptor {
140141
* be inspected individually to determine their type.
141142
*/
142143
optional TypeDescriptor mapType = 3;
144+
145+
reserved 4;
146+
reserved "mapKeyType";
147+
148+
map<string, TypeDescriptor> structTypes = 5;
149+
150+
optional string typeName = 6;
143151
}
144152

145153

tracdap-libs/tracdap-lib-common/src/main/java/org/finos/tracdap/common/metadata/MetadataConstants.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,11 @@ public class MetadataConstants {
5858
// ^...$ would allow matches like "my_var\n_gotcha"
5959
public static final Pattern VALID_IDENTIFIER = Pattern.compile("\\A[a-zA-Z_]\\w*\\Z");
6060

61+
public static final Pattern QUALIFIED_IDENTIFIER = Pattern.compile("\\A[a-zA-Z_]\\w*(\\.[a-zA-Z_]\\w*)*\\Z");
62+
6163
// Identifiers starting trac_ are reserved for use by the TRAC platform
6264
// Identifiers starting _ are also reserved by convention, for private / protected / system variables
63-
public static final Pattern TRAC_RESERVED_IDENTIFIER = Pattern.compile("\\A(trac[_\\-.]|[_\\-.]).*", Pattern.CASE_INSENSITIVE);
65+
public static final Pattern TRAC_RESERVED_IDENTIFIER = Pattern.compile("\\A(trac(dap)?[_\\-.]|[_\\-.]).*", Pattern.CASE_INSENSITIVE);
6466

6567
public static final String TRAC_CREATE_TIME = "trac_create_time";
6668
public static final String TRAC_CREATE_USER_ID = "trac_create_user_id";

tracdap-libs/tracdap-lib-data/src/main/java/org/finos/tracdap/common/codec/CodecManager.java

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,25 +23,39 @@
2323
import org.slf4j.Logger;
2424
import org.slf4j.LoggerFactory;
2525

26+
import java.util.Collections;
27+
import java.util.HashMap;
28+
import java.util.Map;
29+
2630

2731
public class CodecManager implements ICodecManager {
2832

2933
private final Logger log = LoggerFactory.getLogger(getClass());
3034

31-
private final IPluginManager plugins;
32-
private final ConfigManager configManager;
35+
private final Map<String, ICodec> codecs;
3336

3437
public CodecManager(IPluginManager plugins, ConfigManager configManager) {
35-
this.plugins = plugins;
36-
this.configManager = configManager;
38+
39+
var protocols = plugins.availableProtocols(ICodec.class);
40+
var codecs = new HashMap<String, ICodec>();
41+
42+
for (var protocol : protocols) {
43+
var codec = plugins.createService(ICodec.class, protocol, configManager);
44+
codecs.put(protocol.toLowerCase(), codec);
45+
}
46+
47+
this.codecs = Collections.unmodifiableMap(codecs);
3748
}
3849

3950
@Override
4051
public ICodec getCodec(String format) {
4152

42-
if (plugins.isServiceAvailable(ICodec.class, format))
43-
return plugins.createService(ICodec.class, format, configManager);
53+
var protocol = format.toLowerCase();
54+
55+
if (codecs.containsKey(protocol)) {
4456

57+
return codecs.get(protocol);
58+
}
4559
else {
4660

4761
// Make a slightly prettier message that the regular plugin not available message

tracdap-libs/tracdap-lib-data/src/main/java/org/finos/tracdap/common/codec/ICodec.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.finos.tracdap.common.codec;
1919

2020
import org.finos.tracdap.common.data.ArrowVsrSchema;
21+
import org.finos.tracdap.metadata.SchemaDefinition;
2122
import org.finos.tracdap.common.data.DataPipeline;
2223

2324
import org.apache.arrow.memory.BufferAllocator;
@@ -47,6 +48,15 @@ Encoder<?> getEncoder(
4748

4849
Decoder<?> getDecoder(
4950
BufferAllocator allocator,
50-
ArrowVsrSchema schema,
51+
Map<String, String> options);
52+
53+
Decoder<?> getDecoder(
54+
SchemaDefinition tracSchema,
55+
BufferAllocator allocator,
56+
Map<String, String> options);
57+
58+
Decoder<?> getDecoder(
59+
ArrowVsrSchema arrowSchema,
60+
BufferAllocator allocator,
5161
Map<String, String> options);
5262
}

0 commit comments

Comments
 (0)