@@ -136,20 +136,59 @@ public void TestUdfWithReturnAsMapType()
136
136
[Fact]
public void TestUdfWithRowType()
{
    // Verifies that UDFs can take Row-typed (struct) columns as input in three
    // shapes: a single Row argument, several Row arguments mixed with a scalar
    // argument, and a Row nested inside another Row. Every scenario selects
    // from the shared _df fixture and expects exactly three result rows.
    // NOTE(review): the column names (info1/info2/info3/name) and the expected
    // values come from the fixture data, which is defined outside this method.

    // Single Row
    {
        // Extract one string field ("city") from the struct column.
        Func<Column, Column> udf = Udf<Row, string>(
            (row) => row.GetAs<string>("city"));

        Row[] rows = _df.Select(udf(_df["info1"])).Collect().ToArray();
        Assert.Equal(3, rows.Length);

        var expected = new[] { "Burdwan", "Los Angeles", "Seattle" };
        string[] actual = rows.Select(x => x[0].ToString()).ToArray();
        Assert.Equal(expected, actual);
    }

    // Multiple Rows
    {
        // Two Row arguments plus a plain string argument in the same UDF.
        Func<Column, Column, Column, Column> udf = Udf<Row, Row, string, string>(
            (row1, row2, str) =>
            {
                string city = row1.GetAs<string>("city");
                string state = row2.GetAs<string>("state");
                // The format must contain no padding: the expected values
                // below are compared as exact strings.
                return $"{str}:{city},{state}";
            });

        Row[] rows = _df
            .Select(udf(_df["info1"], _df["info2"], _df["name"]))
            .Collect()
            .ToArray();
        Assert.Equal(3, rows.Length);

        var expected = new[] {
            "Michael:Burdwan,Paschimbanga",
            "Andy:Los Angeles,California",
            "Justin:Seattle,Washington" };
        string[] actual = rows.Select(x => x[0].ToString()).ToArray();
        Assert.Equal(expected, actual);
    }

    // Nested Row
    {
        // The struct column holds another struct under "company"; read the
        // inner Row first, then the "job" field from it.
        Func<Column, Column> udf = Udf<Row, string>(
            (row) =>
            {
                Row outerCol = row.GetAs<Row>("company");
                return outerCol.GetAs<string>("job");
            });

        Row[] rows = _df.Select(udf(_df["info3"])).Collect().ToArray();
        Assert.Equal(3, rows.Length);

        var expected = new[] { "Developer", "Developer", "Developer" };
        string[] actual = rows.Select(x => x[0].ToString()).ToArray();
        Assert.Equal(expected, actual);
    }
}
154
193
155
194
/// <summary>
@@ -168,14 +207,40 @@ public void TestUdfWithReturnAsRowType()
168
207
Func < Column , Column > udf = Udf < string > (
169
208
str => new GenericRow ( new object [ ] { 1 , "abc" } ) , schema ) ;
170
209
171
- Row [ ] rows = _df . Select ( udf ( _df [ "name" ] ) ) . Collect ( ) . ToArray ( ) ;
210
+ Row [ ] rows = _df . Select ( udf ( _df [ "name" ] ) . As ( "col" ) ) . Collect ( ) . ToArray ( ) ;
211
+ Assert . Equal ( 3 , rows . Length ) ;
212
+ foreach ( Row row in rows )
213
+ {
214
+ Assert . Equal ( 1 , row . Size ( ) ) ;
215
+ Row outerCol = row . GetAs < Row > ( "col" ) ;
216
+ Assert . Equal ( 2 , outerCol . Size ( ) ) ;
217
+ Assert . Equal ( 1 , outerCol . GetAs < int > ( "col1" ) ) ;
218
+ Assert . Equal ( "abc" , outerCol . GetAs < string > ( "col2" ) ) ;
219
+ }
220
+ }
221
+
222
+ // Generic row is a part of top-level column.
223
+ {
224
+ var schema = new StructType ( new [ ]
225
+ {
226
+ new StructField ( "col1" , new IntegerType ( ) )
227
+ } ) ;
228
+ Func < Column , Column > udf = Udf < string > (
229
+ str => new GenericRow ( new object [ ] { 111 } ) , schema ) ;
230
+
231
+ Column nameCol = _df [ "name" ] ;
232
+ Row [ ] rows = _df . Select ( udf ( nameCol ) . As ( "col" ) , nameCol ) . Collect ( ) . ToArray ( ) ;
172
233
Assert . Equal ( 3 , rows . Length ) ;
173
234
174
235
foreach ( Row row in rows )
175
236
{
176
237
Assert . Equal ( 2 , row . Size ( ) ) ;
177
- Assert . Equal ( 1 , row . GetAs < int > ( "col1" ) ) ;
178
- Assert . Equal ( "abc" , row . GetAs < string > ( "col2" ) ) ;
238
+ Row col1 = row . GetAs < Row > ( "col" ) ;
239
+ Assert . Equal ( 1 , col1 . Size ( ) ) ;
240
+ Assert . Equal ( 111 , col1 . GetAs < int > ( "col1" ) ) ;
241
+
242
+ string col2 = row . GetAs < string > ( "name" ) ;
243
+ Assert . NotEmpty ( col2 ) ;
179
244
}
180
245
}
181
246
@@ -211,21 +276,23 @@ public void TestUdfWithReturnAsRowType()
211
276
} ) ,
212
277
schema ) ;
213
278
214
- Row [ ] rows = _df . Select ( udf ( _df [ "name" ] ) ) . Collect ( ) . ToArray ( ) ;
279
+ Row [ ] rows = _df . Select ( udf ( _df [ "name" ] ) . As ( "col" ) ) . Collect ( ) . ToArray ( ) ;
215
280
Assert . Equal ( 3 , rows . Length ) ;
216
281
217
282
foreach ( Row row in rows )
218
283
{
219
- Assert . Equal ( 3 , row . Size ( ) ) ;
220
- Assert . Equal ( 1 , row . GetAs < int > ( "col1" ) ) ;
284
+ Assert . Equal ( 1 , row . Size ( ) ) ;
285
+ Row outerCol = row . GetAs < Row > ( "col" ) ;
286
+ Assert . Equal ( 3 , outerCol . Size ( ) ) ;
287
+ Assert . Equal ( 1 , outerCol . GetAs < int > ( "col1" ) ) ;
221
288
Assert . Equal (
222
289
new Row ( new object [ ] { 1 } , subSchema1 ) ,
223
- row . GetAs < Row > ( "col2" ) ) ;
290
+ outerCol . GetAs < Row > ( "col2" ) ) ;
224
291
Assert . Equal (
225
292
new Row (
226
293
new object [ ] { "abc" , new Row ( new object [ ] { 10 } , subSchema1 ) } ,
227
294
subSchema2 ) ,
228
- row . GetAs < Row > ( "col3" ) ) ;
295
+ outerCol . GetAs < Row > ( "col3" ) ) ;
229
296
}
230
297
}
231
298
}
0 commit comments