Support for Row type Ser/De and exposing the CreateDataFrame API #338

Merged: 43 commits, merged on Jan 10, 2020

Changes from 9 commits

Commits (43)
044a01e
Initial commit for Row Ser/De and exposing CreateDataFrame
Niharikadutta Nov 15, 2019
b1d8dfc
Reverting changes to Basic.cs
Niharikadutta Nov 15, 2019
5c7dd03
Merge branch 'master' into nidutta/RowTypeSerDeSupport
Niharikadutta Nov 15, 2019
b6b82be
Added example in Basic.cs about how to use CreateDataFrame
Niharikadutta Nov 15, 2019
ae31dd2
Removing unnecessary comments
Niharikadutta Nov 15, 2019
9546229
Removing whitespace
Niharikadutta Nov 15, 2019
ca094d7
Added GenericRow support and some tests, removed comments and added C…
Niharikadutta Nov 16, 2019
3c58aa7
Exposing GenericRow tests and cleaned up comments.
Niharikadutta Dec 10, 2019
74ff143
Made GenericRow as internal
Niharikadutta Dec 10, 2019
236ea30
PR review comments removing whitespaces and commented out functions
Niharikadutta Dec 10, 2019
520528f
exposed CreateDataFrame API that does not take in a schema but infers…
Niharikadutta Dec 11, 2019
1ca7a6c
PR review comments - 2
Niharikadutta Dec 11, 2019
25deb3d
Update src/csharp/Microsoft.Spark/Sql/GenericRow.cs
Niharikadutta Dec 11, 2019
ac8a27f
PR review comment changes - 3
Niharikadutta Dec 11, 2019
fedff8d
merging latest from repo
Niharikadutta Dec 11, 2019
bac8899
Removed commented code
Niharikadutta Dec 11, 2019
b754147
Using composition in Row to avoid duplication with GenericRow
Niharikadutta Dec 12, 2019
3c3e677
Changed Row Ser/De to utilize existing framework and added jvm create…
Niharikadutta Dec 13, 2019
499fc47
Merge branch 'master' into nidutta/RowTypeSerDeSupport
Niharikadutta Dec 13, 2019
66f26c9
Fixed indentation and added E2E tests for CreateDataFrame
Niharikadutta Dec 14, 2019
8943ca8
PR review changes
Niharikadutta Dec 17, 2019
67f1375
Added logic to test properties of created DataFrame in test
Niharikadutta Dec 18, 2019
bb011d7
PR review changes
Niharikadutta Dec 18, 2019
f3eec4e
Sorted usings
Niharikadutta Dec 18, 2019
56dd332
Update src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/SparkSessionTe…
Niharikadutta Dec 20, 2019
aa4913c
Update src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/SparkSessionTe…
Niharikadutta Dec 20, 2019
68145a0
Update src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/SparkSessionTe…
Niharikadutta Dec 20, 2019
a940b9a
Update src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/SparkSessionTe…
Niharikadutta Dec 20, 2019
e1a46fc
PR review changes
Niharikadutta Dec 21, 2019
6bf02d8
Update src/csharp/Microsoft.Spark/Sql/SparkSession.cs
Niharikadutta Dec 26, 2019
bc8029a
Update src/csharp/Microsoft.Spark/Sql/SparkSession.cs
Niharikadutta Dec 26, 2019
7c5bce6
Update src/csharp/Microsoft.Spark/Sql/SparkSession.cs
Niharikadutta Dec 26, 2019
fcc0a13
Update src/csharp/Microsoft.Spark/Sql/SparkSession.cs
Niharikadutta Dec 26, 2019
fae4b2f
PR review changes - exposed CreateDataFrame for string and int withou…
Niharikadutta Jan 2, 2020
6a4bd4d
Made changes as per PR review comments
Niharikadutta Jan 5, 2020
4f5bfff
PR review comments changes
Niharikadutta Jan 8, 2020
09bdf4d
PR review comment changes
Niharikadutta Jan 9, 2020
149044a
PR review comments changes
Niharikadutta Jan 9, 2020
e281692
PR review changes
Niharikadutta Jan 10, 2020
ea84838
PR review changes
Niharikadutta Jan 10, 2020
defe8da
Merge branch 'master' into nidutta/RowTypeSerDeSupport
Niharikadutta Jan 10, 2020
c12a576
update
Niharikadutta Jan 10, 2020
7a45963
Merge branch 'nidutta/RowTypeSerDeSupport' of github.com:Niharikadutt…
Niharikadutta Jan 10, 2020
9 changes: 5 additions & 4 deletions examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/Basic.cs
@@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information.

using System;
using System.Collections;
using System.Collections.Generic;
using Microsoft.Spark.Sql;
using Microsoft.Spark.Sql.Types;
@@ -30,9 +31,9 @@ public void Run(string[] args)
.Config("spark.some.config.option", "some-value")
.GetOrCreate();

// Need to explicitly specify the schema since pickling vs. arrow formatting
// will return different types. Pickling will turn longs into ints if the values fit.
// Same as the "age INT, name STRING" DDL-format string.
var inputSchema = new StructType(new[]
{
new StructField("age", new IntegerType()),
@@ -107,7 +108,7 @@ public void Run(string[] args)
joinedDf2.Show();

DataFrame joinedDf3 = df.Join(df, df["name"] == df["name"], "outer");
joinedDf3.Show();

spark.Stop();
}
44 changes: 44 additions & 0 deletions src/csharp/Microsoft.Spark.UnitTest/Sql/GenericRowTests.cs
@@ -0,0 +1,44 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.Spark.Interop.Ipc;
using Microsoft.Spark.Network;
using Microsoft.Spark.Sql;
using Microsoft.Spark.Sql.Types;
using Microsoft.Spark.UnitTest.TestUtils;
using Microsoft.Spark.Utils;
using Moq;
using Razorvine.Pickle;
using Xunit;

namespace Microsoft.Spark.UnitTest
{
public class GenericRowTests
{
[Fact]
public void GenericRowTest()
{
var row = new GenericRow(new object[] { 1, "abc" });

// Validate Size().
Assert.Equal(2, row.Size());

// Validate [] operator.
Assert.Equal(1, row[0]);
Assert.Equal("abc", row[1]);

// Validate Get*(int).
Assert.Equal(1, row.Get(0));
Assert.Equal("abc", row.Get(1));
Assert.Equal(1, row.GetAs<int>(0));
Assert.ThrowsAny<Exception>(() => row.GetAs<string>(0));
Assert.Equal("abc", row.GetAs<string>(1));
Assert.ThrowsAny<Exception>(() => row.GetAs<int>(1));
}
}
}
16 changes: 16 additions & 0 deletions src/csharp/Microsoft.Spark/Interop/Ipc/PayloadHelper.cs
@@ -8,6 +8,7 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.Spark.Sql;

namespace Microsoft.Spark.Interop.Ipc
{
@@ -25,6 +26,7 @@ internal class PayloadHelper
private static readonly byte[] s_byteArrayTypeId = new[] { (byte)'r' };
private static readonly byte[] s_intArrayTypeId = new[] { (byte)'l' };
Review comment (Member):

I'm not sure why this is called s_intArrayTypeId. Seems to make more sense if it was renamed to s_arrayTypeId.
private static readonly byte[] s_dictionaryTypeId = new[] { (byte)'e' };
private static readonly byte[] s_rowArrTypeId = new[] { (byte)'R' };

private static readonly ConcurrentDictionary<Type, bool> s_isDictionaryTable =
new ConcurrentDictionary<Type, bool>();
@@ -231,6 +233,15 @@ internal static void ConvertArgsToBytes(
SerDe.Write(destination, argProvider.Reference.Id);
break;

case IEnumerable<GenericRow> argRowArray:
SerDe.Write(destination, (int)argRowArray.Count());
foreach (GenericRow r in argRowArray)
{
SerDe.Write(destination, (int)r.Values.Length);
ConvertArgsToBytes(destination, r.Values, true);
}
break;

default:
throw new NotSupportedException(
string.Format($"Type {arg.GetType()} is not supported"));
@@ -283,6 +294,11 @@ internal static byte[] GetTypeId(Type type)
{
return s_intArrayTypeId;
}

if (type == typeof(IEnumerable<GenericRow>))
{
return s_rowArrTypeId;
}
break;
}

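The new IEnumerable&lt;GenericRow&gt; case above frames a row array as a row count, then for each row a value count followed by that row's values, each written recursively by ConvertArgsToBytes with its own type-id byte before the 'R' payload is handed to the JVM. A minimal stand-alone sketch of that framing, assuming SparkR-style type ids ('i' for int, 'c' for string); BinaryWriter is only a stand-in here, since the real SerDe writes big-endian integers:

using System;
using System.IO;

class RowArrayFramingSketch
{
    static void Main()
    {
        object[][] rows =
        {
            new object[] { 34, "alice" },
            new object[] { 28, "bob" },
        };

        using var stream = new MemoryStream();
        using var writer = new BinaryWriter(stream);

        writer.Write(rows.Length);        // row count, as in SerDe.Write(destination, argRowArray.Count())
        foreach (object[] row in rows)
        {
            writer.Write(row.Length);     // values in this row, as in SerDe.Write(destination, r.Values.Length)
            foreach (object value in row)
            {
                // The real code recurses via ConvertArgsToBytes(destination, r.Values, true),
                // which prefixes each value with a one-byte type id; the ids below are assumptions.
                switch (value)
                {
                    case int i:
                        writer.Write((byte)'i');
                        writer.Write(i);
                        break;
                    case string s:
                        writer.Write((byte)'c');
                        writer.Write(s);
                        break;
                }
            }
        }

        Console.WriteLine($"Framed {rows.Length} rows into {stream.Length} bytes.");
    }
}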
168 changes: 168 additions & 0 deletions src/csharp/Microsoft.Spark/Sql/GenericRow.cs
@@ -0,0 +1,168 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.Spark.Sql.Types;

namespace Microsoft.Spark.Sql
{
/// <summary>
/// Represents a row object in RDD, equivalent to GenericRow in Spark.
/// </summary>
public sealed class GenericRow
{
/// <summary>
/// Constructor for the GenericRow class.
/// </summary>
/// <param name="values">Column values for a row</param>
internal GenericRow(object[] values)
{
Values = values;
//TODO:
//Convert() -> implement type checking for not implemented exception
}

/// <summary>
/// Values representing this row.
/// </summary>
public object[] Values { get; }

/// <summary>
/// Returns the number of columns in this row.
/// </summary>
/// <returns>Number of columns in this row</returns>
public int Size() => Values.Length;

/// <summary>
/// Returns the column value at the given index.
/// </summary>
/// <param name="index">Index to look up</param>
/// <returns>A column value</returns>
public object this[int index] => Get(index);

/// <summary>
/// Returns the column value at the given index.
/// </summary>
/// <param name="index">Index to look up</param>
/// <returns>A column value</returns>
public object Get(int index)
{
if (index >= Size())
{
throw new IndexOutOfRangeException($"index ({index}) >= column counts ({Size()})");
}
else if (index < 0)
{
throw new IndexOutOfRangeException($"index ({index}) < 0");
}

return Values[index];
}

///// <summary>
///// Returns the column value whose column name is given.
///// </summary>
///// <param name="columnName">Column name to look up</param>
///// <returns>A column value</returns>
//public object Get(string columnName) =>
// Get(Schema.Fields.FindIndex(f => f.Name == columnName));

/// <summary>
/// Returns the string version of this row.
/// </summary>
/// <returns>String version of this row</returns>
public override string ToString()
{
var cols = new List<string>();
foreach (object item in Values)
{
cols.Add(item?.ToString() ?? string.Empty);
}

return $"[{(string.Join(",", cols.ToArray()))}]";
}

/// <summary>
/// Returns the column value at the given index, as a type T.
/// TODO: If the original type is "long" and its value can be
/// fit into the "int", Pickler will serialize the value as int.
/// Since the value is boxed, <see cref="GetAs{T}(int)"/> will throw an exception.
/// </summary>
/// <typeparam name="T">Type to convert to</typeparam>
/// <param name="index">Index to look up</param>
/// <returns>A column value as a type T</returns>
public T GetAs<T>(int index) => (T)Get(index);

///// <summary>
///// Returns the column value whose column name is given, as a type T.
///// TODO: If the original type is "long" and its value can be
///// fit into the "int", Pickler will serialize the value as int.
///// Since the value is boxed, <see cref="GetAs{T}(string)"/> will throw an exception.
///// </summary>
///// <typeparam name="T">Type to convert to</typeparam>
///// <param name="columnName">Column name to look up</param>
///// <returns>A column value as a type T</returns>
//public T GetAs<T>(string columnName) => (T)Get(columnName);

/// <summary>
/// Checks if the given object is the same as the current object.
/// </summary>
/// <param name="obj">Other object to compare against</param>
/// <returns>True if the other object is equal.</returns>
public override bool Equals(object obj)
{
if (obj is null)
{
return false;
}

if (ReferenceEquals(this, obj))
{
return true;
}

if (obj is GenericRow otherRow)
{
return Values.SequenceEqual(otherRow.Values);
}

return false;
}

/// <summary>
/// Returns the hash code of the current object.
/// </summary>
/// <returns>The hash code of the current object</returns>
public override int GetHashCode() => base.GetHashCode();

//TODO:
///// <summary>
///// Converts the values to .NET values. Currently, only the simple types such as
///// int, string, etc. are supported (which are already converted correctly by
///// the Pickler). Note that explicit type checks against the schema are not performed.
///// </summary>
//private void Convert()
//{
// foreach (object val in Values)
// {
// TypeCode valType = Type.GetTypeCode(val.GetType());
// if (valType == TypeCode.Object)
// {
// switch (valType)
// {
// case object[]:
// SerDe.Write(destination, (int)arg);
// break;

// case TypeCode.Int64:
// SerDe.Write(destination, (long)arg);
// break;
// }
// }
// }
//}
}
}
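The GetAs&lt;T&gt; remark in the class above has a practical consequence: values arrive boxed, so unboxing to a wider type than the one the pickler chose throws. A small self-contained illustration of that caveat (plain C#, no Spark dependency):

using System;

class GetAsCaveatSketch
{
    static void Main()
    {
        // The pickler may deliver an int even when the source column was a long,
        // so the boxed value's runtime type is int.
        object boxed = 1;

        try
        {
            long bad = (long)boxed;  // unboxing to a different type throws
            Console.WriteLine(bad);
        }
        catch (InvalidCastException)
        {
            Console.WriteLine("Cannot unbox a boxed int as long.");
        }

        long ok = Convert.ToInt64(boxed);  // convert instead of unboxing
        Console.WriteLine(ok);
    }
}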
1 change: 0 additions & 1 deletion src/csharp/Microsoft.Spark/Sql/Row.cs
@@ -23,7 +23,6 @@ internal Row(object[] values, StructType schema)
{
Values = values;
Schema = schema;

var schemaColumnCount = Schema.Fields.Count;
if (Size() != schemaColumnCount)
{
12 changes: 12 additions & 0 deletions src/csharp/Microsoft.Spark/Sql/SparkSession.cs
@@ -3,10 +3,13 @@
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Runtime.InteropServices.ComTypes;
using Microsoft.Spark.Interop;
using Microsoft.Spark.Interop.Internal.Scala;
using Microsoft.Spark.Interop.Ipc;
using Microsoft.Spark.Sql.Streaming;
using Microsoft.Spark.Sql.Types;

namespace Microsoft.Spark.Sql
{
@@ -136,6 +139,15 @@ public SparkSession NewSession() =>
public DataFrame Table(string tableName) =>
new DataFrame((JvmObjectReference)_jvmObject.Invoke("table", tableName));

/// <summary>
/// Creates a DataFrame from the given data and schema.
/// </summary>
/// <param name="data">List of GenericRow objects</param>
/// <param name="schema">Schema as StructType</param>
/// <returns>DataFrame object</returns>
public DataFrame CreateDataFrame(IEnumerable<GenericRow> data, StructType schema) =>
new DataFrame((JvmObjectReference)_jvmObject.Invoke("createDataFrame", data, DataType.FromJson(_jvmObject.Jvm, schema.Json)));

/// <summary>
/// Executes a SQL query using Spark, returning the result as a DataFrame.
/// </summary>
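A minimal usage sketch of the new overload, assuming a running SparkSession named spark. Note that at this commit the GenericRow constructor is still internal (see the "Made GenericRow as internal" commit above), so the sketch assumes the publicly constructible surface the API implies:

using System.Collections.Generic;
using Microsoft.Spark.Sql;
using Microsoft.Spark.Sql.Types;

static class CreateDataFrameSketch
{
    public static void Run(SparkSession spark)
    {
        var schema = new StructType(new[]
        {
            new StructField("age", new IntegerType()),
            new StructField("name", new StringType()),
        });

        var data = new List<GenericRow>
        {
            new GenericRow(new object[] { 34, "alice" }),
            new GenericRow(new object[] { 28, "bob" }),
        };

        DataFrame df = spark.CreateDataFrame(data, schema);
        df.PrintSchema();
        df.Show();
    }
}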
SerDe.scala
@@ -9,7 +9,12 @@ package org.apache.spark.api.dotnet
import java.io.{DataInputStream, DataOutputStream}
import java.sql.{Date, Time, Timestamp}

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.expressions.GenericRow

import scala.collection.JavaConverters._
import scala.collection.JavaConversions._
import scala.collection.mutable.ListBuffer

/**
* Functions to serialize and deserialize between CLR & JVM.
@@ -39,6 +44,7 @@ object SerDe {
case 'D' => readDate(dis)
case 't' => readTime(dis)
case 'j' => JVMObjectTracker.getObject(readString(dis))
case 'R' => readRowArr(dis)
case _ => throw new IllegalArgumentException(s"Invalid type $dataType")
}
}
@@ -90,6 +96,16 @@
t
}

def readRow(in: DataInputStream): Row = {
val rowLen = readInt(in)
var rowValues: ListBuffer[Any] = ListBuffer()
for (j <- 0 until rowLen) {
val elemType = readObjectType(in)
rowValues += readTypedObject(in, elemType)
}
Row.fromSeq(rowValues.toList)
}

def readBytesArr(in: DataInputStream): Array[Array[Byte]] = {
val len = readInt(in)
(0 until len).map(_ => readBytes(in)).toArray
@@ -120,6 +136,15 @@
(0 until len).map(_ => readString(in)).toArray
}

def readRowArr(in: DataInputStream): java.util.List[Row] = {
val arrLen = readInt(in)
val arr = new Array[Row](arrLen)
for (i <- 0 until arrLen) {
arr(i) = readRow(in)
}
ListBuffer(arr: _*)
}

def readList(dis: DataInputStream): Array[_] = {
val arrType = readObjectType(dis)
arrType match {
DotnetBackendHandler.scala
@@ -222,7 +222,7 @@ class DotnetBackendHandler(server: DotnetBackend)
return false
}

for (i <- 0 to numArgs - 1) {
for (i <- 0 until numArgs) {
val parameterType = parameterTypes(i)
var parameterWrapperType = parameterType
