8
8
using System . Data . Common ;
9
9
using System . Globalization ;
10
10
using System . IO ;
11
+ using System . Linq ;
11
12
using System . Text ;
12
13
using System . Threading . Tasks ;
13
14
@@ -349,8 +350,8 @@ private static DataFrameColumn CreateColumn(Type kind, string[] columnNames, int
349
350
private static DataFrame ReadCsvLinesIntoDataFrame ( WrappedStreamReaderOrStringReader wrappedReader ,
350
351
char separator = ',' , bool header = true ,
351
352
string [ ] columnNames = null , Type [ ] dataTypes = null ,
352
- long numberOfRowsToRead = - 1 , int guessRows = 10 , bool addIndexColumn = false
353
- )
353
+ long numberOfRowsToRead = - 1 , int guessRows = 10 , bool addIndexColumn = false ,
354
+ bool renameDuplicatedColumns = false )
354
355
{
355
356
if ( dataTypes == null && guessRows <= 0 )
356
357
{
@@ -376,6 +377,25 @@ private static DataFrame ReadCsvLinesIntoDataFrame(WrappedStreamReaderOrStringRe
376
377
// First pass: schema and number of rows.
377
378
while ( ( fields = parser . ReadFields ( ) ) != null )
378
379
{
380
// De-duplicate column names on the header record only. Running this on every
// record would rewrite data rows that merely repeat a value (e.g. the row
// "1,1" would become "1" and "1.1"), corrupting both the data and type guessing.
// `header && rowline == 0` is the same header-row test this loop uses below.
if (renameDuplicatedColumns && header && rowline == 0)
{
    // Maps each column name seen so far to the suffix its next duplicate receives.
    var nextSuffixByName = new Dictionary<string, int>();

    for (int i = 0; i < fields.Length; i++)
    {
        string originalName = fields[i];

        if (nextSuffixByName.TryGetValue(originalName, out int suffix))
        {
            // Repeated name: the 2nd occurrence becomes "name.1", the 3rd "name.2", and so on.
            // The invariant culture keeps generated names identical across machine locales.
            fields[i] = string.Format(CultureInfo.InvariantCulture, "{0}.{1}", originalName, suffix);
            nextSuffixByName[originalName] = suffix + 1;
        }
        else
        {
            // First occurrence keeps its name; its first duplicate will get suffix 1.
            nextSuffixByName.Add(originalName, 1);
        }
    }

    // NOTE(review): a generated name can still equal another header field
    // (e.g. "a,a,a.1" yields "a", "a.1", "a.1"); that case is not handled here.
}
398
+
379
399
if ( ( numberOfRowsToRead == - 1 ) || rowline < numberOfRowsToRead )
380
400
{
381
401
if ( linesForGuessType . Count < guessRows || ( header && rowline == 0 ) )
@@ -524,12 +544,13 @@ public static DataFrame LoadCsvFromString(string csvString,
524
544
/// <param name="guessRows">number of rows used to guess types</param>
525
545
/// <param name="addIndexColumn">add one column with the row index</param>
526
546
/// <param name="encoding">The character encoding. Defaults to UTF8 if not specified</param>
547
+ /// <param name="renameDuplicatedColumns">If set to true, columns with repeated names are auto-renamed.</param>
527
548
/// <returns><see cref="DataFrame"/></returns>
528
549
public static DataFrame LoadCsv ( Stream csvStream ,
529
550
char separator = ',' , bool header = true ,
530
551
string [ ] columnNames = null , Type [ ] dataTypes = null ,
531
552
long numberOfRowsToRead = - 1 , int guessRows = 10 , bool addIndexColumn = false ,
532
- Encoding encoding = null )
553
+ Encoding encoding = null , bool renameDuplicatedColumns = false )
533
554
{
534
555
if ( ! csvStream . CanSeek )
535
556
{
@@ -542,7 +563,7 @@ public static DataFrame LoadCsv(Stream csvStream,
542
563
}
543
564
544
565
WrappedStreamReaderOrStringReader wrappedStreamReaderOrStringReader = new WrappedStreamReaderOrStringReader ( csvStream , encoding ?? Encoding . UTF8 ) ;
545
- return ReadCsvLinesIntoDataFrame ( wrappedStreamReaderOrStringReader , separator , header , columnNames , dataTypes , numberOfRowsToRead , guessRows , addIndexColumn ) ;
566
+ return ReadCsvLinesIntoDataFrame ( wrappedStreamReaderOrStringReader , separator , header , columnNames , dataTypes , numberOfRowsToRead , guessRows , addIndexColumn , renameDuplicatedColumns ) ;
546
567
}
547
568
548
569
/// <summary>
0 commit comments