-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathparser.go
More file actions
95 lines (87 loc) · 2.44 KB
/
parser.go
File metadata and controls
95 lines (87 loc) · 2.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
package sv
import (
"bufio"
"github.com/viant/toolbox"
"reflect"
"strings"
)
// SeparatedValueParser parses delimiter-separated text into records keyed by
// column name. After decoding, it discovers a kind for each column and
// converts the underlying values to that kind.
type SeparatedValueParser struct {
// factory creates the decoder used for each scanned line.
factory toolbox.DecoderFactory
// delimiter is the column separator; it is handed to the delimited record
// used for decoding and is also used to spot rows made up only of delimiters.
delimiter string
}
// Parse converts delimiter-separated text into a slice of records, one map
// per data row.
//
// Carriage returns are stripped before the input is scanned line by line. The
// first line is decoded into the shared delimited record before any data row
// (presumably establishing the column names; record.Columns is later passed
// to discoverDataTypes). Each following line is decoded into a fresh map,
// with these exceptions:
//
//   - lines that are blank after trimming whitespace are skipped;
//   - a line made up solely of delimiters, seen before the second data row
//     has been decoded, is not decoded. It is instead reported as a single
//     empty record placed at the front of the result.
//
// Once all rows are decoded, discoverDataTypes converts the column values in
// place.
//
// An error is returned when the first line or any data row cannot be decoded,
// or when the scanner itself fails (for example on a line exceeding its
// token limit); in that case the returned slice is nil.
func (p *SeparatedValueParser) Parse(data []byte) ([]map[string]interface{}, error) {
	text := strings.Replace(string(data), "\r", "", -1)
	scanner := bufio.NewScanner(strings.NewReader(text))
	record := &toolbox.DelimitedRecord{Delimiter: p.delimiter}
	if scanner.Scan() {
		// Do not swallow a failure on the first line: every later row is
		// decoded against the state this call leaves in record.
		if err := p.factory.Create(strings.NewReader(scanner.Text())).Decode(record); err != nil {
			return nil, err
		}
	}

	result := make([]map[string]interface{}, 0)
	decodedRows := 0
	hasFirstEmptyRow := false
	for scanner.Scan() {
		line := scanner.Text()
		if strings.TrimSpace(line) == "" {
			continue
		}
		// The line consists only of delimiters when the non-overlapping
		// delimiter occurrences account for every byte of it. Multiplying by
		// the delimiter length keeps this correct for multi-byte delimiters.
		if decodedRows < 2 && strings.Count(line, p.delimiter)*len(p.delimiter) == len(line) {
			// The flag must be set before skipping the line, otherwise the
			// empty leading record is never emitted.
			hasFirstEmptyRow = true
			continue
		}
		decodedRows++
		record.Record = make(map[string]interface{})
		if err := p.factory.Create(strings.NewReader(line)).Decode(record); err != nil {
			return nil, err
		}
		result = append(result, record.Record)
	}
	// Scan returns false on both EOF and failure; only Err tells them apart.
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	p.discoverDataTypes(record.Columns, result)
	if hasFirstEmptyRow {
		// Prepend after type discovery so the empty marker record does not
		// take part in it.
		result = append([]map[string]interface{}{{}}, result...)
	}
	return result, nil
}
// discoverDataTypes determines a kind for every column and rewrites the
// values of records in place so that each column holds values of that kind.
//
// It works in two passes. The first pass walks every non-empty value, replaces
// it with the value returned by toolbox.DiscoverValueAndKind and records the
// reported kind for its column. Two rules apply while recording:
//
//   - once a column has been recorded as reflect.String it is no longer
//     inspected;
//   - a reflect.Int observation does not overwrite a column already recorded
//     as reflect.Float64.
//
// Otherwise the most recently observed kind replaces the previous one.
//
// The second pass converts every value to its column's kind: int, float and
// bool columns use the matching toolbox conversion, and every other kind
// (including columns for which nothing was discovered) is converted to string.
func (p *SeparatedValueParser) discoverDataTypes(columns []string, records []map[string]interface{}) {
	kindByColumn := make(map[string]reflect.Kind)
	for i := range columns {
		kindByColumn[columns[i]] = reflect.Invalid
	}

	// Pass 1: discover values and settle on a kind per column.
	for _, row := range records {
		for name, raw := range row {
			known := kindByColumn[name]
			if known == reflect.String {
				continue
			}
			text := toolbox.AsString(raw)
			if text == "" {
				continue
			}
			value, observed := toolbox.DiscoverValueAndKind(text)
			row[name] = value
			// An integer seen in a float column leaves the column as float.
			if known == reflect.Float64 && observed == reflect.Int {
				continue
			}
			kindByColumn[name] = observed
		}
	}

	// Pass 2: convert every value to the kind chosen for its column.
	for _, row := range records {
		for name, raw := range row {
			var converted interface{}
			switch kindByColumn[name] {
			case reflect.Int:
				converted = toolbox.AsInt(raw)
			case reflect.Float64:
				converted = toolbox.AsFloat(raw)
			case reflect.Bool:
				converted = toolbox.AsBoolean(raw)
			default:
				converted = toolbox.AsString(raw)
			}
			row[name] = converted
		}
	}
}
// NewSeparatedValueParser returns a parser that splits columns on the given
// delimiter and decodes lines with a decoder factory obtained from
// toolbox.NewDelimiterDecoderFactory.
func NewSeparatedValueParser(delimiter string) *SeparatedValueParser {
	parser := new(SeparatedValueParser)
	parser.delimiter = delimiter
	parser.factory = toolbox.NewDelimiterDecoderFactory()
	return parser
}