-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathcsv_parser.go
More file actions
126 lines (114 loc) · 3.22 KB
/
csv_parser.go
File metadata and controls
126 lines (114 loc) · 3.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
package dsunit
import (
"bufio"
"github.com/viant/toolbox"
"io"
"path"
"reflect"
"strings"
)
// delimiterKeyword holds the literal "delimiter".
// NOTE(review): it is not referenced in the visible part of this file; presumably
// it names the setting that carries the column separator — confirm against callers.
var delimiterKeyword = "delimiter"
// convertValueIfNeeded walks every row and, for each header position, replaces
// a non-nil cell whose reflect kind differs from the kind recorded for that
// column in headerTypes with the result of toolbox.AsString. Nil cells are left
// untouched. Rows are modified in place through the rows pointer.
//
// Every row must have at least len(headers) cells and headerTypes must hold a
// non-nil entry for every header; otherwise the function panics.
func convertValueIfNeeded(headers []string, headerTypes map[string]*reflect.Kind, rows *[][]interface{}) {
	for _, record := range *rows {
		for position, name := range headers {
			cell := record[position]
			if cell == nil {
				continue
			}
			expectedKind := *headerTypes[name]
			if reflect.ValueOf(cell).Kind() != expectedKind {
				// record shares its backing array with the row stored in *rows,
				// so this write updates the caller's data.
				record[position] = toolbox.AsString(cell)
			}
		}
	}
}
// ParseColumnarData reads separator-delimited text from reader and returns the
// header names together with the parsed data rows.
//
// The first line is split on separator (strings.Split) to obtain the headers.
// Every following line yields one row holding one cell per header; cells for
// which the line supplies no text stay nil, and fields beyond the number of
// headers are ignored.
//
// Field rules for data lines:
//   - a field may be enclosed in double quotes; inside quotes the separator is
//     ordinary text and a quote preceded by a backslash or by another quote is
//     taken as a literal quote;
//   - a quote appearing after a field has already started (and the field is
//     not quoted) is kept as ordinary text;
//   - the separator is matched against a single byte of the line, so only
//     one-byte separators split data lines.
//
// Each field is converted with toolbox.DiscoverValueAndKind. The first kind
// recorded for a column becomes the column kind; once all lines are read,
// convertValueIfNeeded replaces values of a different kind with their
// toolbox.AsString form.
//
// Errors reported by the underlying scanner are not surfaced to the caller.
func ParseColumnarData(reader io.Reader, separator string) ([]string, [][]interface{}) {
	var rows = make([][]interface{}, 0)
	var headerTypes = make(map[string]*reflect.Kind)
	var headers = make([]string, 0)

	scanner := bufio.NewScanner(reader)
	if scanner.Scan() {
		for _, header := range strings.Split(scanner.Text(), separator) {
			invalid := reflect.Invalid
			headerTypes[header] = &invalid
			headers = append(headers, header)
		}
	}

	// storeValue converts text, places it into row[index] and records the
	// column kind if none has been recorded for that column yet.
	storeValue := func(row []interface{}, index int, text string) {
		value, valueKind := toolbox.DiscoverValueAndKind(text)
		row[index] = value
		if *headerTypes[headers[index]] == reflect.Invalid {
			headerTypes[headers[index]] = &valueKind
		}
	}

	for scanner.Scan() {
		line := scanner.Text()
		// Size the row by the header slice, not by the headerTypes map: the map
		// collapses duplicate header names, which made rows shorter than the
		// header list and caused an index-out-of-range panic during conversion.
		row := make([]interface{}, len(headers))
		fragment := make([]byte, 0, len(line))
		isInDoubleQuote := false
		index := 0

		for i := 0; i < len(line) && index < len(row); i++ {
			aChar := line[i]
			// Escape " only if the value is already inside quotes.
			if isInDoubleQuote && (aChar == '\\' || aChar == '"') && i+2 < len(line) && line[i+1] == '"' {
				fragment = append(fragment, '"')
				i++
				continue
			}
			// Allow an unescaped " inside text if the whole text is not enclosed in quotes.
			if aChar == '"' && (len(fragment) == 0 || isInDoubleQuote) {
				isInDoubleQuote = !isInDoubleQuote
				continue
			}
			if !isInDoubleQuote && line[i:i+1] == separator {
				storeValue(row, index, string(fragment))
				fragment = fragment[:0]
				index++
				continue
			}
			fragment = append(fragment, aChar)
		}
		// A non-empty fragment here means the line ended inside a field, so
		// index still addresses a valid cell.
		if len(fragment) > 0 {
			storeValue(row, index, string(fragment))
		}
		rows = append(rows, row)
	}

	convertValueIfNeeded(headers, headerTypes, &rows)
	return headers, rows
}
// hasDelimiter reports whether delimiter occurs in line starting exactly at
// byte offset index.
//
// On a match it returns true and len(delimiter)-1, the number of additional
// bytes beyond the current one that the delimiter occupies. Otherwise it
// returns false and 0.
//
// An empty delimiter never matches (it would otherwise produce an increase of
// -1), and an index outside the line, including a negative one, yields
// false instead of panicking.
func hasDelimiter(line, delimiter string, index int) (contains bool, indexIncrease int) {
	if delimiter == "" || index < 0 || index > len(line) {
		return false, 0
	}
	if !strings.HasPrefix(line[index:], delimiter) {
		return false, 0
	}
	return true, len(delimiter) - 1
}
// convertToLowerUnderscore converts an UpperCamelCase identifier to
// lower_underscore form, e.g. "UserName" becomes "user_name".
//
// The first byte is lower-cased without a leading underscore. Every later
// ASCII byte that is neither a lower-case letter nor a digit is emitted as an
// underscore followed by its lower-case form; note that this includes
// punctuation, so "a_b" becomes "a__b". Lower-case letters and digits are
// copied unchanged.
//
// Bytes outside the ASCII range are copied through untouched so multi-byte
// UTF-8 characters stay intact (lower-casing a lone leading byte of such a
// character would replace it with U+FFFD).
func convertToLowerUnderscore(upperCamelCase string) string {
	if len(upperCamelCase) == 0 {
		return ""
	}
	const asciiLimit = 0x80

	// Room for the input plus a handful of inserted underscores.
	result := make([]byte, 0, len(upperCamelCase)+8)
	for i := 0; i < len(upperCamelCase); i++ {
		c := upperCamelCase[i]
		isLower := 'a' <= c && c <= 'z'
		isDigit := '0' <= c && c <= '9'
		if c >= asciiLimit || isLower || isDigit {
			result = append(result, c)
			continue
		}
		// The very first character never gets a separating underscore.
		if i > 0 {
			result = append(result, '_')
		}
		if 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		result = append(result, c)
	}
	return string(result)
}
// removeFileExtension returns file with the extension reported by path.Ext
// stripped from its end. When path.Ext finds no extension the input is
// returned unchanged.
func removeFileExtension(file string) string {
	// path.Ext always yields a suffix of file (possibly empty), so trimming it
	// as a suffix removes exactly the extension and nothing else.
	return strings.TrimSuffix(file, path.Ext(file))
}