Skip to content

Commit 625ed54

Browse files
committed
Allow input file read ahead
The input file reader now pre-reads N requests, sorts them by timestamp, and emits them on demand. You can control the read depth using --input-file-read-depth, which is 100 by default. This makes the implementation faster, and it fixes various issues where, due to concurrency or other causes, requests get added out of order.
1 parent 889c1e6 commit 625ed54

File tree

8 files changed

+131
-38
lines changed

8 files changed

+131
-38
lines changed

emitter.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ func CopyMulty(src PluginReader, writers ...PluginWriter) error {
152152
}
153153
} else {
154154
for _, dst := range writers {
155-
if _, err := dst.PluginWrite(msg); err != nil {
155+
if _, err := dst.PluginWrite(msg); err != nil && err != io.ErrClosedPipe {
156156
return err
157157
}
158158
}

input_file.go

Lines changed: 114 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"bufio"
55
"bytes"
66
"compress/gzip"
7+
"container/heap"
78
"errors"
89
"fmt"
910
"io"
@@ -20,49 +21,123 @@ import (
2021
"github.com/aws/aws-sdk-go/service/s3"
2122
)
2223

24+
type filePayload struct {
25+
data []byte
26+
timestamp int64
27+
}
28+
29+
// payloadQueue is a min-heap of file payloads ordered by timestamp.
30+
type payloadQueue struct {
31+
sync.RWMutex
32+
s []*filePayload
33+
}
34+
35+
func (h payloadQueue) Len() int { return len(h.s) }
36+
func (h payloadQueue) Less(i, j int) bool { return h.s[i].timestamp < h.s[j].timestamp }
37+
func (h payloadQueue) Swap(i, j int) { h.s[i], h.s[j] = h.s[j], h.s[i] }
38+
39+
func (h *payloadQueue) Push(x interface{}) {
40+
// Push and Pop use pointer receivers because they modify the slice's length,
41+
// not just its contents.
42+
h.s = append(h.s, x.(*filePayload))
43+
}
44+
45+
func (h *payloadQueue) Pop() interface{} {
46+
old := h.s
47+
n := len(old)
48+
x := old[n-1]
49+
h.s = old[0 : n-1]
50+
return x
51+
}
52+
53+
func (h payloadQueue) Idx(i int) *filePayload {
54+
h.RLock()
55+
defer h.RUnlock()
56+
57+
return h.s[i]
58+
}
59+
2360
type fileInputReader struct {
2461
reader *bufio.Reader
25-
data []byte
2662
file io.ReadCloser
27-
timestamp int64
2863
closed int32 // Value of 0 indicates that the file is still open.
2964
s3 bool
65+
queue payloadQueue
66+
readDepth int
3067
}
3168

32-
func (f *fileInputReader) parseNext() error {
69+
func (f *fileInputReader) parse(init chan struct{}) error {
3370
payloadSeparatorAsBytes := []byte(payloadSeparator)
3471
var buffer bytes.Buffer
72+
var initialized bool
73+
3574
for {
3675
line, err := f.reader.ReadBytes('\n')
3776

3877
if err != nil {
3978
if err != io.EOF {
4079
Debug(1, err)
41-
} else {
42-
f.Close()
4380
}
81+
82+
f.Close()
83+
84+
if !initialized {
85+
close(init)
86+
initialized = true
87+
}
88+
4489
return err
4590
}
4691

4792
if bytes.Equal(payloadSeparatorAsBytes[1:], line) {
4893
asBytes := buffer.Bytes()
4994
meta := payloadMeta(asBytes)
5095

51-
f.timestamp, _ = strconv.ParseInt(string(meta[2]), 10, 64)
52-
f.data = asBytes[:len(asBytes)-1]
96+
timestamp, _ := strconv.ParseInt(string(meta[2]), 10, 64)
97+
data := asBytes[:len(asBytes)-1]
5398

54-
return nil
99+
f.queue.Lock()
100+
heap.Push(&f.queue, &filePayload{
101+
timestamp: timestamp,
102+
data: data,
103+
})
104+
f.queue.Unlock()
105+
106+
for {
107+
if f.queue.Len() < f.readDepth {
108+
break
109+
}
110+
111+
if !initialized {
112+
close(init)
113+
initialized = true
114+
}
115+
116+
time.Sleep(100 * time.Millisecond)
117+
}
118+
119+
buffer = bytes.Buffer{}
120+
continue
55121
}
56122

57123
buffer.Write(line)
58124
}
59-
60125
}
61126

62-
func (f *fileInputReader) ReadPayload() []byte {
63-
defer f.parseNext()
127+
func (f *fileInputReader) wait() {
128+
for {
129+
if atomic.LoadInt32(&f.closed) == 1 {
130+
return
131+
}
132+
133+
if f.queue.Len() > 0 {
134+
return
135+
}
136+
137+
time.Sleep(100 * time.Millisecond)
138+
}
64139

65-
return f.data
140+
return
66141
}
67142

68143
// Close closes this plugin
@@ -75,7 +150,7 @@ func (f *fileInputReader) Close() error {
75150
return nil
76151
}
77152

78-
func newFileInputReader(path string) *fileInputReader {
153+
func newFileInputReader(path string, readDepth int) *fileInputReader {
79154
var file io.ReadCloser
80155
var err error
81156

@@ -90,7 +165,7 @@ func newFileInputReader(path string) *fileInputReader {
90165
return nil
91166
}
92167

93-
r := &fileInputReader{file: file, closed: 0}
168+
r := &fileInputReader{file: file, closed: 0, readDepth: readDepth}
94169
if strings.HasSuffix(path, ".gz") {
95170
gzReader, err := gzip.NewReader(file)
96171
if err != nil {
@@ -102,7 +177,11 @@ func newFileInputReader(path string) *fileInputReader {
102177
r.reader = bufio.NewReader(file)
103178
}
104179

105-
r.parseNext()
180+
heap.Init(&r.queue)
181+
182+
init := make(chan struct{})
183+
go r.parse(init)
184+
<-init
106185

107186
return r
108187
}
@@ -116,16 +195,18 @@ type FileInput struct {
116195
readers []*fileInputReader
117196
speedFactor float64
118197
loop bool
198+
readDepth int
119199
}
120200

121201
// NewFileInput constructor for FileInput. Accepts file path as argument.
122-
func NewFileInput(path string, loop bool) (i *FileInput) {
202+
func NewFileInput(path string, loop bool, readDepth int) (i *FileInput) {
123203
i = new(FileInput)
124204
i.data = make(chan []byte, 1000)
125205
i.exit = make(chan bool)
126206
i.path = path
127207
i.speedFactor = 1
128208
i.loop = loop
209+
i.readDepth = readDepth
129210

130211
if err := i.init(); err != nil {
131212
return
@@ -176,7 +257,7 @@ func (i *FileInput) init() (err error) {
176257
i.readers = make([]*fileInputReader, len(matches))
177258

178259
for idx, p := range matches {
179-
i.readers[idx] = newFileInputReader(p)
260+
i.readers[idx] = newFileInputReader(p, i.readDepth)
180261
}
181262

182263
return nil
@@ -201,11 +282,17 @@ func (i *FileInput) String() string {
201282
// Find reader with smallest timestamp e.g next payload in row
202283
func (i *FileInput) nextReader() (next *fileInputReader) {
203284
for _, r := range i.readers {
204-
if r == nil || atomic.LoadInt32(&r.closed) != 0 {
285+
if r == nil {
205286
continue
206287
}
207288

208-
if next == nil || r.timestamp < next.timestamp {
289+
r.wait()
290+
291+
if r.queue.Len() == 0 {
292+
continue
293+
}
294+
295+
if next == nil || r.queue.Idx(0).timestamp > next.queue.Idx(0).timestamp {
209296
next = r
210297
continue
211298
}
@@ -236,27 +323,31 @@ func (i *FileInput) emit() {
236323
}
237324
}
238325

326+
reader.queue.RLock()
327+
payload := heap.Pop(&reader.queue).(*filePayload)
328+
reader.queue.RUnlock()
329+
239330
if lastTime != -1 {
240-
diff := reader.timestamp - lastTime
331+
diff := payload.timestamp - lastTime
241332

242333
if i.speedFactor != 1 {
243334
diff = int64(float64(diff) / i.speedFactor)
244335
}
245336

246337
if diff >= 0 {
247-
lastTime = reader.timestamp
338+
lastTime = payload.timestamp
248339
time.Sleep(time.Duration(diff))
249340
}
250341
} else {
251-
lastTime = reader.timestamp
342+
lastTime = payload.timestamp
252343
}
253344

254345
// Recheck if we have exited since last check.
255346
select {
256347
case <-i.exit:
257348
return
258349
default:
259-
i.data <- reader.ReadPayload()
350+
i.data <- payload.data
260351
}
261352
}
262353

input_file_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ func TestInputFileMultipleFilesWithRequestsOnly(t *testing.T) {
104104
file2.Write([]byte(payloadSeparator))
105105
file2.Close()
106106

107-
input := NewFileInput(fmt.Sprintf("/tmp/%d*", rnd), false)
107+
input := NewFileInput(fmt.Sprintf("/tmp/%d*", rnd), false, 100)
108108

109109
for i := '1'; i <= '4'; i++ {
110110
msg, _ := input.PluginRead()
@@ -130,7 +130,7 @@ func TestInputFileRequestsWithLatency(t *testing.T) {
130130
file.Write([]byte("1 3 250000000\nrequest3"))
131131
file.Write([]byte(payloadSeparator))
132132

133-
input := NewFileInput(fmt.Sprintf("/tmp/%d", rnd), false)
133+
input := NewFileInput(fmt.Sprintf("/tmp/%d", rnd), false, 100)
134134

135135
start := time.Now().UnixNano()
136136
for i := 0; i < 3; i++ {
@@ -170,7 +170,7 @@ func TestInputFileMultipleFilesWithRequestsAndResponses(t *testing.T) {
170170
file2.Write([]byte(payloadSeparator))
171171
file2.Close()
172172

173-
input := NewFileInput(fmt.Sprintf("/tmp/%d*", rnd), false)
173+
input := NewFileInput(fmt.Sprintf("/tmp/%d*", rnd), false, 100)
174174

175175
for i := '1'; i <= '4'; i++ {
176176
msg, _ := input.PluginRead()
@@ -198,7 +198,7 @@ func TestInputFileLoop(t *testing.T) {
198198
file.Write([]byte(payloadSeparator))
199199
file.Close()
200200

201-
input := NewFileInput(fmt.Sprintf("/tmp/%d", rnd), true)
201+
input := NewFileInput(fmt.Sprintf("/tmp/%d", rnd), true, 100)
202202

203203
// Even if we have just 2 requests in the file, it should loop indefinitely
204204
for i := 0; i < 1000; i++ {
@@ -226,7 +226,7 @@ func TestInputFileCompressed(t *testing.T) {
226226
name2 := output2.file.Name()
227227
output2.Close()
228228

229-
input := NewFileInput(fmt.Sprintf("/tmp/%d*", rnd), false)
229+
input := NewFileInput(fmt.Sprintf("/tmp/%d*", rnd), false, 100)
230230
for i := 0; i < 2000; i++ {
231231
input.PluginRead()
232232
}
@@ -326,7 +326,7 @@ func CreateCaptureFile(requestGenerator *RequestGenerator) *CaptureFile {
326326
func ReadFromCaptureFile(captureFile *os.File, count int, callback writeCallback) (err error) {
327327
wg := new(sync.WaitGroup)
328328

329-
input := NewFileInput(captureFile.Name(), false)
329+
input := NewFileInput(captureFile.Name(), false, 100)
330330
output := NewTestOutput(func(msg *Message) {
331331
callback(msg)
332332
wg.Done()

output_dummy.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ func (i *DummyOutput) PluginWrite(msg *Message) (int, error) {
2424
n += nn
2525
nn, err = os.Stdout.Write(payloadSeparatorAsBytes)
2626
n += nn
27+
2728
return n, err
2829
}
2930

output_file_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ func TestFileOutput(t *testing.T) {
3939
emitter.Close()
4040

4141
var counter int64
42-
input2 := NewFileInput("/tmp/test_requests.gor", false)
42+
input2 := NewFileInput("/tmp/test_requests.gor", false, 100)
4343
output2 := NewTestOutput(func(*Message) {
4444
atomic.AddInt64(&counter, 1)
4545
wg.Done()

plugins.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@ func (plugins *InOutPlugins) registerPlugin(constructor interface{}, options ...
8383
plugins.Outputs = append(plugins.Outputs, w)
8484
}
8585
plugins.All = append(plugins.All, plugin)
86-
8786
}
8887

8988
// NewPlugins specify and initialize all available plugins
@@ -119,7 +118,7 @@ func NewPlugins() *InOutPlugins {
119118
}
120119

121120
for _, options := range Settings.InputFile {
122-
plugins.registerPlugin(NewFileInput, options, Settings.InputFileLoop)
121+
plugins.registerPlugin(NewFileInput, options, Settings.InputFileLoop, Settings.InputFileReadDepth)
123122
}
124123

125124
for _, path := range Settings.OutputFile {

s3_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ func TestInputFileFromS3(t *testing.T) {
127127
<-output.closeCh
128128
}
129129

130-
input := NewFileInput(fmt.Sprintf("s3://test-gor-eu/%d", rnd), false)
130+
input := NewFileInput(fmt.Sprintf("s3://test-gor-eu/%d", rnd, 100), false)
131131

132132
buf := make([]byte, 1000)
133133
for i := 0; i <= 19999; i++ {

settings.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,11 @@ type AppSettings struct {
4545
OutputTCPConfig TCPOutputConfig
4646
OutputTCPStats bool `json:"output-tcp-stats"`
4747

48-
InputFile MultiOption `json:"input-file"`
49-
InputFileLoop bool `json:"input-file-loop"`
50-
OutputFile MultiOption `json:"output-file"`
51-
OutputFileConfig FileOutputConfig
48+
InputFile MultiOption `json:"input-file"`
49+
InputFileLoop bool `json:"input-file-loop"`
50+
InputFileReadDepth int `json:"input-file-read-depth"`
51+
OutputFile MultiOption `json:"output-file"`
52+
OutputFileConfig FileOutputConfig
5253

5354
InputRAW MultiOption `json:"input_raw"`
5455
RAWInputConfig
@@ -113,6 +114,7 @@ func init() {
113114

114115
flag.Var(&Settings.InputFile, "input-file", "Read requests from file: \n\tgor --input-file ./requests.gor --output-http staging.com")
115116
flag.BoolVar(&Settings.InputFileLoop, "input-file-loop", false, "Loop input files, useful for performance testing.")
117+
flag.IntVar(&Settings.InputFileReadDepth, "input-file-read-depth", 100, "GoReplay tries to read and cache multiple records, in advance. In parallel it also perform sorting of requests, if they came out of order. Since it needs hold this buffer in memory, bigger values can cause worse performance")
116118

117119
flag.Var(&Settings.OutputFile, "output-file", "Write incoming requests to file: \n\tgor --input-raw :80 --output-file ./requests.gor")
118120
flag.DurationVar(&Settings.OutputFileConfig.FlushInterval, "output-file-flush-interval", time.Second, "Interval for forcing buffer flush to the file, default: 1s.")

0 commit comments

Comments
 (0)