@@ -39,8 +39,8 @@ type WALConfig struct {
39
39
40
40
// RegisterFlags adds the flags required to configure this to the given FlagSet.
41
41
func (cfg * WALConfig ) RegisterFlags (f * flag.FlagSet ) {
42
- f .BoolVar (& cfg .walEnabled , "ingester.wal-enable " , false , "Enable the WAL." )
43
- f .BoolVar (& cfg .checkpointEnabled , "ingester.checkpoint-enable " , false , "Enable checkpointing." )
42
+ f .BoolVar (& cfg .walEnabled , "ingester.wal-enabled " , false , "Enable the WAL." )
43
+ f .BoolVar (& cfg .checkpointEnabled , "ingester.checkpoint-enabled " , false , "Enable checkpointing." )
44
44
f .BoolVar (& cfg .recover , "ingester.recover-from-wal" , false , "Recover data from existing WAL." )
45
45
f .StringVar (& cfg .dir , "ingester.wal-dir" , "wal" , "Directory to store the WAL." )
46
46
f .DurationVar (& cfg .checkpointDuration , "ingester.checkpoint-duration" , 1 * time .Hour , "Duration over which to checkpoint." )
@@ -437,10 +437,17 @@ func segmentsExist(dir string) (bool, error) {
437
437
// processCheckpoint loads the chunks of the series present in the last checkpoint.
438
438
func processCheckpoint (name string , userStates * userStates , nWorkers int ,
439
439
stateCache []map [string ]* userState , seriesCache []map [string ]map [uint64 ]* memorySeries ) error {
440
+
441
+ reader , closer , err := newWalReader (name , - 1 )
442
+ if err != nil {
443
+ return err
444
+ }
445
+ defer closer .Close ()
446
+
440
447
var (
441
448
inputs = make ([]chan * Series , nWorkers )
442
449
// errChan is to capture the errors from goroutine.
443
- // The channel size is nWorkers to not block any worker if all of them error out.
450
+ // The channel size is nWorkers (one slot per worker) to not block any worker if all of them error out.
444
451
errChan = make (chan error , nWorkers )
445
452
wg = sync.WaitGroup {}
446
453
seriesPool = & sync.Pool {
@@ -450,12 +457,6 @@ func processCheckpoint(name string, userStates *userStates, nWorkers int,
450
457
}
451
458
)
452
459
453
- reader , closer , err := newWalReader (name , - 1 )
454
- if err != nil {
455
- return err
456
- }
457
- defer closer .Close ()
458
-
459
460
wg .Add (nWorkers )
460
461
for i := 0 ; i < nWorkers ; i ++ {
461
462
inputs [i ] = make (chan * Series , 300 )
@@ -465,12 +466,15 @@ func processCheckpoint(name string, userStates *userStates, nWorkers int,
465
466
}(inputs [i ], stateCache [i ], seriesCache [i ])
466
467
}
467
468
468
- var errFromChan error
469
+ var capturedErr error
469
470
Loop:
470
471
for reader .Next () {
471
472
s := seriesPool .Get ().(* Series )
472
473
if err := proto .Unmarshal (reader .Record (), s ); err != nil {
473
- return err
474
+ // We don't return here in order to close/drain all the channels and
475
+ // make sure all goroutines exit.
476
+ capturedErr = err
477
+ break Loop
474
478
}
475
479
// The yoloString from the unmarshal of LabelAdapter gets corrupted
476
480
// when travelling through the channel. Hence making a copy of that.
@@ -479,7 +483,7 @@ Loop:
479
483
s .Labels = copyLabelAdapters (s .Labels )
480
484
481
485
select {
482
- case errFromChan = <- errChan :
486
+ case capturedErr = <- errChan :
483
487
// Exit early on an error.
484
488
// Only acts upon the first error received.
485
489
break Loop
@@ -488,17 +492,24 @@ Loop:
488
492
inputs [mod ] <- s
489
493
}
490
494
}
495
+
491
496
for i := 0 ; i < nWorkers ; i ++ {
492
497
close (inputs [i ])
493
498
}
494
499
wg .Wait ()
500
+ // If any worker errored out, some input channels might not be empty.
501
+ // Hence drain them.
502
+ for i := 0 ; i < nWorkers ; i ++ {
503
+ for range inputs [i ] {
504
+ }
505
+ }
495
506
496
- if errFromChan != nil {
497
- return errFromChan
507
+ if capturedErr != nil {
508
+ return capturedErr
498
509
}
499
510
select {
500
- case errFromChan = <- errChan :
501
- return errFromChan
511
+ case capturedErr = <- errChan :
512
+ return capturedErr
502
513
default :
503
514
if err := reader .Err (); err != nil {
504
515
return err
@@ -566,6 +577,13 @@ type samplesWithUserID struct {
566
577
// processWAL processes the records in the WAL concurrently.
567
578
func processWAL (name string , startSegment int , userStates * userStates , nWorkers int ,
568
579
stateCache []map [string ]* userState , seriesCache []map [string ]map [uint64 ]* memorySeries ) error {
580
+
581
+ reader , closer , err := newWalReader (name , startSegment )
582
+ if err != nil {
583
+ return err
584
+ }
585
+ defer closer .Close ()
586
+
569
587
var (
570
588
wg sync.WaitGroup
571
589
inputs = make ([]chan * samplesWithUserID , nWorkers )
@@ -589,27 +607,24 @@ func processWAL(name string, startSegment int, userStates *userStates, nWorkers
589
607
}(inputs [i ], outputs [i ], stateCache [i ], seriesCache [i ])
590
608
}
591
609
592
- reader , closer , err := newWalReader (name , startSegment )
593
- if err != nil {
594
- return err
595
- }
596
- defer closer .Close ()
597
-
598
610
var (
599
- errFromChan error
611
+ capturedErr error
600
612
record = & Record {}
601
613
)
602
614
Loop:
603
615
for reader .Next () {
604
616
select {
605
- case errFromChan = <- errChan :
617
+ case capturedErr = <- errChan :
606
618
// Exit early on an error.
607
619
// Only acts upon the first error received.
608
620
break Loop
609
621
default :
610
622
}
611
623
if err := proto .Unmarshal (reader .Record (), record ); err != nil {
612
- return err
624
+ // We don't return here in order to close/drain all the channels and
625
+ // make sure all goroutines exit.
626
+ capturedErr = err
627
+ break Loop
613
628
}
614
629
615
630
if len (record .Labels ) > 0 {
@@ -622,7 +637,10 @@ Loop:
622
637
}
623
638
_ , err := state .createSeriesWithFingerprint (model .Fingerprint (labels .Fingerprint ), labels .Labels , nil , true )
624
639
if err != nil {
625
- return err
640
+ // We don't return here in order to close/drain all the channels and
641
+ // make sure all goroutines exit.
642
+ capturedErr = err
643
+ break Loop
626
644
}
627
645
}
628
646
}
@@ -680,12 +698,12 @@ Loop:
680
698
}
681
699
}
682
700
683
- if errFromChan != nil {
684
- return errFromChan
701
+ if capturedErr != nil {
702
+ return capturedErr
685
703
}
686
704
select {
687
- case errFromChan = <- errChan :
688
- return errFromChan
705
+ case capturedErr = <- errChan :
706
+ return capturedErr
689
707
default :
690
708
if err := reader .Err (); err != nil {
691
709
return err
0 commit comments