@@ -16,13 +16,16 @@ import (
16
16
"github.com/prometheus/client_golang/prometheus"
17
17
"github.com/thanos-io/objstore"
18
18
"github.com/thanos-io/thanos/pkg/block/metadata"
19
+ "golang.org/x/sync/errgroup"
19
20
20
21
"github.com/cortexproject/cortex/pkg/util/runutil"
21
22
)
22
23
23
24
const (
	// BlockVisitMarkerFileSuffix is the suffix shared by the name of every
	// visit marker JSON file, which records the most recent compactor visit.
	BlockVisitMarkerFileSuffix = "visit-mark.json"
	// BlockVisitMarkerFilePrefix is the prefix of every visit marker JSON file
	// name; the partition ID is placed between this prefix and the suffix.
	BlockVisitMarkerFilePrefix = "partition-"
	// VisitMarkerVersion1 is the currently supported version of the visit-mark file.
	VisitMarkerVersion1 = 1
)
@@ -34,23 +37,34 @@ var (
34
37
)
35
38
36
39
type BlockVisitMarker struct {
37
- CompactorID string `json:"compactorID"`
40
+ CompactorID string `json:"compactorID"`
41
+ Status VisitStatus `json:"status"`
42
+ PartitionedGroupID uint32 `json:"partitionedGroupID"`
43
+ PartitionID int `json:"partitionID"`
38
44
// VisitTime is a unix timestamp of when the block was visited (mark updated).
39
45
VisitTime int64 `json:"visitTime"`
40
46
// Version of the file.
41
47
Version int `json:"version"`
42
48
}
43
49
44
- func (b * BlockVisitMarker ) isVisited (blockVisitMarkerTimeout time.Duration ) bool {
45
- return time .Now ().Before (time .Unix (b .VisitTime , 0 ).Add (blockVisitMarkerTimeout ))
50
+ func (b * BlockVisitMarker ) isVisited (blockVisitMarkerTimeout time.Duration , partitionID int ) bool {
51
+ return b . isCompleted () || partitionID == b . PartitionID && time .Now ().Before (time .Unix (b .VisitTime , 0 ).Add (blockVisitMarkerTimeout ))
46
52
}
47
53
48
- func (b * BlockVisitMarker ) isVisitedByCompactor (blockVisitMarkerTimeout time.Duration , compactorID string ) bool {
49
- return b .CompactorID == compactorID && time . Now (). Before ( time . Unix ( b . VisitTime , 0 ). Add (blockVisitMarkerTimeout ) )
54
+ func (b * BlockVisitMarker ) isVisitedByCompactor (blockVisitMarkerTimeout time.Duration , partitionID int , compactorID string ) bool {
55
+ return b .CompactorID == compactorID && b . isVisited (blockVisitMarkerTimeout , partitionID )
50
56
}
51
57
52
- func ReadBlockVisitMarker (ctx context.Context , bkt objstore.InstrumentedBucketReader , logger log.Logger , blockID string , blockVisitMarkerReadFailed prometheus.Counter ) (* BlockVisitMarker , error ) {
53
- visitMarkerFile := path .Join (blockID , BlockVisitMarkerFile )
58
+ func (b * BlockVisitMarker ) isCompleted () bool {
59
+ return b .Status == Completed
60
+ }
61
+
62
+ func GetBlockVisitMarkerFile (blockID string , partitionID int ) string {
63
+ return path .Join (blockID , fmt .Sprintf ("%s%d-%s" , BlockVisitMarkerFilePrefix , partitionID , BlockVisitMarkerFileSuffix ))
64
+ }
65
+
66
+ func ReadBlockVisitMarker (ctx context.Context , bkt objstore.InstrumentedBucketReader , logger log.Logger , blockID string , partitionID int , blockVisitMarkerReadFailed prometheus.Counter ) (* BlockVisitMarker , error ) {
67
+ visitMarkerFile := GetBlockVisitMarkerFile (blockID , partitionID )
54
68
visitMarkerFileReader , err := bkt .ReaderWithExpectedErrs (bkt .IsObjNotFoundErr ).Get (ctx , visitMarkerFile )
55
69
if err != nil {
56
70
if bkt .IsObjNotFoundErr (err ) {
@@ -76,15 +90,23 @@ func ReadBlockVisitMarker(ctx context.Context, bkt objstore.InstrumentedBucketRe
76
90
return & blockVisitMarker , nil
77
91
}
78
92
79
- func UpdateBlockVisitMarker (ctx context.Context , bkt objstore.Bucket , blockID string , reader io.Reader , blockVisitMarkerWriteFailed prometheus.Counter ) error {
80
- blockVisitMarkerFilePath := path . Join (blockID , BlockVisitMarkerFile )
93
+ func UpdateBlockVisitMarker (ctx context.Context , bkt objstore.Bucket , blockID string , partitionID int , reader io.Reader , blockVisitMarkerWriteFailed prometheus.Counter ) error {
94
+ blockVisitMarkerFilePath := GetBlockVisitMarkerFile (blockID , partitionID )
81
95
if err := bkt .Upload (ctx , blockVisitMarkerFilePath , reader ); err != nil {
82
96
blockVisitMarkerWriteFailed .Inc ()
83
97
return err
84
98
}
85
99
return nil
86
100
}
87
101
102
+ func generateBlocksInfo (blocks []* metadata.Meta ) string {
103
+ var blockIds []string
104
+ for _ , block := range blocks {
105
+ blockIds = append (blockIds , block .ULID .String ())
106
+ }
107
+ return strings .Join (blockIds , "," )
108
+ }
109
+
88
110
func markBlocksVisited (
89
111
ctx context.Context ,
90
112
bkt objstore.Bucket ,
@@ -98,54 +120,121 @@ func markBlocksVisited(
98
120
blockVisitMarkerWriteFailed .Inc ()
99
121
return
100
122
}
101
- reader := bytes .NewReader (visitMarkerFileContent )
123
+ g , _ := errgroup .WithContext (ctx )
124
+ g .SetLimit (32 )
102
125
for _ , block := range blocks {
103
- select {
104
- // Exit early if possible.
105
- case <- ctx .Done ():
106
- return
107
- default :
108
- }
109
-
110
126
blockID := block .ULID .String ()
111
- if err := UpdateBlockVisitMarker (ctx , bkt , blockID , reader , blockVisitMarkerWriteFailed ); err != nil {
112
- level .Error (logger ).Log ("msg" , "unable to upsert visit marker file content for block" , "blockID" , blockID , "err" , err )
113
- }
114
- reader .Reset (visitMarkerFileContent )
127
+ g .Go (func () error {
128
+ select {
129
+ // Exit early if possible.
130
+ case <- ctx .Done ():
131
+ return nil
132
+ default :
133
+ }
134
+
135
+ reader := bytes .NewReader (visitMarkerFileContent )
136
+ if err := UpdateBlockVisitMarker (ctx , bkt , blockID , marker .PartitionID , reader , blockVisitMarkerWriteFailed ); err != nil {
137
+ level .Error (logger ).Log ("msg" , "unable to upsert visit marker file content for block" , "partition_id" , marker .PartitionID , "block_id" , blockID , "err" , err )
138
+ }
139
+ reader .Reset (visitMarkerFileContent )
140
+ return nil
141
+ })
142
+ }
143
+ if err := g .Wait (); err != nil {
144
+ blockVisitMarkerWriteFailed .Inc ()
145
+ return
115
146
}
147
+ level .Debug (logger ).Log ("msg" , "marked blocks visited" , "partition_id" , marker .PartitionID , "blocks" , generateBlocksInfo (blocks ))
116
148
}
117
149
118
- func markBlocksVisitedHeartBeat (ctx context.Context , bkt objstore.Bucket , logger log.Logger , blocks []* metadata.Meta , compactorID string , blockVisitMarkerFileUpdateInterval time.Duration , blockVisitMarkerWriteFailed prometheus.Counter ) {
119
- var blockIds []string
120
- for _ , block := range blocks {
121
- blockIds = append (blockIds , block .ULID .String ())
122
- }
123
- blocksInfo := strings .Join (blockIds , "," )
124
- level .Info (logger ).Log ("msg" , fmt .Sprintf ("start heart beat for blocks: %s" , blocksInfo ))
150
+ func markBlocksVisitedHeartBeat (
151
+ ctx context.Context ,
152
+ bkt objstore.Bucket ,
153
+ logger log.Logger ,
154
+ blocks []* metadata.Meta ,
155
+ partitionedGroupID uint32 ,
156
+ partitionID int ,
157
+ compactorID string ,
158
+ blockVisitMarkerFileUpdateInterval time.Duration ,
159
+ blockVisitMarkerWriteFailed prometheus.Counter ,
160
+ errChan chan error ,
161
+ ) {
162
+ blocksInfo := generateBlocksInfo (blocks )
163
+ level .Info (logger ).Log ("msg" , "start visit marker heart beat" , "partitioned_group_id" , partitionedGroupID , "partition_id" , partitionID , "blocks" , blocksInfo )
125
164
ticker := time .NewTicker (blockVisitMarkerFileUpdateInterval )
126
165
defer ticker .Stop ()
166
+ isComplete := false
127
167
heartBeat:
128
168
for {
129
169
level .Debug (logger ).Log ("msg" , fmt .Sprintf ("heart beat for blocks: %s" , blocksInfo ))
130
170
blockVisitMarker := BlockVisitMarker {
131
- VisitTime : time .Now ().Unix (),
132
- CompactorID : compactorID ,
133
- Version : VisitMarkerVersion1 ,
171
+ VisitTime : time .Now ().Unix (),
172
+ CompactorID : compactorID ,
173
+ Status : Pending ,
174
+ PartitionedGroupID : partitionedGroupID ,
175
+ PartitionID : partitionID ,
176
+ Version : VisitMarkerVersion1 ,
134
177
}
135
178
markBlocksVisited (ctx , bkt , logger , blocks , blockVisitMarker , blockVisitMarkerWriteFailed )
136
179
137
180
select {
138
181
case <- ctx .Done ():
182
+ level .Warn (logger ).Log ("msg" , "visit marker heart beat got cancelled" , "partitioned_group_id" , partitionedGroupID , "partition_id" , partitionID , "blocks" , blocksInfo )
139
183
break heartBeat
140
184
case <- ticker .C :
141
185
continue
186
+ case err := <- errChan :
187
+ isComplete = err == nil
188
+ if err != nil {
189
+ level .Warn (logger ).Log ("msg" , "stop visit marker heart beat due to error" , "err" , err , "partitioned_group_id" , partitionedGroupID , "partition_id" , partitionID , "blocks" , blocksInfo )
190
+ }
191
+ break heartBeat
142
192
}
143
193
}
144
- level .Info (logger ).Log ("msg" , fmt .Sprintf ("stop heart beat for blocks: %s" , blocksInfo ))
194
+ if isComplete {
195
+ level .Info (logger ).Log ("msg" , "update visit marker to completed status" , "partitioned_group_id" , partitionedGroupID , "partition_id" , partitionID , "blocks" , blocksInfo )
196
+ markBlocksVisitMarkerCompleted (context .Background (), bkt , logger , blocks , partitionedGroupID , partitionID , compactorID , blockVisitMarkerWriteFailed )
197
+ }
198
+ level .Info (logger ).Log ("msg" , "stop visit marker heart beat" , "partitioned_group_id" , partitionedGroupID , "partition_id" , partitionID , "blocks" , blocksInfo )
199
+ }
200
+
201
+ func markBlocksVisitMarkerCompleted (
202
+ ctx context.Context ,
203
+ bkt objstore.Bucket ,
204
+ logger log.Logger ,
205
+ blocks []* metadata.Meta ,
206
+ partitionedGroupID uint32 ,
207
+ partitionID int ,
208
+ compactorID string ,
209
+ blockVisitMarkerWriteFailed prometheus.Counter ,
210
+ ) {
211
+ blockVisitMarker := BlockVisitMarker {
212
+ VisitTime : time .Now ().Unix (),
213
+ CompactorID : compactorID ,
214
+ Status : Completed ,
215
+ PartitionedGroupID : partitionedGroupID ,
216
+ PartitionID : partitionID ,
217
+ Version : VisitMarkerVersion1 ,
218
+ }
219
+ visitMarkerFileContent , err := json .Marshal (blockVisitMarker )
220
+ if err != nil {
221
+ blockVisitMarkerWriteFailed .Inc ()
222
+ return
223
+ }
224
+ reader := bytes .NewReader (visitMarkerFileContent )
225
+ for _ , block := range blocks {
226
+ blockID := block .ULID .String ()
227
+ if err := UpdateBlockVisitMarker (ctx , bkt , blockID , blockVisitMarker .PartitionID , reader , blockVisitMarkerWriteFailed ); err != nil {
228
+ level .Error (logger ).Log ("msg" , "unable to upsert completed visit marker file content for block" , "partitioned_group_id" , blockVisitMarker .PartitionedGroupID , "partition_id" , blockVisitMarker .PartitionID , "block_id" , blockID , "err" , err )
229
+ } else {
230
+ level .Info (logger ).Log ("msg" , "block partition is completed" , "partitioned_group_id" , blockVisitMarker .PartitionedGroupID , "partition_id" , blockVisitMarker .PartitionID , "block_id" , blockID )
231
+ }
232
+ reader .Reset (visitMarkerFileContent )
233
+ }
145
234
}
146
235
147
236
func IsBlockVisitMarker (path string ) bool {
148
- return strings .HasSuffix (path , BlockVisitMarkerFile )
237
+ return strings .HasSuffix (path , BlockVisitMarkerFileSuffix )
149
238
}
150
239
151
240
func IsNotBlockVisitMarkerError (err error ) bool {
0 commit comments