-
Notifications
You must be signed in to change notification settings - Fork 753
tso:update tso timestamp when met logic exhaust #10137
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 1 commit
4b29385
a262d63
5151b13
e06517a
f50d8ec
9b6c9b1
4e64939
af09667
4c76cfc
d620607
80b5c16
9a17895
5a2794a
342a538
220024f
26a801f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -49,6 +49,7 @@ const ( | |||||
| // and trigger unnecessary warnings about clock offset. | ||||||
| // It's an empirical value. | ||||||
| jetLagWarningThreshold = 150 * time.Millisecond | ||||||
| waitUpdateTSOInterval = 50 * time.Millisecond | ||||||
| ) | ||||||
|
|
||||||
| // tsoObject is used to store the current TSO in memory with a RWMutex lock. | ||||||
|
|
@@ -90,13 +91,18 @@ func (t *timestampOracle) saveTimestamp(ts time.Time) error { | |||||
| return t.storage.SaveTimestamp(ctx, t.keyspaceGroupID, ts, t.member.GetLeadership()) | ||||||
| } | ||||||
|
|
||||||
| func (t *timestampOracle) setTSOPhysical(next time.Time, force bool) { | ||||||
| // setTSOPhysical sets the TSO's physical part with the given time. | ||||||
| func (t *timestampOracle) setTSOPhysical(pre time.Time, next time.Time, force bool) { | ||||||
| t.tsoMux.Lock() | ||||||
| defer t.tsoMux.Unlock() | ||||||
| // Do not update the zero physical time if the `force` flag is false. | ||||||
| if t.tsoMux.physical.Equal(typeutil.ZeroTime) && !force { | ||||||
| return | ||||||
| } | ||||||
| // If current physical time is bigger than the previous time, it indicates that the physical time has been updated. | ||||||
| if t.tsoMux.physical.After(pre) { | ||||||
| return | ||||||
| } | ||||||
| // make sure the ts won't fall back | ||||||
| if typeutil.SubTSOPhysicalByWallClock(next, t.tsoMux.physical) > 0 { | ||||||
|
||||||
| t.tsoMux.physical = next | ||||||
|
|
@@ -114,17 +120,18 @@ func (t *timestampOracle) getTSO() (time.Time, int64) { | |||||
| } | ||||||
|
|
||||||
| // generateTSO will add the TSO's logical part with the given count and returns the new TSO result. | ||||||
| func (t *timestampOracle) generateTSO(ctx context.Context, count int64) (physical int64, logical int64) { | ||||||
| func (t *timestampOracle) generateTSO(ctx context.Context, count int64) (physical int64, logical int64, current time.Time) { | ||||||
| defer trace.StartRegion(ctx, "timestampOracle.generateTSO").End() | ||||||
| t.tsoMux.Lock() | ||||||
| defer t.tsoMux.Unlock() | ||||||
| if t.tsoMux.physical.Equal(typeutil.ZeroTime) { | ||||||
| return 0, 0 | ||||||
| return 0, 0, typeutil.ZeroTime | ||||||
| } | ||||||
| physical = t.tsoMux.physical.UnixNano() / int64(time.Millisecond) | ||||||
| t.tsoMux.logical += count | ||||||
| logical = t.tsoMux.logical | ||||||
| return physical, logical | ||||||
| current = t.tsoMux.physical | ||||||
| return physical, logical, current | ||||||
| } | ||||||
|
|
||||||
| func (t *timestampOracle) getLastSavedTime() time.Time { | ||||||
|
|
@@ -201,7 +208,7 @@ func (t *timestampOracle) syncTimestamp() error { | |||||
| zap.Time("last", last), zap.Time("last-saved", lastSavedTime), | ||||||
| zap.Time("save", save), zap.Time("next", next)) | ||||||
| // save into memory | ||||||
| t.setTSOPhysical(next, true) | ||||||
| t.setTSOPhysical(t.tsoMux.physical, next, true) | ||||||
coderabbitai[bot] marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| return nil | ||||||
| } | ||||||
|
|
||||||
|
|
@@ -274,6 +281,8 @@ func (t *timestampOracle) resetUserTimestamp(tso uint64, ignoreSmaller, skipUppe | |||||
| } | ||||||
|
|
||||||
| // updateTimestamp is used to update the timestamp. | ||||||
| // pre is the previous physical time before update. | ||||||
| // allowSaveStorage indicates whether to allow save storage, only allocatorUpdater is true. | ||||||
| // This function will do two things: | ||||||
| // 1. When the logical time is going to be used up, increase the current physical time. | ||||||
| // 2. When the time window is not big enough, which means the saved etcd time minus the next physical time | ||||||
|
|
@@ -287,7 +296,7 @@ func (t *timestampOracle) resetUserTimestamp(tso uint64, ignoreSmaller, skipUppe | |||||
| // | ||||||
| // NOTICE: this function should be called after the TSO in memory has been initialized | ||||||
| // and should not be called when the TSO in memory has been reset anymore. | ||||||
| func (t *timestampOracle) updateTimestamp() error { | ||||||
| func (t *timestampOracle) updateTimestamp(pre time.Time, allowSaveStorage bool) error { | ||||||
|
||||||
| if !t.isInitialized() { | ||||||
| return errs.ErrUpdateTimestamp.FastGenByArgs("timestamp in memory has not been initialized") | ||||||
| } | ||||||
|
|
@@ -340,6 +349,11 @@ func (t *timestampOracle) updateTimestamp() error { | |||||
| // It is not safe to increase the physical time to `next`. | ||||||
| // The time window needs to be updated and saved to etcd. | ||||||
| if typeutil.SubRealTimeByWallClock(t.getLastSavedTime(), next) <= updateTimestampGuard { | ||||||
| // if need to save into etcd, | ||||||
| if !allowSaveStorage { | ||||||
| t.metrics.notAllowedSaveTimestampEvent.Inc() | ||||||
| return errs.ErrUpdateTimestamp.FastGenByArgs("update timestamp needs to save storage but it is not allowed") | ||||||
| } | ||||||
| save := next.Add(t.saveInterval) | ||||||
| start := time.Now() | ||||||
| if err := t.saveTimestamp(save); err != nil { | ||||||
|
|
@@ -353,8 +367,7 @@ func (t *timestampOracle) updateTimestamp() error { | |||||
| t.metrics.updateSaveDuration.Observe(time.Since(start).Seconds()) | ||||||
| } | ||||||
| // save into memory | ||||||
| t.setTSOPhysical(next, false) | ||||||
|
|
||||||
| t.setTSOPhysical(pre, next, false) | ||||||
| return nil | ||||||
| } | ||||||
|
|
||||||
|
|
@@ -363,6 +376,7 @@ var maxRetryCount = 10 | |||||
| func (t *timestampOracle) getTS(ctx context.Context, count uint32) (pdpb.Timestamp, error) { | ||||||
| defer trace.StartRegion(ctx, "timestampOracle.getTS").End() | ||||||
| var resp pdpb.Timestamp | ||||||
| var physical time.Time | ||||||
| if count == 0 { | ||||||
| return resp, errs.ErrGenerateTimestamp.FastGenByArgs("tso count should be positive") | ||||||
| } | ||||||
|
|
@@ -378,7 +392,7 @@ func (t *timestampOracle) getTS(ctx context.Context, count uint32) (pdpb.Timesta | |||||
| return pdpb.Timestamp{}, errs.ErrGenerateTimestamp.FastGenByArgs("timestamp in memory isn't initialized") | ||||||
| } | ||||||
| // Get a new TSO result with the given count | ||||||
| resp.Physical, resp.Logical = t.generateTSO(ctx, int64(count)) | ||||||
| resp.Physical, resp.Logical, physical = t.generateTSO(ctx, int64(count)) | ||||||
|
||||||
| resp.Physical, resp.Logical, physical = t.generateTSO(ctx, int64(count)) | |
| resp.Physical, resp.Logical, current = t.generateTSO(ctx, int64(count)) |
rleungx marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
log.Info should be log.Warn for a failed timestamp update.
This logs an error from updateTimestamp at Info level, but it represents an operational failure that warrants operator attention. Other failure paths in this file (e.g., line 390) consistently use log.Warn.
Proposed fix
if overflowed, err := t.updateTimestamp(overflowUpdate); err != nil {
- log.Info("update timestamp failed", logutil.CondUint32("keyspace-group-id", t.keyspaceGroupID, t.keyspaceGroupID > 0), zap.Error(err))
+ log.Warn("update timestamp failed", logutil.CondUint32("keyspace-group-id", t.keyspaceGroupID, t.keyspaceGroupID > 0), zap.Error(err))
time.Sleep(t.updatePhysicalInterval)🤖 Prompt for AI Agents
In `@pkg/tso/tso.go` around lines 441 - 446, The log call inside the error branch
of the updateTimestamp handling should be elevated from log.Info to log.Warn: in
the block that checks the result of t.updateTimestamp(overflowUpdate), replace
log.Info(...) with log.Warn(...), keeping the same structured fields (use
logutil.CondUint32("keyspace-group-id", t.keyspaceGroupID, t.keyspaceGroupID >
0) and zap.Error(err)) and preserve the existing sleep behavior using
t.updatePhysicalInterval; this aligns the error path for
updateTimestamp/overflowUpdate with other warnings in this file.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,125 @@ | ||
| // Copyright 2026 TiKV Project Authors. | ||
| // | ||
| // Licensed under the Apache License, Version 2.0 (the "License"); | ||
| // you may not use this file except in compliance with the License. | ||
| // You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, software | ||
| // distributed under the License is distributed on an "AS IS" BASIS, | ||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| // See the License for the specific language governing permissions and | ||
| // limitations under the License. | ||
|
|
||
| package tso | ||
|
|
||
| import ( | ||
| "context" | ||
| "sync" | ||
| "sync/atomic" | ||
| "testing" | ||
| "time" | ||
|
|
||
| "github.com/stretchr/testify/require" | ||
| clientv3 "go.etcd.io/etcd/client/v3" | ||
|
|
||
| "github.com/tikv/pd/pkg/election" | ||
| ) | ||
|
|
||
| type MokElection struct{} | ||
coderabbitai[bot] marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| func (*MokElection) ID() uint64 { return 0 } | ||
| func (*MokElection) Name() string { return "" } | ||
| func (*MokElection) MemberValue() string { return "" } | ||
| func (*MokElection) Client() *clientv3.Client { return nil } | ||
| func (*MokElection) IsServing() bool { return true } | ||
| func (*MokElection) PromoteSelf() {} | ||
| func (*MokElection) Campaign(_ context.Context, _ int64) error { | ||
| return nil | ||
| } | ||
| func (*MokElection) Resign() {} | ||
| func (*MokElection) GetServingUrls() []string { return nil } | ||
| func (*MokElection) GetElectionPath() string { return "" } | ||
| func (*MokElection) GetLeadership() *election.Leadership { return nil } | ||
|
|
||
| func TestGenerateTSO(t *testing.T) { | ||
| re := require.New(t) | ||
| ctx, cancel := context.WithCancel(context.Background()) | ||
| defer cancel() | ||
| current := time.Now() | ||
| timestampOracle := ×tampOracle{ | ||
| tsoMux: &tsoObject{ | ||
| physical: current, | ||
| logical: maxLogical - 1, | ||
| }, | ||
| saveInterval: 50 * time.Millisecond, | ||
| updatePhysicalInterval: 5 * time.Second, | ||
| maxResetTSGap: func() time.Duration { return time.Hour }, | ||
| metrics: newTSOMetrics("test"), | ||
| member: &MokElection{}, | ||
| } | ||
|
|
||
| // update physical time interval failed due to reach the lastSavedTime, it needs to save storage first, but this behavior is not allowed. | ||
| _, err := timestampOracle.getTS(ctx, 2) | ||
| re.Error(err) | ||
| re.Equal(current, timestampOracle.tsoMux.physical) | ||
|
|
||
| // simulate the save to storage operation is done. | ||
| timestampOracle.lastSavedTime.Store(current.Add(5 * time.Second)) | ||
| _, err = timestampOracle.getTS(ctx, 2) | ||
| re.NoError(err) | ||
| re.NotEqual(current, timestampOracle.tsoMux.physical) | ||
| } | ||
|
|
||
| func TestCurrentGetTSO(t *testing.T) { | ||
| re := require.New(t) | ||
| ctx, cancel := context.WithCancel(context.Background()) | ||
| defer cancel() | ||
| current := time.Now() | ||
| timestampOracle := ×tampOracle{ | ||
| tsoMux: &tsoObject{ | ||
| physical: current, | ||
| logical: maxLogical - 1, | ||
| }, | ||
| saveInterval: 50 * time.Millisecond, | ||
| updatePhysicalInterval: 5 * time.Second, | ||
| maxResetTSGap: func() time.Duration { return time.Hour }, | ||
| metrics: newTSOMetrics("test"), | ||
| member: &MokElection{}, | ||
| } | ||
|
|
||
| runDuration := 5 * time.Second | ||
| timestampOracle.lastSavedTime.Store(current.Add(runDuration)) | ||
| runCtx, runCancel := context.WithTimeout(ctx, runDuration-time.Second) | ||
| defer runCancel() | ||
| wg := sync.WaitGroup{} | ||
| wg.Add(100) | ||
| changes := atomic.Int32{} | ||
| totalTso := atomic.Int32{} | ||
| for i := range 100 { | ||
| go func(i int) { | ||
| physical := timestampOracle.tsoMux.physical | ||
| defer wg.Done() | ||
| for { | ||
| select { | ||
| case <-runCtx.Done(): | ||
| return | ||
| default: | ||
| _, err := timestampOracle.getTS(runCtx, 1) | ||
| totalTso.Add(1) | ||
| re.NoError(err) | ||
| if i == 0 { | ||
|
||
| if physical != timestampOracle.tsoMux.physical { | ||
| changes.Add(1) | ||
| physical = timestampOracle.tsoMux.physical | ||
| } | ||
| } | ||
| } | ||
| } | ||
| }(i) | ||
coderabbitai[bot] marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| wg.Wait() | ||
| re.Equal(totalTso.Load()/int32(maxLogical)+1, changes.Load()) | ||
coderabbitai[bot] marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.