Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
5c82fdb
add SetGlobalGCBarrier API and update AdvanceTxnSafePoint
tiancaiamao May 21, 2025
1cf3791
Merge branch 'master' into global-gc-barrier
tiancaiamao May 21, 2025
c4df2aa
add the grpc api
tiancaiamao May 21, 2025
4e674cd
add unit test
tiancaiamao May 21, 2025
b6caf12
make fmt
tiancaiamao May 21, 2025
2ffe7a4
add integration test
tiancaiamao May 21, 2025
0007cde
make fmt
tiancaiamao May 21, 2025
5a90975
address comment
tiancaiamao May 26, 2025
c8c26e5
update go.mod for latest kvproto
tiancaiamao May 27, 2025
efd6f3d
fix tests
tiancaiamao May 27, 2025
4a52539
Merge branch 'master' into global-gc-barrier
tiancaiamao May 27, 2025
c3a1b95
update pd client to provide global gc barriers API
tiancaiamao May 27, 2025
09c6e98
update GetAllKeyspacesGCStates and add test
tiancaiamao May 27, 2025
085e37b
Merge branch 'master' into update-pdclient
tiancaiamao Aug 5, 2025
880cdbd
end
tiancaiamao Aug 5, 2025
588e7b2
Merge branch 'master' into update-pdclient
tiancaiamao Aug 8, 2025
53b0cd7
checkout go.mod
tiancaiamao Aug 8, 2025
3041687
lint?
tiancaiamao Aug 8, 2025
5febb98
make check
tiancaiamao Aug 8, 2025
574545b
Merge branch 'master' into update-pdclient
tiancaiamao Aug 11, 2025
cff1b51
address comment
tiancaiamao Aug 12, 2025
5684113
address comment
tiancaiamao Aug 12, 2025
b93c24f
address comment
tiancaiamao Aug 19, 2025
10fd788
Merge branch 'master' into update-pdclient
tiancaiamao Aug 19, 2025
faf6d9b
imporve test coverage a bit
tiancaiamao Aug 19, 2025
a06ad66
address comment
tiancaiamao Sep 24, 2025
0ed0fa1
make lint happy
tiancaiamao Sep 24, 2025
11701da
Merge branch 'master' into update-pdclient
tiancaiamao Sep 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions client/clients/gc/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,43 @@ type GCStatesClient interface {
// When this method is called on a keyspace without keyspace-level GC enabled, it will be equivalent to calling it on
// the NullKeyspace.
GetGCState(ctx context.Context) (GCState, error)
// SetGlobalGCBarrier sets a global GC barrier, which blocks GC like how GC barriers do, but is effective for all
// keyspaces. This API is designed for some special needs to block GC of all keyspaces.
//
// The usage is the similar to SetGCBarrier, but is not affected by the keyspace context of the current GCStatesClient
// instance. Note that normal GC barriers and global GC barriers are separated.
// One can not use SetGCBarrier and DeleteGCBarrier to operate a global GC barrier set by SetGlobalGCBarrier, and vice
// versa.
//
// Once a global GC barrier is set, it will block the txn safe points of all keyspaces from being advanced over the
// barrierTS, until the global GC barrier is expired (defined by ttl) or manually deleted (by calling
// DeleteGlobalGCBarrier).
//
// When this method is called on an existing global GC barrier, it updates the barrierTS and ttl of the existing global
// GC barrier and the expiration time will become the current time plus the ttl.
// This means that calling this method on an existing global GC barrier can extend its lifetime arbitrarily.
//
// Passing non-positive value to ttl is not allowed. Passing `time.Duration(math.MaxInt64)` to ttl indicates that the
// global GC barrier should never expire. The ttl might be rounded up, and the actual ttl is guaranteed no less than the
// specified duration.
//
// The barrierID must be non-empty.
//
// The given barrierTS must be greater than or equal to the current txn safe points of all keyspaces,
// otherwise an error will be returned.
//
// Currently, the caller is responsible for guaranteeing the given barrierTS does not exceed any of the max allocated
// timestamps of all TSOs in the cluster. Note that a cluster might have multiple TSOs for different keyspaces.
//
// When this function executes successfully, its result is never nil.
SetGlobalGCBarrier(ctx context.Context, barrierID string, barrierTS uint64, ttl time.Duration) (*GlobalGCBarrierInfo, error)
// DeleteGlobalGCBarrier deletes a global GC barrier.
DeleteGlobalGCBarrier(ctx context.Context, barrierID string) (*GlobalGCBarrierInfo, error)
// Get the GC states from all keyspaces.
Comment on lines +106 to +108
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider also expand these comments.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I'm trying my best already

// The return value includes both GC states and global GC barriers information.
// If a keyspace's state is not ENABLED(like DISABLE/ARCHIVED/TOMBSTONE), that keyspace is skipped.
// If a keyspace is not configured with keyspace level GC, its GCState data is missing.
GetAllKeyspacesGCStates(ctx context.Context) (ClusterGCStates, error)
}

// InternalController is the interface for controlling GC execution.
Expand Down Expand Up @@ -141,6 +178,16 @@ type GCBarrierInfo struct {
getReqStartTime time.Time
}

// GlobalGCBarrierInfo represents the information of a global GC barrier.
type GlobalGCBarrierInfo struct {
BarrierID string
BarrierTS uint64
TTL time.Duration
// The time when the RPC that fetches the GC barrier info.
// It will be used as the basis for determining whether the barrier is expired.
getReqStartTime time.Time
}

// TTLNeverExpire is a special value for TTL that indicates the barrier never expires.
const TTLNeverExpire = time.Duration(math.MaxInt64)

Expand Down Expand Up @@ -171,6 +218,28 @@ func (b *GCBarrierInfo) isExpiredImpl(now time.Time) bool {
return now.Sub(b.getReqStartTime) > b.TTL
}

// NewGlobalGCBarrierInfo creates a new GCBarrierInfo instance.
func NewGlobalGCBarrierInfo(barrierID string, barrierTS uint64, ttl time.Duration, getReqStartTime time.Time) *GlobalGCBarrierInfo {
return &GlobalGCBarrierInfo{
BarrierID: barrierID,
BarrierTS: barrierTS,
TTL: ttl,
getReqStartTime: getReqStartTime,
}
}

// IsExpired checks whether the barrier is expired.
func (b *GlobalGCBarrierInfo) IsExpired() bool {
return b.isExpiredImpl(time.Now())
}

func (b *GlobalGCBarrierInfo) isExpiredImpl(now time.Time) bool {
if b.TTL == TTLNeverExpire {
return false
}
return now.Sub(b.getReqStartTime) > b.TTL
}

// GCState represents the information of the GC state.
//
//nolint:revive
Expand All @@ -181,3 +250,11 @@ type GCState struct {
GCSafePoint uint64
GCBarriers []*GCBarrierInfo
}

// ClusterGCStates represents the information of the GC state for all keyspaces.
type ClusterGCStates struct {
// Maps from keyspace id to GC state of that keyspace.
GCStates map[uint32]GCState
// All existing global GC barriers.
GlobalGCBarriers []*GlobalGCBarrierInfo
}
14 changes: 14 additions & 0 deletions client/clients/gc/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,20 @@ func TestGCBarrierInfoExpiration(t *testing.T) {
re.True(b.isExpiredImpl(now.Add(time.Hour * 24 * 365 * 10)))

b = NewGCBarrierInfo("b", 1, TTLNeverExpire, now)
re.False(b.IsExpired())
re.False(b.isExpiredImpl(now))
re.False(b.isExpiredImpl(now.Add(time.Hour * 24 * 365 * 10)))

b1 := NewGlobalGCBarrierInfo("b", 1, time.Second, now)
re.False(b1.IsExpired())
re.False(b1.isExpiredImpl(now.Add(-time.Second)))
re.False(b1.isExpiredImpl(now))
re.False(b1.isExpiredImpl(now.Add(time.Millisecond * 999)))
re.False(b1.isExpiredImpl(now.Add(time.Second)))
re.True(b1.isExpiredImpl(now.Add(time.Second + time.Millisecond)))
re.True(b1.isExpiredImpl(now.Add(time.Hour * 24 * 365 * 10)))

b1 = NewGlobalGCBarrierInfo("b", 1, TTLNeverExpire, now)
re.False(b1.isExpiredImpl(now))
re.False(b1.isExpiredImpl(now.Add(time.Hour * 24 * 365 * 10)))
}
128 changes: 120 additions & 8 deletions client/gc_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,19 @@ func pbToGCBarrierInfo(pb *pdpb.GCBarrierInfo, reqStartTime time.Time) *gc.GCBar
)
}

func pbToGlobalGCBarrierInfo(pb *pdpb.GlobalGCBarrierInfo, reqStartTime time.Time) *gc.GlobalGCBarrierInfo {
if pb == nil {
return nil
}
ttl := saturatingStdDurationFromSeconds(pb.GetTtlSeconds())
return gc.NewGlobalGCBarrierInfo(
pb.GetBarrierId(),
pb.GetBarrierTs(),
ttl,
reqStartTime,
)
}

// SetGCBarrier sets (creates or updates) a GC barrier.
func (c gcStatesClient) SetGCBarrier(ctx context.Context, barrierID string, barrierTS uint64, ttl time.Duration) (*gc.GCBarrierInfo, error) {
if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil {
Expand Down Expand Up @@ -306,18 +319,117 @@ func (c gcStatesClient) GetGCState(ctx context.Context) (gc.GCState, error) {
}

gcState := resp.GetGcState()
return pbToGCState(gcState, start), nil
}

func pbToGCState(pb *pdpb.GCState, reqStartTime time.Time) gc.GCState {
keyspaceID := constants.NullKeyspaceID
if gcState.KeyspaceScope != nil {
keyspaceID = gcState.KeyspaceScope.KeyspaceId
if pb.KeyspaceScope != nil {
keyspaceID = pb.KeyspaceScope.KeyspaceId
}
gcBarriers := make([]*gc.GCBarrierInfo, 0, len(gcState.GetGcBarriers()))
for _, b := range gcState.GetGcBarriers() {
gcBarriers = append(gcBarriers, pbToGCBarrierInfo(b, start))
gcBarriers := make([]*gc.GCBarrierInfo, 0, len(pb.GetGcBarriers()))
for _, b := range pb.GetGcBarriers() {
gcBarriers = append(gcBarriers, pbToGCBarrierInfo(b, reqStartTime))
}
return gc.GCState{
KeyspaceID: keyspaceID,
TxnSafePoint: gcState.GetTxnSafePoint(),
GCSafePoint: gcState.GetGcSafePoint(),
TxnSafePoint: pb.GetTxnSafePoint(),
GCSafePoint: pb.GetGcSafePoint(),
GCBarriers: gcBarriers,
}, nil
}
}

// SetGlobalGCBarrier sets (creates or updates) a global GC barrier.
func (c gcStatesClient) SetGlobalGCBarrier(ctx context.Context, barrierID string, barrierTS uint64, ttl time.Duration) (*gc.GlobalGCBarrierInfo, error) {
if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil {
span = span.Tracer().StartSpan("pdclient.SetGlobalGCBarrier", opentracing.ChildOf(span.Context()))
defer span.Finish()
}
start := time.Now()
defer func() { metrics.CmdDurationSetGlobalGCBarrier.Observe(time.Since(start).Seconds()) }()

ctx, cancel := context.WithTimeout(ctx, c.client.inner.option.Timeout)
defer cancel()
req := &pdpb.SetGlobalGCBarrierRequest{
Header: c.client.requestHeader(),
BarrierId: barrierID,
BarrierTs: barrierTS,
TtlSeconds: roundUpDurationToSeconds(ttl),
}
protoClient, ctx := c.client.getClientAndContext(ctx)
if protoClient == nil {
return nil, errs.ErrClientGetProtoClient
}
resp, err := protoClient.SetGlobalGCBarrier(ctx, req)
if err = c.client.respForErr(metrics.CmdFailedDurationSetGlobalGCBarrier, start, err, resp.GetHeader()); err != nil {
return nil, err
}
return pbToGlobalGCBarrierInfo(resp.GetNewBarrierInfo(), start), nil
}

// DeleteGlobalGCBarrier deletes a GC barrier.
func (c gcStatesClient) DeleteGlobalGCBarrier(ctx context.Context, barrierID string) (*gc.GlobalGCBarrierInfo, error) {
if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil {
span = span.Tracer().StartSpan("pdclient.DeleteGlobalGCBarrier", opentracing.ChildOf(span.Context()))
defer span.Finish()
}
start := time.Now()
defer func() { metrics.CmdDurationDeleteGlobalGCBarrier.Observe(time.Since(start).Seconds()) }()

ctx, cancel := context.WithTimeout(ctx, c.client.inner.option.Timeout)
defer cancel()
req := &pdpb.DeleteGlobalGCBarrierRequest{
Header: c.client.requestHeader(),
BarrierId: barrierID,
}
protoClient, ctx := c.client.getClientAndContext(ctx)
if protoClient == nil {
return nil, errs.ErrClientGetProtoClient
}
resp, err := protoClient.DeleteGlobalGCBarrier(ctx, req)
if err = c.client.respForErr(metrics.CmdFailedDurationDeleteGlobalGCBarrier, start, err, resp.GetHeader()); err != nil {
return nil, err
}
return pbToGlobalGCBarrierInfo(resp.GetDeletedBarrierInfo(), start), nil
}

// GetAllKeyspacesGCStates gets the GC states from all keyspaces.
func (c gcStatesClient) GetAllKeyspacesGCStates(ctx context.Context) (gc.ClusterGCStates, error) {
if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil {
span = span.Tracer().StartSpan("pdclient.GetAllKeyspacesGCState", opentracing.ChildOf(span.Context()))
defer span.Finish()
}
start := time.Now()
defer func() { metrics.CmdDurationGetAllKeyspacesGCStates.Observe(time.Since(start).Seconds()) }()

ctx, cancel := context.WithTimeout(ctx, c.client.inner.option.Timeout)
defer cancel()
req := &pdpb.GetAllKeyspacesGCStatesRequest{
Header: c.client.requestHeader(),
}
protoClient, ctx := c.client.getClientAndContext(ctx)
if protoClient == nil {
return gc.ClusterGCStates{}, errs.ErrClientGetProtoClient
}

resp, err := protoClient.GetAllKeyspacesGCStates(ctx, req)
if err = c.client.respForErr(metrics.CmdFailedDurationGetAllKeyspacesGCStates, start, err, resp.GetHeader()); err != nil {
return gc.ClusterGCStates{}, err
}

var ret gc.ClusterGCStates
ret.GCStates = make(map[uint32]gc.GCState, len(resp.GetGcStates()))
for _, state := range resp.GetGcStates() {
var keyspaceID uint32
if state.KeyspaceScope == nil {
keyspaceID = constants.NullKeyspaceID
} else {
keyspaceID = state.KeyspaceScope.KeyspaceId
}
ret.GCStates[keyspaceID] = pbToGCState(state, start)
}
for _, barrier := range resp.GetGlobalGcBarriers() {
ret.GlobalGCBarriers = append(ret.GlobalGCBarriers, pbToGlobalGCBarrierInfo(barrier, start))
}
return ret, nil
}
12 changes: 12 additions & 0 deletions client/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,9 @@ var (
CmdDurationSetGCBarrier prometheus.Observer
CmdDurationDeleteGCBarrier prometheus.Observer
CmdDurationGetGCState prometheus.Observer
CmdDurationSetGlobalGCBarrier prometheus.Observer
CmdDurationDeleteGlobalGCBarrier prometheus.Observer
CmdDurationGetAllKeyspacesGCStates prometheus.Observer

CmdFailedDurationGetRegion prometheus.Observer
CmdFailedDurationTSOWait prometheus.Observer
Expand Down Expand Up @@ -316,6 +319,9 @@ var (
CmdFailedDurationSetGCBarrier prometheus.Observer
CmdFailedDurationDeleteGCBarrier prometheus.Observer
CmdFailedDurationGetGCState prometheus.Observer
CmdFailedDurationSetGlobalGCBarrier prometheus.Observer
CmdFailedDurationDeleteGlobalGCBarrier prometheus.Observer
CmdFailedDurationGetAllKeyspacesGCStates prometheus.Observer

InternalCmdDurationGetClusterInfo prometheus.Observer
InternalCmdDurationGetMembers prometheus.Observer
Expand Down Expand Up @@ -373,6 +379,9 @@ func initLabelValues() {
CmdDurationSetGCBarrier = cmdDuration.WithLabelValues("set_gc_barrier")
CmdDurationDeleteGCBarrier = cmdDuration.WithLabelValues("delete_gc_barrier")
CmdDurationGetGCState = cmdDuration.WithLabelValues("get_gc_state")
CmdDurationSetGlobalGCBarrier = cmdDuration.WithLabelValues("set_global_gc_barrier")
CmdDurationDeleteGlobalGCBarrier = cmdDuration.WithLabelValues("delete_global_gc_barrier")
CmdDurationGetAllKeyspacesGCStates = cmdDuration.WithLabelValues("get_all_keyspaces_gc_states")

CmdFailedDurationGetRegion = cmdFailedDuration.WithLabelValues("get_region")
CmdFailedDurationTSOWait = cmdFailedDuration.WithLabelValues("wait")
Expand Down Expand Up @@ -400,6 +409,9 @@ func initLabelValues() {
CmdFailedDurationSetGCBarrier = cmdFailedDuration.WithLabelValues("set_gc_barrier")
CmdFailedDurationDeleteGCBarrier = cmdFailedDuration.WithLabelValues("delete_gc_barrier")
CmdFailedDurationGetGCState = cmdFailedDuration.WithLabelValues("get_gc_state")
CmdFailedDurationSetGlobalGCBarrier = cmdFailedDuration.WithLabelValues("set_global_gc_barrier")
CmdFailedDurationDeleteGlobalGCBarrier = cmdFailedDuration.WithLabelValues("delete_global_gc_barrier")
CmdFailedDurationGetAllKeyspacesGCStates = cmdDuration.WithLabelValues("get_all_keyspaces_gc_states")

InternalCmdDurationGetClusterInfo = internalCmdDuration.WithLabelValues("get_cluster_info")
InternalCmdDurationGetMembers = internalCmdDuration.WithLabelValues("get_members")
Expand Down
8 changes: 8 additions & 0 deletions pkg/gc/gc_state_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,14 @@ func (m *GCStateManager) getAllKeyspacesGCStatesImpl() (map[uint32]GCState, erro
return results, nil
}

// LoadAllGlobalGCBarriers returns global GC barriers.
func (m *GCStateManager) LoadAllGlobalGCBarriers() ([]*endpoint.GlobalGCBarrier, error) {
m.mu.Lock()
defer m.mu.Unlock()

return m.gcMetaStorage.LoadAllGlobalGCBarriers()
}

// CompatibleUpdateServiceGCSafePoint updates the service safe point of the given serviceID. Service safe points are
// being deprecated, and this method provides compatibility for components that are still using service safe point API.
// This method simulates the behavior of the service safe points in old versions, by internally using txn safe points
Expand Down
17 changes: 14 additions & 3 deletions server/gc_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -737,16 +737,27 @@ func (s *GrpcServer) GetAllKeyspacesGCStates(ctx context.Context, request *pdpb.
Header: wrapErrorToHeader(pdpb.ErrorType_UNKNOWN, err.Error()),
}, nil
}

now := time.Now()
gcStatesPb := make([]*pdpb.GCState, 0, len(gcStates))
for _, gcState := range gcStates {
gcStatesPb = append(gcStatesPb, gcStateToProto(gcState, now))
}

globalBarriers, err := s.gcStateManager.LoadAllGlobalGCBarriers()
if err != nil {
return &pdpb.GetAllKeyspacesGCStatesResponse{
Header: wrapErrorToHeader(pdpb.ErrorType_UNKNOWN, err.Error()),
}, nil
}
gcBarriersPb := make([]*pdpb.GlobalGCBarrierInfo, 0, len(globalBarriers))
for _, barrier := range globalBarriers {
gcBarriersPb = append(gcBarriersPb, globalGCBarrierToProto(barrier, now))
}

return &pdpb.GetAllKeyspacesGCStatesResponse{
Header: wrapHeader(),
GcStates: gcStatesPb,
Header: wrapHeader(),
GcStates: gcStatesPb,
GlobalGcBarriers: gcBarriersPb,
}, nil
}

Expand Down
Loading