Skip to content

Commit 4d877f9

Browse files
Added atomic transaction metrics to vttablet
Signed-off-by: Harshit Gangal <[email protected]>
1 parent 3bb138f commit 4d877f9

File tree

3 files changed: 22 additions and 8 deletions.

3 files changed

+22
-8
lines changed

go/vt/vttablet/tabletserver/dt_executor.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,12 @@ func (dte *DTExecutor) CommitPrepared(dtid string) (err error) {
159159
defer func() {
160160
if err != nil {
161161
log.Warningf("failed to commit the prepared transaction '%s' with error: %v", dtid, err)
162-
dte.te.checkErrorAndMarkFailed(ctx, dtid, err, "TwopcCommit")
162+
fail := dte.te.checkErrorAndMarkFailed(ctx, dtid, err, "TwopcCommit")
163+
if fail {
164+
dte.te.env.Stats().CommitPreparedFail.Add("NonRetryable", 1)
165+
} else {
166+
dte.te.env.Stats().CommitPreparedFail.Add("Retryable", 1)
167+
}
163168
}
164169
dte.te.txPool.RollbackAndRelease(ctx, conn)
165170
}()

go/vt/vttablet/tabletserver/tabletenv/stats.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ type Stats struct {
3434
ErrorCounters *stats.CountersWithSingleLabel
3535
InternalErrors *stats.CountersWithSingleLabel
3636
Warnings *stats.CountersWithSingleLabel
37-
Unresolved *stats.GaugesWithSingleLabel // For now, only Prepares are tracked
3837
UserTableQueryCount *stats.CountersWithMultiLabels // Per CallerID/table counts
3938
UserTableQueryTimesNs *stats.CountersWithMultiLabels // Per CallerID/table latencies
4039
UserTransactionCount *stats.CountersWithMultiLabels // Per CallerID transaction counts
@@ -49,6 +48,11 @@ type Stats struct {
4948
UserReservedTimesNs *stats.CountersWithSingleLabel // Per CallerID reserved connection duration
5049

5150
QueryTimingsByTabletType *servenv.TimingsWrapper // Query timings split by current tablet type
51+
52+
// Atomic Transactions
53+
Unresolved *stats.GaugesWithSingleLabel
54+
CommitPreparedFail *stats.CountersWithSingleLabel
55+
RedoPreparedFail *stats.CountersWithSingleLabel
5256
}
5357

5458
// NewStats instantiates a new set of stats scoped by exporter.
@@ -83,7 +87,6 @@ func NewStats(exporter *servenv.Exporter) *Stats {
8387
),
8488
InternalErrors: exporter.NewCountersWithSingleLabel("InternalErrors", "Internal component errors", "type", "Task", "StrayTransactions", "Panic", "HungQuery", "Schema", "TwopcCommit", "TwopcResurrection", "WatchdogFail", "Messages"),
8589
Warnings: exporter.NewCountersWithSingleLabel("Warnings", "Warnings", "type", "ResultsExceeded"),
86-
Unresolved: exporter.NewGaugesWithSingleLabel("Unresolved", "Unresolved items", "item_type", "Prepares"),
8790
UserTableQueryCount: exporter.NewCountersWithMultiLabels("UserTableQueryCount", "Queries received for each CallerID/table combination", []string{"TableName", "CallerID", "Type"}),
8891
UserTableQueryTimesNs: exporter.NewCountersWithMultiLabels("UserTableQueryTimesNs", "Total latency for each CallerID/table combination", []string{"TableName", "CallerID", "Type"}),
8992
UserTransactionCount: exporter.NewCountersWithMultiLabels("UserTransactionCount", "transactions received for each CallerID", []string{"CallerID", "Conclusion"}),
@@ -98,6 +101,10 @@ func NewStats(exporter *servenv.Exporter) *Stats {
98101
UserReservedTimesNs: exporter.NewCountersWithSingleLabel("UserReservedTimesNs", "Total reserved connection latency for each CallerID", "CallerID"),
99102

100103
QueryTimingsByTabletType: exporter.NewTimings("QueryTimingsByTabletType", "Query timings broken down by active tablet type", "TabletType"),
104+
105+
Unresolved: exporter.NewGaugesWithSingleLabel("UnresolvedTransaction", "Unresolved items", "ManagerType"),
106+
CommitPreparedFail: exporter.NewCountersWithSingleLabel("CommitPreparedFail", "failed prepared transactions commit", "FailureType"),
107+
RedoPreparedFail: exporter.NewCountersWithSingleLabel("RedoPreparedFail", "failed prepared transactions on redo", "FailureType"),
101108
}
102109
stats.QPSRates = exporter.NewRates("QPS", stats.QueryTimings, 15*60/5, 5*time.Second)
103110
return stats

go/vt/vttablet/tabletserver/tx_engine.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@ type TxEngine struct {
8383
// 2. TabletControls have been set in the tablet record, and Query service is going to be disabled.
8484
twopcAllowed []bool
8585
shutdownGracePeriod time.Duration
86-
coordinatorAddress string
8786
abandonAge time.Duration
8887
ticks *timer.Timer
8988

@@ -454,6 +453,9 @@ func (te *TxEngine) prepareFromRedo() error {
454453
allErrs = append(allErrs, vterrors.Wrapf(err, "dtid - %v", preparedTx.Dtid))
455454
if prepFailed {
456455
failedCounter++
456+
te.env.Stats().RedoPreparedFail.Add("NonRetryable", 1)
457+
} else {
458+
te.env.Stats().RedoPreparedFail.Add("Retryable", 1)
457459
}
458460
} else {
459461
preparedCounter++
@@ -580,14 +582,13 @@ func (te *TxEngine) startTransactionWatcher() {
580582
ctx, cancel := context.WithTimeout(tabletenv.LocalContext(), te.abandonAge/4)
581583
defer cancel()
582584

583-
// Raise alerts on prepares that have been unresolved for too long.
584-
// Use 5x abandonAge to give opportunity for transaction coordinator to resolve these redo logs.
585-
count, err := te.twoPC.CountUnresolvedRedo(ctx, time.Now().Add(-te.abandonAge*5))
585+
// Track unresolved redo logs.
586+
count, err := te.twoPC.CountUnresolvedRedo(ctx, time.Now().Add(-te.abandonAge))
586587
if err != nil {
587588
te.env.Stats().InternalErrors.Add("RedoWatcherFail", 1)
588589
log.Errorf("Error reading prepared transactions: %v", err)
589590
}
590-
te.env.Stats().Unresolved.Set("Prepares", count)
591+
te.env.Stats().Unresolved.Set("ResourceManager", count)
591592

592593
// Notify lingering distributed transactions.
593594
count, err = te.twoPC.CountUnresolvedTransaction(ctx, time.Now().Add(-te.abandonAge))
@@ -596,6 +597,7 @@ func (te *TxEngine) startTransactionWatcher() {
596597
log.Errorf("Error reading unresolved transactions: %v", err)
597598
return
598599
}
600+
te.env.Stats().Unresolved.Set("MetadataManager", count)
599601
if count > 0 {
600602
te.dxNotify()
601603
}

0 commit comments

Comments
 (0)