Skip to content

Add additional trigger debug logging #1103

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/Telemetry/Telemetry.cs
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,6 @@ public enum TelemetryMeasureName
GetColumnDefinitionsDurationMs,
GetPrimaryKeysDurationMs,
GetUnprocessedChangesDurationMs,
GetLockedRowCountDurationMs,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The duration wasn't being calculated correctly so I just removed to keep things simpler. We haven't really used these durations for anything useful so far anyways - we can always add it back in if needed.

InsertGlobalStateTableRowDurationMs,
MaxBatchSize,
MaxChangesPerWorker,
Expand Down Expand Up @@ -441,7 +440,7 @@ public enum TelemetryErrorName
Upsert,
UpsertRollback,
GetServerTelemetryProperties,
GetLeaseLockedRowCount,
GetLeaseLockedOrMaxAttemptRowCount,
}

internal class ServerProperties
Expand Down
92 changes: 63 additions & 29 deletions src/TriggerBinding/SqlTableChangeMonitor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,12 @@ private async Task GetTableChangesAsync(SqlConnection connection, CancellationTo
using (SqlCommand updateTablesPreInvocationCommand = this.BuildUpdateTablesPreInvocation(connection, transaction))
{
var commandSw = Stopwatch.StartNew();
await updateTablesPreInvocationCommand.ExecuteNonQueryAsyncWithLogging(this._logger, token, true);
object result = await updateTablesPreInvocationCommand.ExecuteScalarAsyncWithLogging(this._logger, token, true);
if (result != null)
{
// If we updated the LastSyncVersion we'll get a message back from the query, so log it here
this._logger.LogDebug($"[PreInvocation] {result}");
}
setLastSyncVersionDurationMs = commandSw.ElapsedMilliseconds;
}

Expand All @@ -312,13 +317,13 @@ private async Task GetTableChangesAsync(SqlConnection connection, CancellationTo
getChangesDurationMs = commandSw.ElapsedMilliseconds;
}
// Also get the number of rows that currently have lease locks on them
// or are skipped because they have reached their max attempt count.
// This can help with supportability by allowing a customer to see when a
// trigger was processed successfully but returned fewer rows than expected
// because of the rows being locked.
int leaseLockedRowCount = await this.GetLeaseLockedRowCount(connection, transaction);
if (rows.Count > 0 || leaseLockedRowCount > 0)
// trigger was processed successfully but returned fewer rows than expected.
string leaseLockedOrMaxAttemptRowCountMessage = await this.GetLeaseLockedOrMaxAttemptRowCountMessage(connection, transaction, token);
if (rows.Count > 0 || leaseLockedOrMaxAttemptRowCountMessage != null)
{
this._logger.LogDebug($"Executed GetChangesCommand in GetTableChangesAsync. {rows.Count} available changed rows ({leaseLockedRowCount} found with lease locks).");
this._logger.LogDebug($"Executed GetChangesCommand in GetTableChangesAsync. {rows.Count} available changed rows. {leaseLockedOrMaxAttemptRowCountMessage}");
}
// If changes were found, acquire leases on them.
if (rows.Count > 0)
Expand Down Expand Up @@ -530,6 +535,7 @@ private async Task RenewLeasesAsync(SqlConnection connection, CancellationToken

if (rowsAffected > 0)
{
this._logger.LogDebug($"Renewed leases for {rowsAffected} rows");
// Only send an event if we actually updated rows to reduce the overall number of events we send
var measures = new Dictionary<TelemetryMeasureName, double>
{
Expand Down Expand Up @@ -598,6 +604,10 @@ private async Task ClearRowsAsync(CancellationToken token)
await this._rowsToProcessLock.WaitAsync(token);
this._leaseRenewalCount = 0;
this._state = State.CheckingForChanges;
if (this._rowsToProcess.Count > 0)
{
this._logger.LogDebug($"Clearing internal state for {this._rowsToProcess.Count} rows");
}
this._rowsToProcess = new List<IReadOnlyDictionary<string, object>>();
this._rowsToProcessLock.Release();
}
Expand Down Expand Up @@ -635,7 +645,12 @@ private async Task ReleaseLeasesAsync(SqlConnection connection, CancellationToke
using (SqlCommand updateTablesPostInvocationCommand = this.BuildUpdateTablesPostInvocation(connection, transaction, newLastSyncVersion))
{
var commandSw = Stopwatch.StartNew();
await updateTablesPostInvocationCommand.ExecuteNonQueryAsyncWithLogging(this._logger, token);
object result = await updateTablesPostInvocationCommand.ExecuteScalarAsyncWithLogging(this._logger, token);
if (result != null)
{
// If we updated the LastSyncVersion we'll get a message back from the query, so log it here
this._logger.LogDebug($"[PostInvocation] {result}");
}
updateLastSyncVersionDurationMs = commandSw.ElapsedMilliseconds;
}
transaction.Commit();
Expand Down Expand Up @@ -782,9 +797,12 @@ private SqlCommand BuildUpdateTablesPreInvocation(SqlConnection connection, SqlT
WHERE UserFunctionID = '{this._userFunctionId}' AND UserTableID = {this._userTableId};

IF @last_sync_version < @min_valid_version
BEGIN
UPDATE {GlobalStateTableName}
SET LastSyncVersion = @min_valid_version
WHERE UserFunctionID = '{this._userFunctionId}' AND UserTableID = {this._userTableId};
SELECT 'Updated LastSyncVersion from ' + CAST(@last_sync_version AS NVARCHAR) + ' to ' + CAST(@min_valid_version AS NVARCHAR);
END
";

return new SqlCommand(updateTablesPreInvocationQuery, connection, transaction);
Expand Down Expand Up @@ -842,44 +860,53 @@ LEFT OUTER JOIN {this._userTable.BracketQuotedFullName} AS u ON {userTableJoinCo
}

/// <summary>
/// Returns the number of changes(rows) on the user's table that are actively locked by other leases OR returns -1 on exception.
/// Returns a message indicating the number of changes(rows) on the user's table that are actively locked by other leases or have
/// reached the max attempts allowed for the row.
/// </summary>
/// <param name="connection">The connection to add to the SqlCommand</param>
/// <param name="transaction">The transaction to add to the SqlCommand</param>
/// <returns>The number of rows locked by leases or -1 on exception</returns>
private async Task<int> GetLeaseLockedRowCount(SqlConnection connection, SqlTransaction transaction)
/// <param name="token">Cancellation token</param>
/// <returns>The message with the number of rows that were lease locked or were at the max attempt limit. Null if no such rows exist</returns>
private async Task<string> GetLeaseLockedOrMaxAttemptRowCountMessage(SqlConnection connection, SqlTransaction transaction, CancellationToken token)
{
string leasesTableJoinCondition = string.Join(" AND ", this._primaryKeyColumns.Select(col => $"c.{col.name.AsBracketQuotedString()} = l.{col.name.AsBracketQuotedString()}"));
int leaseLockedRowsCount = 0;
long getLockedRowCountDurationMs = 0L;
// Get the count of changes from CHANGETABLE that meet the following criteria:
// * Not Null LeaseExpirationTime AND
// * LeaseExpirationTime > Current Time
string getLeaseLockedrowCountQuery = $@"
// Lease locked:
// * Have attempts remaining (Attempt count < Max attempts)
// * NOT NULL LeaseExpirationTime
// * LeaseExpirationTime > Current Time
// Max Attempts reached:
// * NULL LeaseExpirationTime OR LeaseExpirationTime <= Current Time
// * No attempts remaining (Attempt count = Max attempts)
string getLeaseLockedOrMaxAttemptRowCountQuery = $@"
{AppLockStatements}

DECLARE @last_sync_version bigint;
SELECT @last_sync_version = LastSyncVersion
FROM {GlobalStateTableName}
WHERE UserFunctionID = '{this._userFunctionId}' AND UserTableID = {this._userTableId};

SELECT COUNT(*)
DECLARE @lease_locked_count int;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Refactored this to just create the message in the query itself. I could have returned a table with the numeric values, but having to read the data table and do all that extra stuff didn't seem worth it when we can just do it directly in the T-SQL.

DECLARE @max_attempts_count int;
SELECT
@lease_locked_count = COUNT(CASE WHEN l.{LeasesTableAttemptCountColumnName} < {MaxChangeProcessAttemptCount} AND l.{LeasesTableLeaseExpirationTimeColumnName} IS NOT NULL AND l.{LeasesTableLeaseExpirationTimeColumnName} > SYSDATETIME() THEN 1 ELSE NULL END),
@max_attempts_count = COUNT(CASE WHEN (l.{LeasesTableLeaseExpirationTimeColumnName} IS NULL OR l.{LeasesTableLeaseExpirationTimeColumnName} <= SYSDATETIME()) AND l.{LeasesTableAttemptCountColumnName} = {MaxChangeProcessAttemptCount} THEN 1 ELSE NULL END)
FROM CHANGETABLE(CHANGES {this._userTable.BracketQuotedFullName}, @last_sync_version) AS c
LEFT OUTER JOIN {this._bracketedLeasesTableName} AS l ON {leasesTableJoinCondition}
WHERE l.{LeasesTableLeaseExpirationTimeColumnName} IS NOT NULL AND l.{LeasesTableLeaseExpirationTimeColumnName} > SYSDATETIME()";
LEFT OUTER JOIN {this._bracketedLeasesTableName} AS l ON {leasesTableJoinCondition};
IF @lease_locked_count > 0 OR @max_attempts_count > 0
BEGIN
SELECT '(' + CAST(@lease_locked_count AS NVARCHAR) + ' found with lease locks and ' + CAST(@max_attempts_count AS NVARCHAR) + ' ignored because they''ve reached the max attempt limit)';
END";
try
{
using (var getLeaseLockedRowCountCommand = new SqlCommand(getLeaseLockedrowCountQuery, connection, transaction))
using (var getLeaseLockedOrMaxAttemptsRowCountCommand = new SqlCommand(getLeaseLockedOrMaxAttemptRowCountQuery, connection, transaction))
{
var commandSw = Stopwatch.StartNew();
leaseLockedRowsCount = (int)await getLeaseLockedRowCountCommand.ExecuteScalarAsyncWithLogging(this._logger, CancellationToken.None);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note I fixed an issue here with not passing in the cancellation token. We should be doing that for all queries we execute so that if the function execution is cancelled we'll properly cancel the queries we're running right away.

getLockedRowCountDurationMs = commandSw.ElapsedMilliseconds;
return (await getLeaseLockedOrMaxAttemptsRowCountCommand.ExecuteScalarAsyncWithLogging(this._logger, token))?.ToString();
}
}
catch (Exception ex)
{
this._logger.LogError($"Failed to query count of lease locked changes for table '{this._userTable.FullName}' due to exception: {ex.GetType()}. Exception message: {ex.Message}");
TelemetryInstance.TrackException(TelemetryErrorName.GetLeaseLockedRowCount, ex, null, new Dictionary<TelemetryMeasureName, double>() { { TelemetryMeasureName.GetLockedRowCountDurationMs, getLockedRowCountDurationMs } });
this._logger.LogError($"Failed to query count of lease locked or max attempt changes for table '{this._userTable.FullName}' due to exception: {ex.GetType()}. Exception message: {ex.Message}");
TelemetryInstance.TrackException(TelemetryErrorName.GetLeaseLockedOrMaxAttemptRowCount, ex);
// This is currently only used for debugging, so ignore the exception if we can. If the error is a fatal one though then the connection or transaction will be
// unusable so we have to let this bubble up so we can attempt to reconnect
if (ex.IsFatalSqlException() || ex.IsDeadlockException() || connection.IsBrokenOrClosed())
Expand All @@ -888,11 +915,10 @@ LEFT OUTER JOIN {this._bracketedLeasesTableName} AS l ON {leasesTableJoinConditi
}
else
{
// If it's non-fatal though return a -1 instead of throwing since it isn't necessary to get the value
leaseLockedRowsCount = -1;
// If it's non-fatal though return null instead of throwing since it isn't necessary to get the value
return null;
}
}
return leaseLockedRowsCount;
}

/// <summary>
Expand Down Expand Up @@ -952,7 +978,10 @@ WHEN NOT MATCHED THEN
private SqlCommand BuildRenewLeasesCommand(SqlConnection connection, SqlTransaction transaction)
{
string matchCondition = string.Join(" OR ", this._rowMatchConditions.Take(this._rowsToProcess.Count));

if (string.IsNullOrEmpty(matchCondition))
{
this._logger.LogError($"MatchCondition resolved to empty with '{this._rowsToProcess.Count}' rowsToProcess.");
}
string renewLeasesQuery = $@"
{AppLockStatements}

Expand Down Expand Up @@ -1043,7 +1072,12 @@ LEFT OUTER JOIN {this._bracketedLeasesTableName} AS l ON {leasesTableJoinConditi
SET LastSyncVersion = {newLastSyncVersion}, LastAccessTime = GETUTCDATE()
WHERE UserFunctionID = '{this._userFunctionId}' AND UserTableID = {this._userTableId};

DECLARE @max_attempt_rows_to_be_deleted int;
SELECT @max_attempt_rows_to_be_deleted = COUNT(*) FROM {this._bracketedLeasesTableName} WHERE {LeasesTableChangeVersionColumnName} <= {newLastSyncVersion} AND {LeasesTableAttemptCountColumnName} = {MaxChangeProcessAttemptCount};

DELETE FROM {this._bracketedLeasesTableName} WHERE {LeasesTableChangeVersionColumnName} <= {newLastSyncVersion};

SELECT 'Updated LastSyncVersion from ' + CAST(@current_last_sync_version AS NVARCHAR) + ' to {newLastSyncVersion} MaxAttemptRowsToBeDeleted=' + CAST(@max_attempt_rows_to_be_deleted AS NVARCHAR);
END
";

Expand Down