Skip to content

Commit 9104901

Browse files
authored
[server][common] Server read quota initialization without CV fallback strategy (#2352)
Problem Statement: 1. If one store version is not initialized correctly and throws an unhandled exception, we skip the initialization of the subsequent stores in the storeRepository. 2. If the CV repository is delayed or unavailable (which we have been seeing more frequently recently, especially with large clusters), then we often fail quota initialization due to VeniceNoHelixResourceException, which was also unhandled as per problem 1. 3. Having both initialized and initializedVolatile is confusing and unnecessary. Solution: 1. Catch unhandled exceptions at the store level and continue initialization for other stores to isolate any initialization errors. 2. If CV is unavailable, i.e. VeniceNoHelixResourceException is thrown or the partition assignment map is somehow empty, and the fallback strategy is enabled (enabled by default, but it can be disabled via the config server.read.quota.initialization.fallback.enabled), we will allocate this instance Q * X / P quota, where Q is the total read quota, X is the number of partitions assigned to the node based on storage engine state, and P is the store version partition count. 3. Removed the initialized flag, keeping only the initializedVolatile flag. 4. Added additional logging to capture only when there is a total quota change and how the new rate limiter is calculated, i.e. the total quota, node responsibility, and resulting instance quota. Also added additional logging whenever the fallback strategy is invoked, regardless of the quota update trigger (from store update or CV update events).
1 parent 2b99017 commit 9104901

File tree

8 files changed

+299
-91
lines changed

8 files changed

+299
-91
lines changed

clients/da-vinci-client/src/main/java/com/linkedin/davinci/config/VeniceServerConfig.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@
175175
import static com.linkedin.venice.ConfigKeys.SERVER_QUOTA_ENFORCEMENT_CAPACITY_MULTIPLE;
176176
import static com.linkedin.venice.ConfigKeys.SERVER_QUOTA_ENFORCEMENT_ENABLED;
177177
import static com.linkedin.venice.ConfigKeys.SERVER_QUOTA_ENFORCEMENT_INTERVAL_IN_MILLIS;
178+
import static com.linkedin.venice.ConfigKeys.SERVER_READ_QUOTA_INITIALIZATION_FALLBACK_ENABLED;
178179
import static com.linkedin.venice.ConfigKeys.SERVER_RECORD_LEVEL_METRICS_WHEN_BOOTSTRAPPING_CURRENT_VERSION_ENABLED;
179180
import static com.linkedin.venice.ConfigKeys.SERVER_REMOTE_CONSUMER_CONFIG_PREFIX;
180181
import static com.linkedin.venice.ConfigKeys.SERVER_REMOTE_INGESTION_REPAIR_SLEEP_INTERVAL_SECONDS;
@@ -691,6 +692,7 @@ public class VeniceServerConfig extends VeniceClusterConfig {
691692

692693
private final boolean parallelResourceShutdownEnabled;
693694
private final int lagMonitorCleanupCycle;
695+
private final boolean readQuotaInitializationFallbackEnabled;
694696

695697
public VeniceServerConfig(VeniceProperties serverProperties) throws ConfigurationException {
696698
this(serverProperties, Collections.emptyMap());
@@ -1177,6 +1179,8 @@ public VeniceServerConfig(VeniceProperties serverProperties, Map<String, Map<Str
11771179
serverProperties.getBoolean(SERVER_PARALLEL_RESOURCE_SHUTDOWN_ENABLED, false);
11781180
this.lagMonitorCleanupCycle =
11791181
serverProperties.getInt(SERVER_LAG_MONITOR_CLEANUP_CYCLE, DEFAULT_LAG_MONITOR_CLEANUP_CYCLE);
1182+
this.readQuotaInitializationFallbackEnabled =
1183+
serverProperties.getBoolean(SERVER_READ_QUOTA_INITIALIZATION_FALLBACK_ENABLED, true);
11801184
}
11811185

11821186
List<Double> extractThrottleLimitFactorsFor(VeniceProperties serverProperties, String configKey) {
@@ -2131,4 +2135,8 @@ public boolean isParallelResourceShutdownEnabled() {
21312135
public int getLagMonitorCleanupCycle() {
21322136
return lagMonitorCleanupCycle;
21332137
}
2138+
2139+
/**
 * Returns the value of the {@code readQuotaInitializationFallbackEnabled} field, which the constructor reads from
 * the {@code SERVER_READ_QUOTA_INITIALIZATION_FALLBACK_ENABLED} property with a default of {@code true}.
 *
 * @return {@code true} if the read quota initialization fallback is enabled for this server
 */
public boolean isReadQuotaInitializationFallbackEnabled() {
2140+
return readQuotaInitializationFallbackEnabled;
2141+
}
21342142
}

internal/venice-common/src/main/java/com/linkedin/venice/ConfigKeys.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3085,4 +3085,11 @@ private ConfigKeys() {
30853085
*/
30863086
public static final String SYSTEM_STORE_VERSION_RETENTION_COUNT = "store.version.retention.count.system.store";
30873087
public static final int DEFAULT_SYSTEM_STORE_VERSION_RETENTION_COUNT = 5;
3088+
3089+
/**
3090+
* Whether the storage node falls back to a placeholder read quota, computed from the partitions assigned to this
3091+
* node, when CV is unavailable during initialization (until CV is available). If disabled, quota fails open.
3092+
*/
3093+
public static final String SERVER_READ_QUOTA_INITIALIZATION_FALLBACK_ENABLED =
3094+
"server.read.quota.initialization.fallback.enabled";
30883095
}

services/venice-server/src/main/java/com/linkedin/venice/listener/HttpChannelInitializer.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import com.linkedin.alpini.netty4.http2.Http2PipelineInitializer;
55
import com.linkedin.alpini.netty4.ssl.SslInitializer;
66
import com.linkedin.davinci.config.VeniceServerConfig;
7+
import com.linkedin.davinci.storage.StorageEngineRepository;
78
import com.linkedin.venice.acl.DynamicAccessController;
89
import com.linkedin.venice.acl.StaticAccessController;
910
import com.linkedin.venice.authorization.IdentityParser;
@@ -77,7 +78,8 @@ public HttpChannelInitializer(
7778
VeniceServerConfig serverConfig,
7879
Optional<StaticAccessController> routerAccessController,
7980
Optional<DynamicAccessController> storeAccessController,
80-
StorageReadRequestHandler requestHandler) {
81+
StorageReadRequestHandler requestHandler,
82+
StorageEngineRepository storageEngineRepository) {
8183
this.serverConfig = serverConfig;
8284
this.requestHandler = requestHandler;
8385
this.isDaVinciClient = serverConfig.isDaVinciClient();
@@ -146,6 +148,7 @@ public HttpChannelInitializer(
146148
serverConfig,
147149
storeMetadataRepository,
148150
customizedViewRepository,
151+
storageEngineRepository,
149152
nodeId,
150153
quotaUsageStats);
151154
} else {

services/venice-server/src/main/java/com/linkedin/venice/listener/ListenerService.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,8 @@ public ListenerService(
125125
serverConfig,
126126
routerAccessController,
127127
storeAccessController,
128-
requestHandler);
128+
requestHandler,
129+
storageEngineRepository);
129130

130131
Class<? extends ServerChannel> serverSocketChannelClass = NioServerSocketChannel.class;
131132
boolean epollEnabled = serverConfig.isRestServiceEpollEnabled();

0 commit comments

Comments
 (0)