
Commit f828d7e

lfrancke and sbernauer authored
Performance fixes (#50)
* Performance fixes
  - HTTP_CLIENT is now static and includes a connection timeout
  - StackableAuthorizer only constructs a single instance of StackableAccessControlEnforcer
  - StackableAccessControlEnforcer does not serialize to JSON twice anymore
  - Spotless formatting applied to code
* Add changelog entry
* Fix changelog entry
* chore: Update to HDFS 3.4.0 and fix Tilt workflow
* Add shell for benchmarks

---------

Co-authored-by: Sebastian Bernauer <[email protected]>
1 parent 55598e4 commit f828d7e

File tree

10 files changed: +309 -174 lines changed


CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -7,8 +7,10 @@ All notable changes to this project will be documented in this file.
 ### Changed
 
 - Bump okio to 1.17.6 to get rid of CVE-2023-3635 ([#46])
+- Performance fixes ([#50])
 
 [#46]: https://github.com/stackabletech/hdfs-utils/pull/46
+[#50]: https://github.com/stackabletech/hdfs-utils/pull/50
 
 ## [0.3.0] - 2024-07-04

Dockerfile

Lines changed: 7 additions & 3 deletions
@@ -1,4 +1,8 @@
-FROM docker.stackable.tech/stackable/hadoop:3.3.6-stackable0.0.0-dev
+FROM docker.stackable.tech/stackable/hadoop:3.4.0-stackable0.0.0-dev
 
-COPY --chown=stackable:stackable ./hdfs-utils-*.jar /stackable/hadoop/share/hadoop/tools/lib/
-COPY --chown=stackable:stackable ./bom.json /stackable/hadoop/share/hadoop/tools/lib/hdfs-utils.cdx.json
+# Remove existing hdfs-utils jars, so we can ship our custom one
+RUN rm -f /stackable/hadoop/share/hadoop/common/lib/hdfs-utils-*.jar
+RUN rm -f /stackable/hadoop/share/hadoop/tools/lib/hdfs-utils-*.jar
+
+COPY --chown=stackable:stackable ./hdfs-utils-*.jar /stackable/hadoop/share/hadoop/common/lib/
+COPY --chown=stackable:stackable ./bom.json /stackable/hadoop/share/hadoop/common/lib/hdfs-utils.cdx.json

pom.xml

Lines changed: 1 addition & 0 deletions
@@ -35,6 +35,7 @@
     <maven.compiler.release>${java.version}</maven.compiler.release>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
 
+    <!-- Tip: Use "mvn versions:display-dependency-updates" to check for updates -->
     <cleanthat.version>2.17</cleanthat.version>
     <error-prone.version>2.28.0</error-prone.version>
     <google-java-format.version>1.19.2</google-java-format.version>

src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java

Lines changed: 194 additions & 139 deletions
Large diffs are not rendered by default.
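The diff for this class is collapsed above, so here is only a rough, hedged sketch of the "does not serialize to JSON twice anymore" item from the commit message (the class and method names below are illustrative, not taken from the repo): serialize the OPA request object once and reuse the resulting string for both logging and the HTTP body.

import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;

public final class SerializeOnceSketch {
  private static final ObjectMapper JSON = new ObjectMapper();

  // Hypothetical helper: build the JSON body a single time. A common source of
  // double serialization is calling writeValueAsString once for a debug log
  // line and again for the request body; reusing one string avoids that.
  static String toRequestBody(Object opaRequest, Logger log) throws Exception {
    String body = JSON.writeValueAsString(opaRequest);
    log.debug("OPA request: {}", body);
    return body; // send this same string as the HTTP POST body
  }
}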

src/main/java/tech/stackable/hadoop/StackableAuthorizer.java

Lines changed: 26 additions & 22 deletions
@@ -7,26 +7,30 @@
 
 public class StackableAuthorizer extends INodeAttributeProvider {
 
-    private static final Logger LOG = LoggerFactory.getLogger(StackableAuthorizer.class);
-
-    @Override
-    public void start() {
-        LOG.debug("Starting HdfsOpaAuthorizer");
-    }
-
-    @Override
-    public void stop() {
-        LOG.debug("Stopping HdfsOpaAuthorizer");
-    }
-
-    @Override
-    public INodeAttributes getAttributes(String[] strings, INodeAttributes iNodeAttributes) {
-        // No special attributes needed
-        return iNodeAttributes;
-    }
-
-    @Override
-    public AccessControlEnforcer getExternalAccessControlEnforcer(AccessControlEnforcer defaultEnforcer) {
-        return new StackableAccessControlEnforcer();
-    }
+  private static final Logger LOG = LoggerFactory.getLogger(StackableAuthorizer.class);
+
+  private static final StackableAccessControlEnforcer ENFORCER =
+      new StackableAccessControlEnforcer();
+
+  @Override
+  public void start() {
+    LOG.debug("Starting HdfsOpaAuthorizer");
+  }
+
+  @Override
+  public void stop() {
+    LOG.debug("Stopping HdfsOpaAuthorizer");
+  }
+
+  @Override
+  public INodeAttributes getAttributes(String[] strings, INodeAttributes iNodeAttributes) {
+    // No special attributes needed
+    return iNodeAttributes;
+  }
+
+  @Override
+  public AccessControlEnforcer getExternalAccessControlEnforcer(
+      AccessControlEnforcer defaultEnforcer) {
+    return ENFORCER;
+  }
 }

src/main/java/tech/stackable/hadoop/StackableGroupMapper.java

Lines changed: 5 additions & 7 deletions
@@ -8,12 +8,9 @@
 import java.net.http.HttpClient;
 import java.net.http.HttpRequest;
 import java.net.http.HttpResponse;
-import java.util.HashMap;
+import java.time.Duration;
 import java.util.List;
-import java.util.Map;
 import java.util.Objects;
-
-import com.fasterxml.jackson.databind.type.TypeFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.GroupMappingServiceProvider;
 import org.slf4j.Logger;
@@ -23,7 +20,8 @@ public class StackableGroupMapper implements GroupMappingServiceProvider {
 
   public static final String OPA_MAPPING_URL_PROP = "hadoop.security.group.mapping.opa.policy.url";
   private static final Logger LOG = LoggerFactory.getLogger(StackableGroupMapper.class);
-  private final HttpClient httpClient = HttpClient.newHttpClient();
+  private static final HttpClient HTTP_CLIENT =
+      HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(30)).build();
   private final ObjectMapper json;
   private URI opaUri;
 
@@ -80,13 +78,13 @@ public List<String> getGroups(String user) {
     HttpResponse<String> response = null;
     try {
       response =
-          httpClient.send(
+          HTTP_CLIENT.send(
               HttpRequest.newBuilder(opaUri)
                   .header("Content-Type", "application/json")
                   .POST(HttpRequest.BodyPublishers.ofString(body))
                   .build(),
               HttpResponse.BodyHandlers.ofString());
-      LOG.debug("Opa response: {}", response.body());
+      LOG.debug("OPA response: {}", response.body());
     } catch (Exception e) {
       LOG.error(e.getMessage());
       throw new OpaException.QueryFailed(e);

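For context on the HTTP_CLIENT change above: java.net.http.HttpClient is immutable and thread-safe, so one static instance can be shared across calls and its connection pool reused instead of constructing a new client per mapper. A minimal, self-contained sketch of the same pattern (class and method names here are illustrative, not the repo's):

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;

public final class SharedHttpClientSketch {
  // One client per JVM, with a connection timeout so a stuck OPA endpoint
  // cannot hang the caller indefinitely while establishing a connection.
  private static final HttpClient HTTP_CLIENT =
      HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(30)).build();

  // Hypothetical helper: POST a JSON body and return the response body.
  static String postJson(URI uri, String body) throws Exception {
    HttpRequest request =
        HttpRequest.newBuilder(uri)
            .header("Content-Type", "application/json")
            .POST(HttpRequest.BodyPublishers.ofString(body))
            .build();
    HttpResponse<String> response =
        HTTP_CLIENT.send(request, HttpResponse.BodyHandlers.ofString());
    return response.body();
  }
}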
test/stack/20-hdfs.yaml

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ metadata:
   name: simple-hdfs
 spec:
   image:
-    productVersion: 3.3.6
+    productVersion: 3.4.0
     custom: hdfs # Will be overwritten by Tilt
     pullPolicy: IfNotPresent
   clusterConfig:

test/stack/30-test-hdfs-permissions.yaml

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ spec:
     spec:
       containers:
         - name: test-hdfs-permissions
-          image: docker.stackable.tech/stackable/hadoop:3.3.6-stackable0.0.0-dev
+          image: docker.stackable.tech/stackable/hadoop:3.4.0-stackable0.0.0-dev
           env:
             - name: HADOOP_CONF_DIR
              value: /stackable/conf/hdfs

test/stack/31-benchmark-shell.yaml

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: benchmark-shell
+spec:
+  template:
+    spec:
+      containers:
+        - name: benchmark-shell
+          image: docker.stackable.tech/stackable/hadoop:3.4.0-stackable0.0.0-dev
+          env:
+            - name: HADOOP_CONF_DIR
+              value: /stackable/conf/hdfs
+            - name: KRB5_CONFIG
+              value: /stackable/kerberos/krb5.conf
+            - name: HADOOP_OPTS
+              value: -Djava.security.krb5.conf=/stackable/kerberos/krb5.conf
+          command:
+            - /bin/bash
+            - -c
+            - |
+              set -ex
+              klist -k /stackable/kerberos/keytab
+
+              log_in () { kdestroy; kinit -kt /stackable/kerberos/keytab $1/benchmark-shell.default.svc.cluster.local; }
+
+              log_in admin
+
+              bin/hdfs dfs -mkdir -p /bench
+              bin/hdfs dfs -ls /bench
+
+              # for i in $(seq 0 10); do echo "Creating $i" && bin/hdfs dfs -put -f /etc/hosts /bench/$i; done
+
+              # Watch out for the exact command you are using! (e.g. don't use "du -h /"). Check the NameNode logs to
+              # make sure you actually produce enough OPA calls.
+              # time bin/hdfs dfs -du -h /bench
+
+              # So that you can run the benchmark manually
+              sleep infinity
+
+              exit 0
+          volumeMounts:
+            - name: hdfs-config
+              mountPath: /stackable/conf/hdfs
+            - name: kerberos
+              mountPath: /stackable/kerberos
+      volumes:
+        - name: hdfs-config
+          configMap:
+            name: simple-hdfs
+        - name: kerberos
+          ephemeral:
+            volumeClaimTemplate:
+              metadata:
+                annotations:
+                  secrets.stackable.tech/class: kerberos-default
+                  secrets.stackable.tech/scope: service=benchmark-shell
+                  secrets.stackable.tech/kerberos.service.names: admin,alice,bob
+              spec:
+                storageClassName: secrets.stackable.tech
+                accessModes:
+                  - ReadWriteOnce
+                resources:
+                  requests:
+                    storage: "1"
+      securityContext:
+        fsGroup: 1000
+        runAsGroup: 1000
+        runAsUser: 1000
+      restartPolicy: OnFailure
restartPolicy: OnFailure

test/topology-provider/stack/03-hdfs.yaml

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ metadata:
   name: simple-hdfs
 spec:
   image:
-    productVersion: 3.3.6
+    productVersion: 3.4.0
     custom: hdfs # updated by tilt
     pullPolicy: IfNotPresent
   clusterConfig:
