diff --git a/README_bilt.md b/README_bilt.md new file mode 100644 index 0000000..7f07e3d --- /dev/null +++ b/README_bilt.md @@ -0,0 +1,59 @@ +# Bilt AR Wagon Fork + +go/arwagon + +This describes how to use Bilt's GCP Artifact Registry maven wagon fork, which has much improved +performance. GCP promised they are treating poor Java AR performance as critical, but it is unclear +when significant improvements will actually be made. + +Q: Would it be just as fast to use virtual with fallback to maven central in pom.xml? +Q: Does remote API have quota problems unlike remote? +Q: Does remote API not pull from central unlike remote? +Q: how to do jobrunr crap? + + + +## Interface + +On GitHub, we use remote API; everywhere else we just use maven central. This can be overriden by: + +* **-Dcom.bilt.internal.arwagon.bilt-redirect=**, where value is either: + * none + * standard +* **-Dcom.bilt.internal.arwagon.other-redirect=**, where value is either: + * none + * remote + * remote-api + * maven-central + + +## Performance + +On Github ubuntu-latest, with wagon 2.2.5, mvn 3.9.11 (default), mvn go:dependency-offline took: + +* tests/maven-0-artifacts: 19:21 min +* tests/quarkus-starter: 39:04 min +* tests/payment-svc: 179 min + +On local dev machine in VA, maven 3.8.7, tests/maven-0-artifacts (~450 downloads, excluding SHAs): + +* maven central: 12s +* wagon 2.2.5, bilt-maven (mvn will fallback to maven central): 1:27 min +* wagon 2.2.5, bilt-maven, fallback to maven-central-cache: 5:32 min +* wagon 2.2.5, virtual: 13:08 min +* wagon 2.2.5 (edited), virtual, redirect non-bilt to remote AR API: 1:43min + +See here for more numbers: https://docs.google.com/document/d/1z8XYDS-xTj2Y3EzUMFGM-lqa9zGcHnjo3B_KI-aiiBc/edit?tab=t.0#heading=h.hhx1fxwzd219 + +## Future Improvements + +use latest maven with parallelization flags and bf collector (2-10x) + +run builds in GCP network (2-10x) + +prefetch SHAs/JARs (or lazily verify SHAs) + +> Maven fetches pom, then pom.sha1, then eventually jar, then jar.sha1. We can prefetch on +> pom request, and/or compute the SHAs ourself and verify async. + +better connection pooling / http2 (~15% from early experiments) diff --git a/artifactregistry-maven-wagon/src/main/java/com/google/cloud/artifactregistry/wagon/ArtifactRegistryWagon.java b/artifactregistry-maven-wagon/src/main/java/com/google/cloud/artifactregistry/wagon/ArtifactRegistryWagon.java index 8c2e72a..6f24507 100644 --- a/artifactregistry-maven-wagon/src/main/java/com/google/cloud/artifactregistry/wagon/ArtifactRegistryWagon.java +++ b/artifactregistry-maven-wagon/src/main/java/com/google/cloud/artifactregistry/wagon/ArtifactRegistryWagon.java @@ -32,9 +32,25 @@ import com.google.cloud.artifactregistry.auth.CredentialProvider; import com.google.cloud.artifactregistry.auth.DefaultCredentialProvider; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.Map; + import org.apache.maven.wagon.AbstractWagon; import org.apache.maven.wagon.ConnectionException; import org.apache.maven.wagon.ResourceDoesNotExistException; @@ -54,11 +70,20 @@ public final class ArtifactRegistryWagon extends AbstractWagon { private HttpTransportFactory httpTransportFactory = NetHttpTransport::new; private CredentialProvider credentialProvider = DefaultCredentialProvider.getInstance(); private Credentials credentials; + private static final Map sha1Cache = new ConcurrentHashMap<>(); + private static final ExecutorService jarPrefetchExecutor = new ThreadPoolExecutor( + 10, // core pool size + 50, // maximum pool size + 60L, TimeUnit.SECONDS, // keep alive time + new LinkedBlockingQueue<>() // work queue + ); + private static final String CACHE_DIR = "/tmp/biltarwagon/"; private InputStream getInputStream(Resource resource) throws TransferFailedException, ResourceDoesNotExistException, AuthorizationException { try { GenericUrl url = googleRepository.constructURL(resource.getName()); + System.out.println(url); HttpRequest request = requestFactory.buildGetRequest(url); HttpResponse response = request.execute(); return response.getContent(); @@ -117,12 +142,66 @@ public boolean resourceExists(String resource) @Override public boolean getIfNewer(String resourceName, File destination, long timestamp) throws TransferFailedException, ResourceDoesNotExistException, AuthorizationException { + // Check if this is a request for a .sha1 file + if (resourceName.endsWith(".sha1")) { + String originalResource = resourceName.substring(0, resourceName.length() - 5); // Remove .sha1 suffix + String cachedSha1 = sha1Cache.get(originalResource); + if (cachedSha1 != null) { + // Write the cached SHA1 to the destination file + try (OutputStream out = new java.io.FileOutputStream(destination)) { + out.write(cachedSha1.getBytes("UTF-8")); + System.out.println("CACHE HIT SHA1: " + resourceName); + return true; + } catch (IOException e) { + throw new TransferFailedException("Failed to write SHA1 to destination file.", e); + } + } else { + System.out.println("CACHE MISS SHA1: " + resourceName); + } + } + + // Check if this is a .jar request and we have it cached + if (resourceName.endsWith(".jar")) { + File cachedJarFile = getCachedJarFile(resourceName); + if (cachedJarFile.exists()) { + try { + Files.copy(cachedJarFile.toPath(), destination.toPath(), StandardCopyOption.REPLACE_EXISTING); + System.out.println("CACHE HIT JAR: " + resourceName); + + // Ensure SHA1 is cached for this jar + if (!sha1Cache.containsKey(resourceName)) { + String sha1Hash = computeSha1(destination); + sha1Cache.put(resourceName, sha1Hash); + } + + return true; + } catch (IOException e) { + System.err.println("Failed to copy cached jar file, falling back to remote fetch: " + e.getMessage()); + // Fall through to normal fetch + } + } else { + System.out.println("CACHE MISS JAR: " + resourceName); + } + } + + // If this is a .pom file, prefetch the corresponding .jar file before downloading + if (resourceName.endsWith(".pom")) { + prefetchJarFile(resourceName); + } + Resource resource = new Resource(resourceName); this.fireGetInitiated(resource, destination); try { this.fireGetStarted(resource, destination); InputStream input = getInputStream(resource); this.getTransfer(resource, destination, input); + + // Compute and cache SHA1 for .pom and .jar files + if (resourceName.endsWith(".pom") || resourceName.endsWith(".jar")) { + String sha1Hash = computeSha1(destination); + sha1Cache.put(resourceName, sha1Hash); + } + this.fireGetCompleted(resource, destination); } catch (Exception e) { this.fireTransferError(resource, e, TransferEvent.REQUEST_GET); @@ -138,6 +217,94 @@ public void setHttpTransportFactory(HttpTransportFactory httpTransportFactory) { public void setCredentialProvider(CredentialProvider provider) { this.credentialProvider = provider; } + + private String computeSha1(File file) throws TransferFailedException { + try { + MessageDigest digest = MessageDigest.getInstance("SHA-1"); + try (FileInputStream fis = new FileInputStream(file)) { + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = fis.read(buffer)) != -1) { + digest.update(buffer, 0, bytesRead); + } + } + byte[] hashBytes = digest.digest(); + StringBuilder sb = new StringBuilder(); + for (byte b : hashBytes) { + sb.append(String.format("%02x", b)); + } + return sb.toString(); + } catch (NoSuchAlgorithmException | IOException e) { + throw new TransferFailedException("Failed to compute SHA1 hash.", e); + } + } + + private void ensureCacheDirectory() { + try { + Path cacheDir = Paths.get(CACHE_DIR); + if (!Files.exists(cacheDir)) { + Files.createDirectories(cacheDir); + } + } catch (IOException e) { + System.err.println("Failed to create cache directory: " + e.getMessage()); + } + } + + private File getCachedJarFile(String jarResourceName) { + // Replace path separators with underscores to create a flat file structure + String fileName = jarResourceName.replace('/', '_').replace('\\', '_'); + return new File(CACHE_DIR + fileName); + } + + private void prefetchJarFile(String pomResourceName) { + // Convert .pom resource name to .jar resource name + if (!pomResourceName.endsWith(".pom")) { + return; + } + + String jarResourceName = pomResourceName.substring(0, pomResourceName.length() - 4) + ".jar"; + File cachedJarFile = getCachedJarFile(jarResourceName); + + // Skip if already cached + if (cachedJarFile.exists()) { + return; + } + + jarPrefetchExecutor.submit(() -> { + try { + ensureCacheDirectory(); + Resource jarResource = new Resource(jarResourceName); + InputStream input = getInputStream(jarResource); + + // Create temporary file first, then move to final location atomically + File tempFile = new File(cachedJarFile.getAbsolutePath() + ".tmp"); + try (OutputStream out = new java.io.FileOutputStream(tempFile)) { + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = input.read(buffer)) != -1) { + out.write(buffer, 0, bytesRead); + } + } + + // Atomically move temp file to final location + Files.move(tempFile.toPath(), cachedJarFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + + // Compute and cache SHA1 for the prefetched jar + String sha1Hash = computeSha1(cachedJarFile); + sha1Cache.put(jarResourceName, sha1Hash); + + System.out.println("PREFETCHED JAR: " + jarResourceName); + + } catch (Exception e) { + System.err.println("Failed to prefetch jar " + jarResourceName + ": " + e.getMessage()); + // Clean up any partial files + File tempFile = new File(cachedJarFile.getAbsolutePath() + ".tmp"); + if (tempFile.exists()) { + tempFile.delete(); + } + } + }); + } private void handlePutRequest(File source, Resource resource, GenericUrl url) throws AuthorizationException, ResourceDoesNotExistException, TransferFailedException { @@ -240,12 +407,28 @@ private static class GoogleRepository { } GenericUrl constructURL(String artifactPath) { + if (artifactPath.startsWith("com/bilt/") || artifactPath.startsWith("com/biltrewards/") || artifactPath.startsWith("com/biltcard/")) { + GenericUrl url = new GenericUrl(); + url.setScheme("https"); + url.setHost(repository.getHost()); + url.appendRawPath("/single-scholar-280421/bilt-maven"); + url.appendRawPath("/"); + url.appendRawPath(artifactPath); + return url; + } + GenericUrl url = new GenericUrl(); url.setScheme("https"); - url.setHost(repository.getHost()); - url.appendRawPath(repository.getBasedir()); - url.appendRawPath("/"); - url.appendRawPath(artifactPath); + url.setHost("artifactregistry.googleapis.com"); + url.appendRawPath("/download/v1/projects/single-scholar-280421/locations/us/repositories/maven-central-cache/files/"); + try { + url.appendRawPath(URLEncoder.encode(artifactPath, "UTF-8")); + } catch (UnsupportedEncodingException e) { + // UTF-8 is always supported, this should never happen + throw new RuntimeException("UTF-8 encoding not supported", e); + } + url.appendRawPath(":download"); + url.set("alt", "media"); return url; } } diff --git a/run_test.sh b/run_test.sh index bf9c0e0..6ef3af5 100755 --- a/run_test.sh +++ b/run_test.sh @@ -10,7 +10,7 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) TEST_DIR=$1 -REPO_URL=artifactregistry://us-maven.pkg.dev/single-scholar-280421/bilt-private-and-maven-central +REPO_URL=${REPO_URL:-artifactregistry://us-maven.pkg.dev/single-scholar-280421/bilt-private-and-maven-central} # MVN_IMAGE=maven:3.9.11-eclipse-temurin-21-alpine # 3.8.7-eclipse-temurin-17-alpine @@ -33,6 +33,16 @@ EXTENSIONS_XML=" " +# SETTINGS_XML=" +# +# + +# +# " + SETTINGS_XML=" " +# SETTINGS_XML=" +# +# + +# +# +# custom-repos +# +# +# custom +# $REPO_URL +# +# +# custom2 +# artifactregistry://us-maven.pkg.dev/single-scholar-280421/maven-central-cache +# +# +# +# +# custom +# $REPO_URL +# +# +# custom2 +# artifactregistry://us-maven.pkg.dev/single-scholar-280421/maven-central-cache +# +# +# +# + +# +# custom-repos +# +# +# " + ########################################################################### ##### END CONFIG ########################################################################### @@ -73,19 +122,18 @@ function run_test() { repo_dir=$run_dir/_repo cmd="mvn $MVN_ACTION -B -Dmaven.test.skip=true $MVN_FLAGS" - touch $run_dir/settings.xml + mkdir -p $run_dir/.mvn if [[ -n "$REPO_URL" ]]; then echo "Building wagon..." (cd $SCRIPT_DIR && ./gradlew publishToMavenLocal -Dmaven.repo.local="$repo_dir") - mkdir -p $run_dir/.mvn echo "$EXTENSIONS_XML" > $run_dir/.mvn/extensions.xml - - echo "$SETTINGS_XML" > $run_dir/settings.xml - cmd="$cmd -s settings.xml" fi + echo "$SETTINGS_XML" > $run_dir/settings.xml + cmd="$cmd -s settings.xml" + if [[ -n "$MVN_IMAGE" ]]; then if [[ -n "$GOOGLE_APPLICATION_CREDENTIALS" ]]; then creds="-v $GOOGLE_APPLICATION_CREDENTIALS:/root/.config/gcloud/application_default_credentials.json:ro" diff --git a/tests/bilt-1-artifact/pom.xml b/tests/bilt-1-artifact/pom.xml new file mode 100644 index 0000000..f871ccc --- /dev/null +++ b/tests/bilt-1-artifact/pom.xml @@ -0,0 +1,37 @@ + + + 4.0.0 + org.acme + slow + 1.0.0-SNAPSHOT + + + 3.14.0 + 21 + UTF-8 + UTF-8 + true + + + + + com.biltcard.common + egress-proxy + 1.1.2 + + + + + + + maven-compiler-plugin + ${compiler-plugin.version} + + true + + + + + \ No newline at end of file