Add 601.pdf-generator benchmark and its data

octonawish-akcodes · octonawish-akcodes · commit 019d5712564e · 2024-08-24T11:09:03.000+05:30
Signed-off-by: Abhishek Kumar &lt;abhishek22512@gmail.com&gt;
diff --git a/benchmarks-data b/benchmarks-data
@@ -1 +1 @@
-Subproject commit 6a17a460f289e166abb47ea6298fb939e80e8beb
+Subproject commit f407c24814f623f77dcb535d882c241909ae7588
diff --git a/benchmarks/600.pdf/601.pdf-generator/config.json b/benchmarks/600.pdf/601.pdf-generator/config.json
@@ -0,0 +1,6 @@
+{
+    "timeout": 60,
+    "memory": 256,
+    "languages": ["nodejs"]
+}
+  
diff --git a/benchmarks/600.pdf/601.pdf-generator/input.py b/benchmarks/600.pdf/601.pdf-generator/input.py
@@ -0,0 +1,32 @@
+import os
+import glob
+
+def buckets_count():
+    return (1, 1)  # (input buckets, output buckets): one of each
+
+def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func):
+    # The HTML file and the images directory
+    input_file_path = os.path.join(data_dir, 'template', 'demo.html')
+    images_dir = os.path.join(data_dir, 'template', 'images')  # Directory path
+
+    # Initialize input_config with 'object' and 'bucket' fields
+    input_config = {'object': {}, 'bucket': {}}
+    
+    # Upload the HTML file to the input bucket
+    upload_func(0, "demo.html", input_file_path)
+    
+    # Prepare the bucket configuration
+    input_config['bucket']['bucket'] = benchmarks_bucket
+    input_config['bucket']['input'] = input_paths[0]
+    input_config['bucket']['output'] = output_paths[0]
+
+    # Upload each image in the images directory to the input bucket
+    for file in glob.glob(os.path.join(images_dir, '*.png')):
+        img = os.path.relpath(file, data_dir)
+        upload_func(0, img, file)
+
+    # Store the list of image file configurations in 'object'
+    input_config['object']['key'] = "images/"
+    input_config['object']['input_file'] = 'demo.html'
+
+    return input_config
diff --git a/benchmarks/600.pdf/601.pdf-generator/nodejs/function.js b/benchmarks/600.pdf/601.pdf-generator/nodejs/function.js
@@ -0,0 +1,63 @@
+const puppeteer = require('puppeteer-core');
+const path = require('path');
+const fs = require('fs');
+const { PassThrough } = require('stream');
+const storage = require('./storage');
+
+let storage_handler = new storage.storage();
+// Chrome binary location; presumably unpacked under ./chromium by init.sh — confirm its DIR matches
+const browserPath = path.join(__dirname, 'chromium/chrome-linux64/chrome');
+
+
+exports.handler = async function(event) {
+  const bucket = event.bucket.bucket;
+  const input_prefix = event.bucket.input;
+  const output_prefix = event.bucket.output;
+  const input_file = event.object.input_file;
+
+  // Create a read stream for the input HTML file
+  let readStreamPromise = storage_handler.downloadStream(bucket, path.join(input_prefix, input_file));
+  
+  // Create a PassThrough stream to pipe the HTML content into Puppeteer
+  const htmlStream = new PassThrough();
+
+  // Create a write stream for the output PDF file
+  let [writeStream, promise, uploadName] = storage_handler.uploadStream(bucket, path.join(output_prefix, 'output.pdf'));
+
+  try {
+    // Download the HTML file from storage
+    const inputStream = await readStreamPromise;
+    inputStream.pipe(htmlStream);
+
+    // Launch Puppeteer and generate the PDF
+    const browser = await puppeteer.launch({ executablePath: browserPath });
+    const page = await browser.newPage();
+    await page.setContent(await streamToString(htmlStream), { waitUntil: 'networkidle0' });
+    const pdfBuffer = await page.pdf({ format: 'A4' });
+    
+    // Close Puppeteer
+    await browser.close();
+
+    // Pipe the PDF buffer into the write stream
+    writeStream.write(pdfBuffer);
+    writeStream.end();
+
+    // Wait for upload to complete
+    await promise;
+
+    return { bucket: output_prefix, key: uploadName };
+  } catch (error) {
+    console.error('Error generating PDF:', error);
+    throw error;
+  }
+};
+
+// Utility function to convert a stream to a string
+function streamToString(stream) {
+  return new Promise((resolve, reject) => {
+    const chunks = [];
+    stream.on('data', chunk => chunks.push(chunk));
+    stream.on('end', () => resolve(Buffer.concat(chunks).toString('utf8')));
+    stream.on('error', reject);
+  });
+}
diff --git a/benchmarks/600.pdf/601.pdf-generator/nodejs/init.sh b/benchmarks/600.pdf/601.pdf-generator/nodejs/init.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Downloads Chrome for Testing and unpacks it into <DIR>/chromium/chrome-linux64.
+# Usage: init.sh <DIR> [VERBOSE]
+
+# Abort on the first failing step instead of leaving a half-installed browser
+set -e
+
+DIR=$1
+VERBOSE=$2
+
+CHROMIUM_URL="https://storage.googleapis.com/chrome-for-testing-public/127.0.6533.88/linux64/chrome-linux64.zip"
+
+# Define the script directory and the download path
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+DOWNLOAD_DIR="${DIR}/chromium"
+
+# Create the target directory if it doesn't exist
+mkdir -p "$DOWNLOAD_DIR"
+
+# Download Chromium; -f fails on HTTP errors and -L follows redirects
+curl -fL -o "${DOWNLOAD_DIR}/chrome-linux.zip" "$CHROMIUM_URL"
+
+# Extract the archive (-o: overwrite without prompting). Its top-level directory is
+# chrome-linux64, the layout function.js expects, so nothing is moved afterwards
+unzip -q -o "${DOWNLOAD_DIR}/chrome-linux.zip" -d "$DOWNLOAD_DIR"
+
+# Clean up the downloaded zip file
+rm "${DOWNLOAD_DIR}/chrome-linux.zip"
+
diff --git a/benchmarks/600.pdf/601.pdf-generator/nodejs/package.json b/benchmarks/600.pdf/601.pdf-generator/nodejs/package.json
@@ -0,0 +1,8 @@
+{
+  "name": "pdf-generator",
+  "version": "1.0.0",
+  "description": "PDF Generator Benchmark using Puppeteer",
+  "dependencies": {
+    "puppeteer-core": "^22.15.0"
+  }
+}