Skip to content

Commit 3b7359b

Browse files
author
Akshay Chitneni
committed
Adding Data Cache
Signed-off-by: Akshay Chitneni <achitneni@apple.com>
1 parent d997dd9 commit 3b7359b

22 files changed

Lines changed: 10157 additions & 0 deletions

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,8 @@ cover.out
2626
# Helm
2727
charts/kubeflow-trainer/charts/
2828
charts/kubeflow-trainer/Chart.lock
29+
30+
# data_cache
31+
pkg/data_cache/target
32+
pkg/data_cache/src/client/target/
33+
hack/data_cache/*.log

cmd/data_cache/Dockerfile

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Stage 1: build (and test) the head and worker binaries
FROM rust:1.85-bullseye AS builder

WORKDIR /usr/src/arrow_cache

ENV RUST_LOG=info

# Install system dependencies
RUN apt-get update && \
    apt-get -y install libssl-dev openssl zlib1g zlib1g-dev libpq-dev cmake protobuf-compiler netcat curl && \
    rm -rf /var/lib/apt/lists/*

# Install cargo-chef for better caching
# NOTE(review): cargo-chef is installed here but no later step in this file
# invokes it (dependencies are pre-fetched with `cargo fetch` below) - confirm
# whether it is still needed.
RUN rustup update && cargo install cargo-chef --version 0.1.62

# Copy manifests for dependency caching
COPY pkg/data_cache/Cargo.toml pkg/data_cache/Cargo.lock ./

# Cache dependencies
RUN cargo fetch

# Copy source code
COPY pkg/data_cache/ .

# Run tests
RUN cargo test --tests

# Build binaries in release mode
RUN cargo build --release --bin head --bin worker

# Stage 2: Create a minimal runtime image
# NOTE(review): the builder is Debian bullseye while this stage is bookworm.
# If the binaries link dynamically against system libraries installed in the
# builder (e.g. libssl, libpq), those libraries are not installed here -
# confirm the binaries start in this image.
FROM debian:bookworm-slim AS runtime

# Install runtime dependencies (skip recommended packages to keep the image small)
RUN apt-get update && \
    apt-get -y install --no-install-recommends ca-certificates && \
    rm -rf /var/lib/apt/lists/*

# Copy binaries from builder stage
COPY --from=builder /usr/src/arrow_cache/target/release/head /usr/local/bin/head
COPY --from=builder /usr/src/arrow_cache/target/release/worker /usr/local/bin/worker

# Create non-root user
RUN groupadd -r cache_user && useradd -r -g cache_user cache_user

# Switch to the non-root user. The binaries deliberately stay owned by root:
# the service account only needs to execute them, and keeping /usr/local/bin
# read-only for that account prevents a compromised process from replacing
# its own executables.
USER cache_user

# Set default command
CMD ["head"]
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#!/bin/bash
2+
3+
# Check for required arguments
if (( $# < 5 )); then
  # Usage errors go to stderr so they are not mistaken for normal output.
  echo "Usage: $0 <iam-role-arn> <metadata-loc> <table-name> <schema-name> <aws-profile> [environment]" >&2
  exit 1
fi

IAM_ROLE_ARN="$1"
METADATA_LOC="$2"
TABLE_NAME="$3"
SCHEMA_NAME="$4"
AWS_PROFILE="$5"
ENVIRONMENT="${6:-LOCAL}"   # optional sixth argument, defaults to LOCAL

echo "Using IAM Role: $IAM_ROLE_ARN"
echo "Metadata Location: $METADATA_LOC"
echo "Table Name: $TABLE_NAME"
echo "Schema Name: $SCHEMA_NAME"
echo "Environment: $ENVIRONMENT"
echo "AWS Profile: $AWS_PROFILE"

# Assume the role. Abort on failure instead of continuing with empty
# credentials, which would only surface later as confusing auth errors.
if ! role_output=$(aws sts assume-role --role-arn "$IAM_ROLE_ARN" --role-session-name "RoleSession1" --profile "$AWS_PROFILE"); then
  echo "Error: failed to assume IAM role $IAM_ROLE_ARN using profile $AWS_PROFILE" >&2
  exit 1
fi

# Parse the JSON output using jq.
# -e makes jq exit non-zero when a field is missing or null, and assigning
# separately from `export` keeps that exit status visible.
if ! AWS_ACCESS_KEY_ID=$(jq -er '.Credentials.AccessKeyId' <<<"$role_output") ||
   ! AWS_SECRET_ACCESS_KEY=$(jq -er '.Credentials.SecretAccessKey' <<<"$role_output") ||
   ! AWS_SESSION_TOKEN=$(jq -er '.Credentials.SessionToken' <<<"$role_output"); then
  echo "Error: could not extract credentials from the assume-role response" >&2
  exit 1
fi
export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN
export AWS_REGION=us-west-2
export AWS_EC2_METADATA_DISABLED=true

# Set required environment variables for testing
export METADATA_LOC TABLE_NAME SCHEMA_NAME
export RUNTIME_ENV="$ENVIRONMENT"
37+
38+
# Function to cleanup processes on exit
39+
#######################################
# Stop every service this script started, remove the scratch metadata
# directory and exit.
# Globals:   WORKER1_PID, WORKER2_PID, HEAD_PID (read; any may be unset if
#            the script is interrupted before that service was launched)
# Outputs:   progress message on stdout
# Returns:   does not return; exits the shell with status 0
#######################################
cleanup() {
  local pids=()
  local pid
  local tries=0

  echo ""
  echo "Stopping services..."

  # Only signal services that were actually started.
  for pid in "${WORKER1_PID:-}" "${WORKER2_PID:-}" "${HEAD_PID:-}"; do
    if [[ -n "$pid" ]]; then
      pids+=("$pid")
    fi
  done

  if (( ${#pids[@]} > 0 )); then
    # Ask politely first so the services get a chance to shut down cleanly.
    kill -TERM "${pids[@]}" 2>/dev/null || true

    # Give them up to ~5 seconds; kill -0 succeeds while any PID is alive.
    while (( tries < 5 )) && kill -0 "${pids[@]}" 2>/dev/null; do
      sleep 1
      tries=$(( tries + 1 ))
    done

    # Force-stop whatever is left, then reap the children. Errors are
    # ignored on purpose: the processes may already be gone.
    kill -KILL "${pids[@]}" 2>/dev/null || true
    wait "${pids[@]}" 2>/dev/null || true
  fi

  rm -rf /tmp/test_metadata
  exit 0
}

# Set up signal handlers for graceful shutdown
trap cleanup SIGINT SIGTERM
50+
51+
# Function to check if a service is ready
52+
#######################################
# Block until a TCP port accepts connections, polling with `nc -z`.
# Arguments:
#   $1 - host to probe
#   $2 - port to probe
#   $3 - human-readable service name used in progress messages
#   $4 - optional timeout in seconds; 0 or omitted waits indefinitely
# Outputs:   progress messages on stdout; timeout message on stderr
# Returns:   0 once the port is reachable, 1 if the timeout elapsed first
#######################################
check_service_ready() {
  local host=$1
  local port=$2
  local service_name=$3
  local timeout_secs=${4:-0}
  local -r poll_secs=2
  local waited=0

  echo "Waiting for $service_name to be ready on $host:$port..."
  until nc -z "$host" "$port" 2>/dev/null; do
    # With a timeout, give up instead of spinning forever on a service
    # that failed to start.
    if (( timeout_secs > 0 && waited >= timeout_secs )); then
      echo "  $service_name did not become ready within $timeout_secs seconds" >&2
      return 1
    fi
    echo "  $service_name not ready yet, waiting 2 seconds..."
    sleep "$poll_secs"
    waited=$(( waited + poll_secs ))
  done
  echo "  $service_name is ready!"
}
64+
65+
# Launch both workers in the background, each logging to its own file.
# Their PIDs are recorded in WORKER1_PID / WORKER2_PID.
printf '%s\n' "Starting worker node 1..."
cargo run --bin worker -- 0.0.0.0 50052 >worker1.log 2>&1 &
WORKER1_PID=$!

printf '%s\n' "Starting worker node 2..."
cargo run --bin worker -- 0.0.0.0 50053 >worker2.log 2>&1 &
WORKER2_PID=$!

# The head node is started only after both worker ports accept connections.
check_service_ready localhost 50052 "worker1"
check_service_ready localhost 50053 "worker2"

printf '%s\n' "Both workers are ready, starting head node..."
cargo run --bin head -- 0.0.0.0 50051 >head.log 2>&1 &
HEAD_PID=$!

check_service_ready localhost 50051 "head"

# Block on the background jobs until they exit or the script is signalled.
printf '%s\n' "All services are running. Press Ctrl+C to stop all services."
wait
85+
86+
#echo "Running client test..."
87+
#cd src/client && cargo run 2>&1
88+
#CLIENT_EXIT_CODE=$?

0 commit comments

Comments
 (0)