Skip to content

Commit a6c3b63

Browse files
committed
Merge branch 'issue-2130-duplicate-file-remediation' into refactor/consolidate-snapshot-expiration
2 parents 5fee547 + e666efb commit a6c3b63

File tree

93 files changed

+4029
-6864
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

93 files changed

+4029
-6864
lines changed

.github/ISSUE_TEMPLATE/iceberg_bug_report.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@ body:
2828
description: What Apache Iceberg version are you using?
2929
multiple: false
3030
options:
31-
- "0.9.1 (latest release)"
32-
- "0.9.0"
31+
- "0.9.0 (latest release)"
3332
- "0.8.1"
3433
- "0.8.0"
3534
- "0.7.1"

.github/workflows/pypi-build-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ jobs:
6262
if: startsWith(matrix.os, 'ubuntu')
6363

6464
- name: Build wheels
65-
uses: pypa/cibuildwheel@v3.0.0
65+
uses: pypa/cibuildwheel@v2.23.2
6666
with:
6767
output-dir: wheelhouse
6868
config-file: "pyproject.toml"

.github/workflows/python-ci.yml

Lines changed: 3 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -58,53 +58,9 @@ jobs:
5858
python-version: ${{ matrix.python }}
5959
cache: poetry
6060
cache-dependency-path: ./poetry.lock
61-
- name: Install system dependencies
62-
run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos
6361
- name: Install
6462
run: make install-dependencies
65-
- name: Run linters
63+
- name: Linters
6664
run: make lint
67-
- name: Run unit tests with coverage
68-
run: COVERAGE=1 make test
69-
- name: Generate coverage report (85%) # Coverage threshold should only increase over time — never decrease it!
70-
run: COVERAGE_FAIL_UNDER=85 make coverage-report
71-
72-
integration-test:
73-
runs-on: ubuntu-22.04
74-
strategy:
75-
matrix:
76-
python: ['3.9', '3.10', '3.11', '3.12']
77-
78-
steps:
79-
- uses: actions/checkout@v4
80-
- name: Install system dependencies
81-
run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos
82-
- name: Install
83-
run: make install
84-
85-
- name: Run integration tests with coverage
86-
run: COVERAGE=1 make test-integration
87-
- name: Show debug logs
88-
if: ${{ failure() }}
89-
run: docker compose -f dev/docker-compose.yml logs
90-
91-
- name: Run s3 integration tests with coverage
92-
run: COVERAGE=1 make test-s3
93-
- name: Show debug logs
94-
if: ${{ failure() }}
95-
run: docker compose -f dev/docker-compose.yml logs
96-
97-
- name: Run adls integration tests with coverage
98-
run: COVERAGE=1 make test-adls
99-
- name: Show debug logs
100-
if: ${{ failure() }}
101-
run: docker compose -f dev/docker-compose-azurite.yml logs
102-
103-
- name: Run gcs integration tests with coverage
104-
run: COVERAGE=1 make test-gcs
105-
- name: Show debug logs
106-
if: ${{ failure() }}
107-
run: docker compose -f dev/docker-compose-gcs-server.yml logs
108-
109-
- name: Generate coverage report (75%) # Coverage threshold should only increase over time — never decrease it!
110-
run: COVERAGE_FAIL_UNDER=75 make coverage-report
65+
- name: Tests
66+
run: make test-coverage
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
#
19+
20+
name: "Python Integration"
21+
22+
on:
23+
push:
24+
branches:
25+
- 'main'
26+
pull_request:
27+
paths:
28+
- '**' # Include all files and directories in the repository by default.
29+
- '!.github/workflows/**' # Exclude all workflow files
30+
- '.github/workflows/python-integration.yml' # except the current file.
31+
- '!.github/ISSUE_TEMPLATE/**' # Exclude files and directories that don't impact tests or code like templates, metadata, and documentation.
32+
- '!.gitignore'
33+
- '!.asf.yml'
34+
- '!mkdocs/**'
35+
- '!.gitattributes'
36+
- '!README.md'
37+
- '!CONTRIBUTING.md'
38+
- '!LICENSE'
39+
- '!NOTICE'
40+
41+
concurrency:
42+
group: ${{ github.workflow }}-${{ github.ref }}
43+
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
44+
45+
jobs:
46+
integration-test:
47+
runs-on: ubuntu-22.04
48+
49+
steps:
50+
- uses: actions/checkout@v4
51+
with:
52+
fetch-depth: 2
53+
- name: Install
54+
run: make install
55+
- name: Run integration tests
56+
run: make test-integration
57+
- name: Show debug logs
58+
if: ${{ failure() }}
59+
run: docker compose -f dev/docker-compose.yml logs

.github/workflows/svn-build-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ jobs:
5757
if: startsWith(matrix.os, 'ubuntu')
5858

5959
- name: Build wheels
60-
uses: pypa/cibuildwheel@v3.0.0
60+
uses: pypa/cibuildwheel@v2.23.2
6161
with:
6262
output-dir: wheelhouse
6363
config-file: "pyproject.toml"

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ coverage.xml
3535
.project
3636
.settings
3737
bin/
38-
.vscode/
3938

4039
# Hive/metastore files
4140
metastore_db/

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,13 @@ repos:
2727
- id: check-yaml
2828
- id: check-ast
2929
- repo: https://github.com/astral-sh/ruff-pre-commit
30-
rev: v0.11.13
30+
rev: v0.8.6
3131
hooks:
3232
- id: ruff
3333
args: [ --fix, --exit-non-zero-on-fix ]
3434
- id: ruff-format
3535
- repo: https://github.com/pre-commit/mirrors-mypy
36-
rev: v1.16.0
36+
rev: v1.14.1
3737
hooks:
3838
- id: mypy
3939
args:

Makefile

Lines changed: 51 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -14,143 +14,102 @@
1414
# KIND, either express or implied. See the License for the
1515
# specific language governing permissions and limitations
1616
# under the License.
17-
# ========================
18-
# Configuration Variables
19-
# ========================
2017

21-
PYTEST_ARGS ?= -v # Override with e.g. PYTEST_ARGS="-vv --tb=short"
22-
COVERAGE ?= 0 # Set COVERAGE=1 to enable coverage: make test COVERAGE=1
23-
COVERAGE_FAIL_UNDER ?= 85 # Minimum coverage % to pass: make coverage-report COVERAGE_FAIL_UNDER=70
2418

25-
ifeq ($(COVERAGE),1)
26-
TEST_RUNNER = poetry run coverage run --parallel-mode --source=pyiceberg -m
27-
else
28-
TEST_RUNNER = poetry run
29-
endif
19+
help: ## Display this help
20+
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
3021

31-
POETRY_VERSION = 2.1.1
32-
33-
# ============
34-
# Help Section
35-
# ============
36-
37-
##@ General
38-
39-
help: ## Display this help message
40-
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-25s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
41-
42-
# ==================
43-
# Installation Tasks
44-
# ==================
45-
46-
##@ Setup
47-
48-
install-poetry: ## Ensure Poetry is installed at the specified version
22+
POETRY_VERSION = 2.0.1
23+
install-poetry: ## Ensure Poetry is installed and the correct version is being used.
4924
@if ! command -v poetry &> /dev/null; then \
50-
echo "Poetry not found. Installing..."; \
25+
echo "Poetry could not be found. Installing..."; \
5126
pip install --user poetry==$(POETRY_VERSION); \
5227
else \
5328
INSTALLED_VERSION=$$(pip show poetry | grep Version | awk '{print $$2}'); \
5429
if [ "$$INSTALLED_VERSION" != "$(POETRY_VERSION)" ]; then \
55-
echo "Updating Poetry to version $(POETRY_VERSION)..."; \
30+
echo "Poetry version $$INSTALLED_VERSION does not match required version $(POETRY_VERSION). Updating..."; \
5631
pip install --user --upgrade poetry==$(POETRY_VERSION); \
5732
else \
58-
echo "Poetry version $(POETRY_VERSION) already installed."; \
59-
fi; \
33+
echo "Poetry version $$INSTALLED_VERSION is already installed."; \
34+
fi \
6035
fi
6136

62-
install-dependencies: ## Install all dependencies including extras
37+
install-dependencies: ## Install dependencies including dev, docs, and all extras
6338
poetry install --all-extras
6439

65-
install: install-poetry install-dependencies ## Install Poetry and dependencies
66-
67-
# ===============
68-
# Code Validation
69-
# ===============
70-
71-
##@ Quality
40+
install: | install-poetry install-dependencies
7241

7342
check-license: ## Check license headers
7443
./dev/check-license
7544

76-
lint: ## Run code linters via pre-commit
45+
lint: ## lint
7746
poetry run pre-commit run --all-files
7847

79-
# ===============
80-
# Testing Section
81-
# ===============
82-
83-
##@ Testing
84-
85-
test: ## Run all unit tests (excluding integration)
86-
$(TEST_RUNNER) pytest tests/ -m "(unmarked or parametrize) and not integration" $(PYTEST_ARGS)
48+
test: ## Run all unit tests, can add arguments with PYTEST_ARGS="-vv"
49+
poetry run pytest tests/ -m "(unmarked or parametrize) and not integration" ${PYTEST_ARGS}
8750

88-
test-integration: test-integration-setup test-integration-exec ## Run integration tests
51+
test-s3: # Run tests marked with s3, can add arguments with PYTEST_ARGS="-vv"
52+
sh ./dev/run-minio.sh
53+
poetry run pytest tests/ -m s3 ${PYTEST_ARGS}
8954

90-
test-integration-setup: ## Start Docker services for integration tests
55+
test-integration: ## Run all integration tests, can add arguments with PYTEST_ARGS="-vv"
9156
docker compose -f dev/docker-compose-integration.yml kill
9257
docker compose -f dev/docker-compose-integration.yml rm -f
9358
docker compose -f dev/docker-compose-integration.yml up -d
9459
sleep 10
9560
docker compose -f dev/docker-compose-integration.yml cp ./dev/provision.py spark-iceberg:/opt/spark/provision.py
9661
docker compose -f dev/docker-compose-integration.yml exec -T spark-iceberg ipython ./provision.py
62+
poetry run pytest tests/ -v -m integration ${PYTEST_ARGS}
9763

98-
test-integration-exec: ## Run integration tests (excluding provision)
99-
$(TEST_RUNNER) pytest tests/ -m integration $(PYTEST_ARGS)
100-
101-
test-integration-rebuild: ## Rebuild integration Docker services from scratch
64+
test-integration-rebuild:
10265
docker compose -f dev/docker-compose-integration.yml kill
10366
docker compose -f dev/docker-compose-integration.yml rm -f
10467
docker compose -f dev/docker-compose-integration.yml build --no-cache
10568

106-
test-s3: ## Run tests marked with @pytest.mark.s3
107-
sh ./dev/run-minio.sh
108-
$(TEST_RUNNER) pytest tests/ -m s3 $(PYTEST_ARGS)
109-
110-
test-adls: ## Run tests marked with @pytest.mark.adls
69+
test-adls: ## Run tests marked with adls, can add arguments with PYTEST_ARGS="-vv"
11170
sh ./dev/run-azurite.sh
112-
$(TEST_RUNNER) pytest tests/ -m adls $(PYTEST_ARGS)
71+
poetry run pytest tests/ -m adls ${PYTEST_ARGS}
11372

114-
test-gcs: ## Run tests marked with @pytest.mark.gcs
73+
test-gcs: ## Run tests marked with gcs, can add arguments with PYTEST_ARGS="-vv"
11574
sh ./dev/run-gcs-server.sh
116-
$(TEST_RUNNER) pytest tests/ -m gcs $(PYTEST_ARGS)
75+
poetry run pytest tests/ -m gcs ${PYTEST_ARGS}
76+
77+
test-coverage-unit: # Run test with coverage for unit tests, can add arguments with PYTEST_ARGS="-vv"
78+
poetry run coverage run --source=pyiceberg/ --data-file=.coverage.unit -m pytest tests/ -v -m "(unmarked or parametrize) and not integration" ${PYTEST_ARGS}
11779

118-
test-coverage: COVERAGE=1
119-
test-coverage: test test-integration test-s3 test-adls test-gcs coverage-report ## Run all tests with coverage and report
80+
test-coverage-integration: # Run test with coverage for integration tests, can add arguments with PYTEST_ARGS="-vv"
81+
docker compose -f dev/docker-compose-integration.yml kill
82+
docker compose -f dev/docker-compose-integration.yml rm -f
83+
docker compose -f dev/docker-compose-integration.yml up -d
84+
sh ./dev/run-azurite.sh
85+
sh ./dev/run-gcs-server.sh
86+
sleep 10
87+
docker compose -f dev/docker-compose-integration.yml cp ./dev/provision.py spark-iceberg:/opt/spark/provision.py
88+
docker compose -f dev/docker-compose-integration.yml exec -T spark-iceberg ipython ./provision.py
89+
poetry run coverage run --source=pyiceberg/ --data-file=.coverage.integration -m pytest tests/ -v -m integration ${PYTEST_ARGS}
12090

121-
coverage-report: ## Combine and report coverage
122-
poetry run coverage combine
123-
poetry run coverage report -m --fail-under=$(COVERAGE_FAIL_UNDER)
91+
test-coverage: | test-coverage-unit test-coverage-integration ## Run all tests with coverage including unit and integration tests
92+
poetry run coverage combine .coverage.unit .coverage.integration
93+
poetry run coverage report -m --fail-under=90
12494
poetry run coverage html
12595
poetry run coverage xml
12696

127-
# ================
128-
# Documentation
129-
# ================
130-
131-
##@ Documentation
13297

133-
docs-install: ## Install docs dependencies
134-
poetry install --with docs
135-
136-
docs-serve: ## Serve local docs preview (hot reload)
137-
poetry run mkdocs serve -f mkdocs/mkdocs.yml
138-
139-
docs-build: ## Build the static documentation site
140-
poetry run mkdocs build -f mkdocs/mkdocs.yml --strict
141-
142-
# ===================
143-
# Project Maintenance
144-
# ===================
145-
146-
##@ Maintenance
147-
148-
clean: ## Remove build artifacts and caches
149-
@echo "Cleaning up Cython and Python cached files..."
98+
clean: ## Clean up the project Python working environment
99+
@echo "Cleaning up Cython and Python cached files"
150100
@rm -rf build dist *.egg-info
151101
@find . -name "*.so" -exec echo Deleting {} \; -delete
152102
@find . -name "*.pyc" -exec echo Deleting {} \; -delete
153103
@find . -name "__pycache__" -exec echo Deleting {} \; -exec rm -rf {} +
154104
@find . -name "*.pyd" -exec echo Deleting {} \; -delete
155105
@find . -name "*.pyo" -exec echo Deleting {} \; -delete
156-
@echo "Cleanup complete."
106+
@echo "Cleanup complete"
107+
108+
docs-install:
109+
poetry install --with docs
110+
111+
docs-serve:
112+
poetry run mkdocs serve -f mkdocs/mkdocs.yml
113+
114+
docs-build:
115+
poetry run mkdocs build -f mkdocs/mkdocs.yml --strict

dev/Dockerfile

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,22 +37,22 @@ RUN mkdir -p ${HADOOP_HOME} && mkdir -p ${SPARK_HOME} && mkdir -p /home/iceberg/
3737
WORKDIR ${SPARK_HOME}
3838

3939
# Remember to also update `tests/conftest`'s spark setting
40-
ENV SPARK_VERSION=3.5.6
40+
ENV SPARK_VERSION=3.5.4
4141
ENV ICEBERG_SPARK_RUNTIME_VERSION=3.5_2.12
42-
ENV ICEBERG_VERSION=1.9.1
43-
ENV PYICEBERG_VERSION=0.9.1
42+
ENV ICEBERG_VERSION=1.9.0-SNAPSHOT
43+
ENV PYICEBERG_VERSION=0.9.0
4444

4545
RUN curl --retry 5 -s -C - https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz -o spark-${SPARK_VERSION}-bin-hadoop3.tgz \
4646
&& tar xzf spark-${SPARK_VERSION}-bin-hadoop3.tgz --directory /opt/spark --strip-components 1 \
4747
&& rm -rf spark-${SPARK_VERSION}-bin-hadoop3.tgz
4848

4949
# Download iceberg spark runtime
50-
RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar \
50+
RUN curl --retry 5 -s https://repository.apache.org/content/groups/snapshots/org/apache/iceberg/iceberg-spark-runtime-3.5_2.12/1.9.0-SNAPSHOT/iceberg-spark-runtime-3.5_2.12-1.9.0-20250409.001855-44.jar \
5151
-Lo /opt/spark/jars/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar
5252

5353

5454
# Download AWS bundle
55-
RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar \
55+
RUN curl --retry 5 -s https://repository.apache.org/content/groups/snapshots/org/apache/iceberg/iceberg-aws-bundle/1.9.0-SNAPSHOT/iceberg-aws-bundle-1.9.0-20250409.002731-88.jar \
5656
-Lo /opt/spark/jars/iceberg-aws-bundle-${ICEBERG_VERSION}.jar
5757

5858
COPY spark-defaults.conf /opt/spark/conf

0 commit comments

Comments (0)