67 changes: 67 additions & 0 deletions .github/workflows/master.yml
@@ -154,3 +154,70 @@ jobs:
          ./dev/pytest
      - uses: codecov/codecov-action@v1

  spark_3_0_build:
    name: Conda (Python, Spark 3.0, pandas, PyArrow)
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          - python-version: 3.8
            spark-version: 3.0.0
            spark-rc-version: 2
            pandas-version: 1.0.4
            pyarrow-version: 0.17.1
    env:
      PYTHON_VERSION: ${{ matrix.python-version }}
      SPARK_VERSION: ${{ matrix.spark-version }}
      SPARK_RC_VERSION: ${{ matrix.spark-rc-version }}
      PANDAS_VERSION: ${{ matrix.pandas-version }}
      PYARROW_VERSION: ${{ matrix.pyarrow-version }}
      SPARK_CACHE_DIR: /home/runner/.cache/spark-versions
      # `QT_QPA_PLATFORM` for resolving 'QXcbConnection: Could not connect to display :0.0'
      DISPLAY: 0.0
      QT_QPA_PLATFORM: offscreen
      # GitHub token is required to auto-generate the release notes from GitHub releases
      GITHUB_OAUTH_KEY: ${{ secrets.GITHUB_TOKEN }}
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-java@v1
        with:
          java-version: 1.8
      - uses: actions/cache@v1
        with:
          path: ${{ env.SPARK_CACHE_DIR }}
          key: ${{ runner.os }}-koalas-spark-${{ env.SPARK_VERSION }}-rc${{ env.SPARK_RC_VERSION }}
      - name: Install dependencies
        run: |
          ./dev/download_spark_3.0.sh
          curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh
          bash miniconda.sh -b -p $HOME/miniconda
          # See also https://github.com/conda/conda/issues/7980
          . $HOME/miniconda/etc/profile.d/conda.sh
          hash -r
          conda config --set always_yes yes --set changeps1 no
          conda update -q conda
          # Useful for debugging any issues with conda
          conda info -a
          # Create an isolated conda environment for the tests
          conda create -c conda-forge -q -n test-environment python=$PYTHON_VERSION
          conda activate test-environment
          conda install -c conda-forge --yes codecov
          conda config --env --add pinned_packages python=$PYTHON_VERSION
          conda config --env --add pinned_packages pandas==$PANDAS_VERSION
          conda config --env --add pinned_packages pyarrow==$PYARROW_VERSION
          conda install -c conda-forge --yes pandas==$PANDAS_VERSION pyarrow==$PYARROW_VERSION
          conda install -c conda-forge --yes --freeze-installed --file requirements-dev.txt
          conda list
      - name: Run tests
        run: |
          # The environment should be initialized newly between GitHub Actions steps. Also,
          # for some reason we have to add the environment directory manually so that
          # 'test-environment' can be found. See also https://github.com/conda/conda/issues/7980
          . $HOME/miniconda/etc/profile.d/conda.sh
          conda config --prepend envs_dirs $HOME/miniconda/envs
          conda activate test-environment
          export SPARK_HOME="$SPARK_CACHE_DIR/spark-$SPARK_VERSION-rc$SPARK_RC_VERSION-bin-hadoop2.7"
          ./dev/lint-python
          ./dev/pytest
      - uses: codecov/codecov-action@v1
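
For reference, a minimal local approximation of the new job's "Run tests" step, assuming the Spark 3.0 RC has already been cached by ./dev/download_spark_3.0.sh and a conda environment with the pinned pandas/PyArrow versions is active (paths and values mirror the matrix entry above; this is a sketch, not part of the change):

# Sketch of a local run; assumes the repository root as the working directory.
export SPARK_VERSION=3.0.0
export SPARK_RC_VERSION=2
export SPARK_CACHE_DIR="$HOME/.cache/spark-versions"
# Point the tests at the cached RC build, then lint and test, as the job above does.
export SPARK_HOME="$SPARK_CACHE_DIR/spark-$SPARK_VERSION-rc$SPARK_RC_VERSION-bin-hadoop2.7"
./dev/lint-python
./dev/pytest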
4 changes: 2 additions & 2 deletions databricks/conftest.py
@@ -37,10 +37,10 @@
# Delta requires Spark 2.4.2+. See
# https://github.com/delta-io/delta#compatibility-with-apache-spark-versions.
if LooseVersion(__version__) >= LooseVersion("3.0.0"):
-    shared_conf["spark.jars.packages"] = "io.delta:delta-core_2.12:0.1.0"
+    shared_conf["spark.jars.packages"] = "io.delta:delta-core_2.12:0.6.1"
    session = utils.default_session(shared_conf)
elif LooseVersion(__version__) >= LooseVersion("2.4.2"):
-    shared_conf["spark.jars.packages"] = "io.delta:delta-core_2.11:0.1.0"
+    shared_conf["spark.jars.packages"] = "io.delta:delta-core_2.11:0.6.1"
    session = utils.default_session(shared_conf)
else:
    session = utils.default_session(shared_conf)
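As an optional, purely local sanity check of the bumped coordinate (not part of this PR; assumes pyspark is on PATH), one can ask Spark to resolve the new package directly from Maven Central:

# Hypothetical check: Spark resolves io.delta:delta-core_2.12:0.6.1 and its
# dependencies at startup, so a clean launch confirms the coordinate exists.
pyspark --packages io.delta:delta-core_2.12:0.6.1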
48 changes: 48 additions & 0 deletions dev/download_spark_3.0.sh
@@ -0,0 +1,48 @@
#!/usr/bin/env bash

#
# Copyright (C) 2020 Databricks, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

echo "Downloading Spark if necessary"
echo "Spark version = $SPARK_VERSION-rc$SPARK_RC_VERSION"

sparkVersionsDir="$HOME/.cache/spark-versions"
mkdir -p "$sparkVersionsDir"
sparkFile="spark-$SPARK_VERSION-bin-hadoop2.7"
sparkBuild="spark-$SPARK_VERSION-rc$SPARK_RC_VERSION-bin-hadoop2.7"
sparkBuildDir="$sparkVersionsDir/$sparkBuild"

if [[ -d "$sparkBuildDir" ]]; then
  echo "Skipping download - found Spark dir $sparkBuildDir"
else
  echo "Missing $sparkBuildDir, downloading archive"

  # Download and extract the RC archive if it has not been cached yet
  if ! [[ -d "$sparkBuildDir" ]]; then
    sparkURL="https://dist.apache.org/repos/dist/dev/spark/v$SPARK_VERSION-rc$SPARK_RC_VERSION-bin/$sparkFile.tgz"
    echo "Downloading $sparkURL ..."
    # Test whether it's reachable
    if curl -s -I -f -o /dev/null "$sparkURL"; then
      curl -s "$sparkURL" | tar xz --directory "$sparkVersionsDir"
      mv "$sparkVersionsDir/$sparkFile" "$sparkVersionsDir/$sparkBuild"
    else
      echo "Could not reach $sparkURL"
    fi
  fi

  echo "Content of $sparkBuildDir:"
  ls -la "$sparkBuildDir"
fi
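
For reference, the script is driven entirely by environment variables; an invocation matching the workflow's matrix entry would look like this (a usage sketch, not part of the change):

# The two variables the script reads, with the values pinned in the job matrix.
export SPARK_VERSION=3.0.0
export SPARK_RC_VERSION=2
./dev/download_spark_3.0.sh
# On success the build is cached under:
#   $HOME/.cache/spark-versions/spark-3.0.0-rc2-bin-hadoop2.7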
4 changes: 2 additions & 2 deletions dev/pytest
@@ -41,8 +41,8 @@ fi

# Runs both doctests and unit tests by default, otherwise hands arguments over to pytest.
if [ "$#" = 0 ]; then
if [[ "$SPARK_VERSION" == 2.3* ]] || [[ "$SPARK_VERSION" == 2.4.1* ]] || [[ "$SPARK_VERSION" == 2.4.2* ]]; then
# Delta requires Spark 2.4.2+. We skip the related doctests.
if [[ "$SPARK_VERSION" == 2.3* ]] || [[ "$SPARK_VERSION" == 2.4.1* ]] || [[ "$SPARK_VERSION" == 2.4.2* ]] || [[ "$SPARK_VERSION" == 3.0* ]]; then
# Delta requires Spark 2.4.2+, and doesn't support Spark 3.0+ yet. We skip the related doctests.
if [[ "$SPARK_VERSION" == 2.3* ]]; then
$PYTHON_EXECUTABLE -m pytest --cov=databricks --cov-report xml:"$FWDIR/coverage.xml" -k " -melt -to_delta -read_delta" --verbose --showlocals --color=yes --doctest-modules databricks "${logopts[@]}"
else
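A small, hypothetical illustration of the widened glob (not part of the change): 3.0.x versions now take the same doctest-skipping branch as the older 2.3/2.4.1/2.4.2 builds.

# Quick check of the version globs used in the condition above.
for v in 2.3.4 2.4.1 2.4.2 2.4.5 3.0.0; do
  if [[ "$v" == 2.3* ]] || [[ "$v" == 2.4.1* ]] || [[ "$v" == 2.4.2* ]] || [[ "$v" == 3.0* ]]; then
    echo "$v: skip Delta-related doctests"
  else
    echo "$v: run the full doctests"
  fi
done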