Commit b32203b

Refresh GitHub workflow matrix. (#2083)
Now that Spark 3.1 has been released, we should refresh the GitHub Actions workflow matrix to focus more on the newer versions. The new matrix:

| Python | PySpark | pandas | PyArrow |
| :---: | :---: | :---: | :---: |
| 3.5 | 2.3.4 | 0.23.4 | 0.16.0 |
| 3.6 | 2.3.4 | 0.24.2 | 0.10.0 |
| 3.6 | 2.4.7 | 0.24.2 | 0.14.1 |
| 3.7 | 2.4.7 | 0.25.3 | 0.15.1 |
| 3.7 | 3.0.2 | 1.0.5 | 1.0.1 |
| 3.7 | 3.1.1 | 1.1.5 | 2.0.0 |
| 3.8 | 3.0.2 | 1.1.5 | 2.0.0 |
| 3.8 | 3.1.1 | 1.2.3 | 3.0.0 |
1 parent 54a9502 commit b32203b

3 files changed, +30 -32 lines changed

.github/workflows/master.yml

Lines changed: 12 additions & 10 deletions
```diff
@@ -89,26 +89,26 @@ jobs:
           pandas-version: 0.24.2
           pyarrow-version: 0.14.1
           logger: databricks.koalas.usage_logging.usage_logger
-        - python-version: 3.6
-          spark-version: 2.4.7
-          pandas-version: 0.25.3
-          pyarrow-version: 0.15.1
-          default-index-type: 'distributed-sequence'
         - python-version: 3.7
           spark-version: 2.4.7
           pandas-version: 0.25.3
-          pyarrow-version: 0.14.1
-        - python-version: 3.7
-          spark-version: 2.4.7
-          pandas-version: 1.0.5
           pyarrow-version: 0.15.1
           default-index-type: 'distributed-sequence'
         - python-version: 3.7
           spark-version: 3.0.2
-          pandas-version: 0.25.3
+          pandas-version: 1.0.5
           pyarrow-version: 1.0.1
+        - python-version: 3.7
+          spark-version: 3.1.1
+          pandas-version: 1.1.5
+          pyarrow-version: 2.0.0
+          default-index-type: 'distributed-sequence'
         - python-version: 3.8
           spark-version: 3.0.2
+          pandas-version: 1.1.5
+          pyarrow-version: 2.0.0
+        - python-version: 3.8
+          spark-version: 3.1.1
           pandas-version: 1.2.3
           pyarrow-version: 3.0.0
           default-index-type: 'distributed-sequence'
@@ -151,6 +151,8 @@ jobs:
           fi
           conda install -c conda-forge --yes pandas==$PANDAS_VERSION pyarrow==$PYARROW_VERSION
           sed -i -e "/pandas/d" -e "/pyarrow/d" requirements-dev.txt
+          # Disable mypy check for PySpark 3.1
+          if [[ "SPARK_VERSION" > "3.1" ]]; then sed -i '/mypy/d' requirements-dev.txt; fi
           # sphinx-plotly-directive is not available on Conda.
           sed -i '/sphinx-plotly-directive/d' requirements-dev.txt
           conda install -c conda-forge --yes --file requirements-dev.txt
```
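The second hunk adds a step that drops `mypy` from `requirements-dev.txt` when the matrix entry targets PySpark 3.1, as the comment above the new line states. A rough Python sketch of that gating intent, as an illustration only: the helper name and the tuple-based comparison are assumptions, not what the workflow runs (the workflow uses the shell test shown in the diff).

```python
# Illustration of the intent "disable the mypy check for PySpark 3.1 (and newer)".
# Hypothetical helper; the workflow itself uses the bash conditional in the diff above.
def should_skip_mypy(spark_version: str) -> bool:
    major, minor = (int(part) for part in spark_version.split(".")[:2])
    return (major, minor) >= (3, 1)


assert should_skip_mypy("3.1.1") is True
assert should_skip_mypy("3.0.2") is False
assert should_skip_mypy("2.4.7") is False
```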

databricks/koalas/indexes/multi.py

Lines changed: 14 additions & 18 deletions
```diff
@@ -480,21 +480,19 @@ def _is_monotonic(self, order):
         return self._is_monotonic_decreasing().all()

     def _is_monotonic_increasing(self):
-        scol = self.spark.column
         window = Window.orderBy(NATURAL_ORDER_COLUMN_NAME).rowsBetween(-1, -1)
-        prev = F.lag(scol, 1).over(window)

         cond = F.lit(True)
         has_not_null = F.lit(True)
-        for field in self.spark.data_type[::-1]:
-            left = scol.getField(field.name)
-            right = prev.getField(field.name)
-            compare = MultiIndex._comparator_for_monotonic_increasing(field.dataType)
+        for scol in self._internal.index_spark_columns[::-1]:
+            data_type = self._internal.spark_type_for(scol)
+            prev = F.lag(scol, 1).over(window)
+            compare = MultiIndex._comparator_for_monotonic_increasing(data_type)
             # Since pandas 1.1.4, null value is not allowed at any levels of MultiIndex.
             # Therefore, we should check `has_not_null` over the all levels.
-            has_not_null = has_not_null & left.isNotNull()
-            cond = F.when(left.eqNullSafe(right), cond).otherwise(
-                compare(left, right, spark.Column.__gt__)
+            has_not_null = has_not_null & scol.isNotNull()
+            cond = F.when(scol.eqNullSafe(prev), cond).otherwise(
+                compare(scol, prev, spark.Column.__gt__)
             )

         cond = has_not_null & (prev.isNull() | cond)
@@ -524,21 +522,19 @@ def _comparator_for_monotonic_decreasing(data_type):
         return compare_disallow_null

     def _is_monotonic_decreasing(self):
-        scol = self.spark.column
         window = Window.orderBy(NATURAL_ORDER_COLUMN_NAME).rowsBetween(-1, -1)
-        prev = F.lag(scol, 1).over(window)

         cond = F.lit(True)
         has_not_null = F.lit(True)
-        for field in self.spark.data_type[::-1]:
-            left = scol.getField(field.name)
-            right = prev.getField(field.name)
-            compare = MultiIndex._comparator_for_monotonic_decreasing(field.dataType)
+        for scol in self._internal.index_spark_columns[::-1]:
+            data_type = self._internal.spark_type_for(scol)
+            prev = F.lag(scol, 1).over(window)
+            compare = MultiIndex._comparator_for_monotonic_increasing(data_type)
             # Since pandas 1.1.4, null value is not allowed at any levels of MultiIndex.
             # Therefore, we should check `has_not_null` over the all levels.
-            has_not_null = has_not_null & left.isNotNull()
-            cond = F.when(left.eqNullSafe(right), cond).otherwise(
-                compare(left, right, spark.Column.__lt__)
+            has_not_null = has_not_null & scol.isNotNull()
+            cond = F.when(scol.eqNullSafe(prev), cond).otherwise(
+                compare(scol, prev, spark.Column.__lt__)
             )

         cond = has_not_null & (prev.isNull() | cond)
```
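In both methods the change is the same: instead of packing the index into a single struct column and reading each level with `getField`, the loop now iterates over `self._internal.index_spark_columns` directly, lagging each level's column by one row and only falling back to the condition built from the later levels when the current level ties with the previous row. A standalone sketch of that lag-and-compare pattern on a plain Spark DataFrame follows; it is illustrative only — the column names and data are made up, and a bare `>` stands in for the type-dispatched comparator the real code selects.

```python
# A minimal, self-contained sketch of the lag-and-compare pattern above, run on a
# plain DataFrame whose "index levels" are just ordinary columns (illustrative data).
from pyspark.sql import SparkSession, Window
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame(
    [("a", 1), ("a", 2), ("b", 1)], ["level_0", "level_1"]
).withColumn("order", F.monotonically_increasing_id())

# Each row is compared against the immediately preceding row only.
window = Window.orderBy("order").rowsBetween(-1, -1)

cond = F.lit(True)
has_not_null = F.lit(True)
prev = None
for name in ["level_0", "level_1"][::-1]:  # later (less significant) levels first
    scol = F.col(name)
    prev = F.lag(scol, 1).over(window)
    has_not_null = has_not_null & scol.isNotNull()
    # On a tie at this level, defer to the comparison of the later levels;
    # otherwise this level alone decides (a bare ">" replaces the type-dispatched
    # comparator used in the real implementation).
    cond = F.when(scol.eqNullSafe(prev), cond).otherwise(scol > prev)

# The first row has no predecessor (prev is null) and counts as monotonic so far.
cond = has_not_null & (prev.isNull() | cond)
df.select("level_0", "level_1", cond.alias("monotonic_vs_prev")).show()
```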

databricks/koalas/internal.py

Lines changed: 4 additions & 4 deletions
```diff
@@ -445,8 +445,8 @@ def __init__(
         |              zoo|              bar|     7|     8|     9|...
         +-----------------+-----------------+------+------+------+...

-        >>> internal.index_spark_columns
-        [Column<b'__index_level_0__'>, Column<b'__index_level_1__'>, Column<b'(a, x)'>]
+        >>> internal.index_spark_columns  # doctest: +SKIP
+        [Column<'__index_level_0__'>, Column<'__index_level_1__'>, Column<'(a, x)'>]

         >>> internal.index_names
         [('row_index_a',), ('row_index_b',), ('a', 'x')]
@@ -457,8 +457,8 @@ def __init__(
         >>> internal.column_labels
         [('a', 'y'), ('b', 'z')]

-        >>> internal.data_spark_columns
-        [Column<b'(a, y)'>, Column<b'(b, z)'>]
+        >>> internal.data_spark_columns  # doctest: +SKIP
+        [Column<'(a, y)'>, Column<'(b, z)'>]

         >>> internal.data_dtypes
         [dtype('int64'), dtype('int64')]
```
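These doctests are now marked `# doctest: +SKIP` because the printed `repr` of a Spark `Column` is not stable across the PySpark versions in the matrix: the old expected output shows the `Column<b'...'>` form, the new one the `Column<'...'>` form, so no single expected line can match every build. A small sketch of how the directive behaves (plain `doctest` semantics, not koalas-specific code):

```python
# The +SKIP directive tells doctest not to run the example at all, so output that
# differs between library versions (such as a repr) can never fail the test.
def column_repr_example():
    """
    >>> from pyspark.sql import functions as F  # doctest: +SKIP
    >>> F.col("__index_level_0__")              # doctest: +SKIP
    Column<'__index_level_0__'>
    """


if __name__ == "__main__":
    import doctest

    # Passes regardless of which PySpark (if any) is installed, because the
    # skipped examples are never executed.
    doctest.testmod()
```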
