Skip to content

Commit e84bfdf

Browse files
committed
Add logger to setup_julia and setup_spark
1 parent c294e9e commit e84bfdf

File tree

2 files changed

+25
-6
lines changed

2 files changed

+25
-6
lines changed

images/minimal-notebook/setup-scripts/setup_julia.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
# - Run as the root user
77
# - The JULIA_PKGDIR environment variable is set
88

9+
import logging
910
import os
1011
import platform
1112
import shutil
@@ -14,6 +15,8 @@
1415

1516
import requests
1617

18+
LOGGER = logging.getLogger(__name__)
19+
1720

1821
def unify_aarch64(platform: str) -> str:
1922
"""
@@ -31,7 +34,7 @@ def get_latest_julia_url() -> tuple[str, str]:
3134
Get the latest stable version of Julia
3235
Based on: https://github.com/JuliaLang/www.julialang.org/issues/878#issuecomment-749234813
3336
"""
34-
37+
LOGGER.info("Downloading Julia versions information")
3538
versions = requests.get(
3639
"https://julialang-s3.julialang.org/bin/versions.json"
3740
).json()
@@ -43,6 +46,7 @@ def get_latest_julia_url() -> tuple[str, str]:
4346
latest_version_files = stable_versions[latest_stable_version]["files"]
4447
triplet = unify_aarch64(platform.machine()) + "-linux-gnu"
4548
file_info = [vf for vf in latest_version_files if vf["triplet"] == triplet][0]
49+
LOGGER.info(f"Latest version: {file_info['version']} url: {file_info['url']}")
4650
return file_info["url"], file_info["version"]
4751

4852

@@ -51,6 +55,7 @@ def download_julia(julia_url: str) -> None:
5155
Downloads and unpacks julia
5256
The resulting julia directory is "/opt/julia-VERSION/"
5357
"""
58+
LOGGER.info("Downloading and unpacking Julia")
5459
tmp_file = Path("/tmp/julia.tar.gz")
5560
subprocess.check_call(
5661
["curl", "--progress-bar", "--location", "--output", tmp_file, julia_url]
@@ -59,12 +64,13 @@ def download_julia(julia_url: str) -> None:
5964
tmp_file.unlink()
6065

6166

62-
def prepare_julia(julia_version: str) -> None:
67+
def configure_julia(julia_version: str) -> None:
6368
"""
6469
Creates /usr/local/bin/julia symlink
6570
Makes Julia aware of conda libraries
6671
Creates a directory for Julia user libraries
6772
"""
73+
LOGGER.info("Configuring Julia")
6874
# Link Julia installed version to /usr/local/bin, so julia launches it
6975
subprocess.check_call(
7076
["ln", "-fs", f"/opt/julia-{julia_version}/bin/julia", "/usr/local/bin/julia"]
@@ -84,6 +90,8 @@ def prepare_julia(julia_version: str) -> None:
8490

8591

8692
if __name__ == "__main__":
93+
logging.basicConfig(level=logging.INFO)
94+
8795
julia_url, julia_version = get_latest_julia_url()
8896
download_julia(julia_url=julia_url)
89-
prepare_julia(julia_version=julia_version)
97+
configure_julia(julia_version=julia_version)

images/pyspark-notebook/setup_spark.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,16 @@
77
# - Required env variables: SPARK_HOME, HADOOP_VERSION, SPARK_DOWNLOAD_URL
88
# - Optional env variables: SPARK_VERSION, SCALA_VERSION
99

10+
import logging
1011
import os
1112
import subprocess
1213
from pathlib import Path
1314

1415
import requests
1516
from bs4 import BeautifulSoup
1617

18+
LOGGER = logging.getLogger(__name__)
19+
1720

1821
def get_all_refs(url: str) -> list[str]:
1922
"""
@@ -31,16 +34,19 @@ def get_spark_version() -> str:
3134
"""
3235
if (version := os.environ["SPARK_VERSION"]) != "":
3336
return version
37+
LOGGER.info("Downloading Spark versions information")
3438
all_refs = get_all_refs("https://archive.apache.org/dist/spark/")
3539
stable_versions = [
3640
ref.removeprefix("spark-").removesuffix("/")
3741
for ref in all_refs
3842
if ref.startswith("spark-") and "incubating" not in ref and "preview" not in ref
3943
]
4044
# Compare versions semantically
41-
return max(
45+
latest_version = max(
4246
stable_versions, key=lambda ver: [int(sub_ver) for sub_ver in ver.split(".")]
4347
)
48+
LOGGER.info(f"Latest version: {latest_version}")
49+
return latest_version
4450

4551

4652
def download_spark(
@@ -53,9 +59,11 @@ def download_spark(
5359
Downloads and unpacks spark
5460
The resulting spark directory name is returned
5561
"""
62+
LOGGER.info("Downloading and unpacking Spark")
5663
spark_dir_name = f"spark-{spark_version}-bin-hadoop{hadoop_version}"
5764
if scala_version:
5865
spark_dir_name += f"-scala{scala_version}"
66+
LOGGER.info(f"Spark directory name: {spark_dir_name}")
5967
spark_url = spark_download_url / f"spark-{spark_version}" / f"{spark_dir_name}.tgz"
6068

6169
tmp_file = Path("/tmp/spark.tar.gz")
@@ -80,11 +88,12 @@ def download_spark(
8088
return spark_dir_name
8189

8290

83-
def prepare_spark(spark_dir_name: str, spark_home: Path) -> None:
91+
def configure_spark(spark_dir_name: str, spark_home: Path) -> None:
8492
"""
8593
Creates a ${SPARK_HOME} symlink to a versioned spark directory
8694
Creates a 10spark-config.sh symlink to source PYTHONPATH automatically
8795
"""
96+
LOGGER.info("Configuring Spark")
8897
subprocess.check_call(["ln", "-s", f"/usr/local/{spark_dir_name}", spark_home])
8998

9099
# Add a link in the before_notebook hook in order to source PYTHONPATH automatically
@@ -95,13 +104,15 @@ def prepare_spark(spark_dir_name: str, spark_home: Path) -> None:
95104

96105

97106
if __name__ == "__main__":
107+
logging.basicConfig(level=logging.INFO)
108+
98109
spark_version = get_spark_version()
99110
spark_dir_name = download_spark(
100111
spark_version=spark_version,
101112
hadoop_version=os.environ["HADOOP_VERSION"],
102113
scala_version=os.environ["SCALA_VERSION"],
103114
spark_download_url=Path(os.environ["SPARK_DOWNLOAD_URL"]),
104115
)
105-
prepare_spark(
116+
configure_spark(
106117
spark_dir_name=spark_dir_name, spark_home=Path(os.environ["SPARK_HOME"])
107118
)

0 commit comments

Comments
 (0)