Commit 40d2312

Merge branch 'master' into asalikhov/improve_docs

2 parents: b6d96d1 + 8d4bff6
7 files changed (+465, -83 lines)
Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyspark.sql import SparkSession\n",
    "\n",
    "# Spark session & context\n",
    "spark = SparkSession.builder.master('local').getOrCreate()\n",
    "sc = spark.sparkContext\n",
    "\n",
    "# Sum of the first 100 whole numbers\n",
    "rdd = sc.parallelize(range(100 + 1))\n",
    "rdd.sum()\n",
    "# 5050"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
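The notebook above stores no meaningful output; it is meant to be executed head-lessly by the test added at the end of this diff. As a rough illustration of what that execution amounts to, here is a minimal sketch that runs the notebook through nbconvert's Python API instead of the command line. It assumes the file is saved as data/local_pyspark.ipynb and that pyspark, nbformat and nbconvert are installed in the Python 3 kernel environment; none of this code is part of the commit.

import nbformat
from nbconvert.preprocessors import ExecutePreprocessor

# Load the committed notebook and execute it in place, much as
# `jupyter nbconvert --execute` does for the test below.
nb = nbformat.read("data/local_pyspark.ipynb", as_version=4)
ep = ExecutePreprocessor(timeout=600, kernel_name="python3")
ep.preprocess(nb, {"metadata": {"path": "data"}})  # raises CellExecutionError if a cell fails

# The sum of 0..100 (5050) is now recorded in the first cell's outputs.
print(nb.cells[0]["outputs"])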
Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "library(SparkR)\n",
    "\n",
    "# Spark session & context\n",
    "sc <- sparkR.session(\"local\")\n",
    "\n",
    "# Sum of the first 100 whole numbers\n",
    "sdf <- createDataFrame(list(1:100))\n",
    "dapplyCollect(sdf,\n",
    "              function(x)\n",
    "              { x <- sum(x) }\n",
    "              )\n",
    "# 5050"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R",
   "language": "R",
   "name": "ir"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "library(sparklyr)\n",
    "\n",
    "# Get the default config\n",
    "conf <- spark_config()\n",
    "# Set the catalog implementation in-memory\n",
    "conf$spark.sql.catalogImplementation <- \"in-memory\"\n",
    "\n",
    "# Spark session & context\n",
    "sc <- spark_connect(master = \"local\", config = conf)\n",
    "\n",
    "# Sum of the first 100 whole numbers\n",
    "sdf_len(sc, 100, repartition = 1) %>%\n",
    "    spark_apply(function(e) sum(e))\n",
    "# 5050"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R",
   "language": "R",
   "name": "ir"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%init_spark\n",
    "# Spark session & context\n",
    "launcher.master = \"local\"\n",
    "launcher.conf.spark.executor.cores = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "rdd: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[8] at parallelize at <console>:28\n",
       "res4: Double = 5050.0\n"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "// Sum of the first 100 whole numbers\n",
    "val rdd = sc.parallelize(0 to 100)\n",
    "rdd.sum()\n",
    "// 5050"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "spylon-kernel",
   "language": "scala",
   "name": "spylon-kernel"
  },
  "language_info": {
   "codemirror_mode": "text/x-scala",
   "file_extension": ".scala",
   "help_links": [
    {
     "text": "MetaKernel Magics",
     "url": "https://metakernel.readthedocs.io/en/latest/source/README.html"
    }
   ],
   "mimetype": "text/x-scala",
   "name": "scala",
   "pygments_lexer": "scala",
   "version": "0.4.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
Lines changed: 89 additions & 0 deletions
@@ -0,0 +1,89 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Waiting for a Spark session to start..."
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "spark://master:7077\n"
     ]
    }
   ],
   "source": [
    "// should print the value of --master in the kernel spec\n",
    "println(sc.master)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Waiting for a Spark session to start..."
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "rdd = ParallelCollectionRDD[0] at parallelize at <console>:28\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "5050.0"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "// Sum of the first 100 whole numbers\n",
    "val rdd = sc.parallelize(0 to 100)\n",
    "rdd.sum()\n",
    "// 5050"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Apache Toree - Scala",
   "language": "scala",
   "name": "apache_toree_scala"
  },
  "language_info": {
   "codemirror_mode": "text/x-scala",
   "file_extension": ".scala",
   "mimetype": "text/x-scala",
   "name": "scala",
   "pygments_lexer": "scala",
   "version": "2.11.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

import logging
import os

import pytest

LOGGER = logging.getLogger(__name__)


@pytest.mark.parametrize(
    "test_file",
    # TODO: add local_sparklyr
    ["local_pyspark", "local_spylon", "local_toree", "local_sparkR"],
)
def test_nbconvert(container, test_file):
    """Check if Spark notebooks can be executed"""
    host_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data")
    cont_data_dir = "/home/jovyan/data"
    output_dir = "/tmp"
    timeout_ms = 600
    LOGGER.info(f"Test that {test_file} notebook can be executed ...")
    command = f"jupyter nbconvert --to markdown --ExecutePreprocessor.timeout={timeout_ms} --output-dir {output_dir} --execute {cont_data_dir}/{test_file}.ipynb"
    c = container.run(
        volumes={host_data_dir: {"bind": cont_data_dir, "mode": "ro"}},
        tty=True,
        command=["start.sh", "bash", "-c", command],
    )
    rv = c.wait(timeout=timeout_ms / 10 + 10)
    assert rv == 0 or rv["StatusCode"] == 0, f"Command {command} failed"
    logs = c.logs(stdout=True).decode("utf-8")
    LOGGER.debug(logs)
    expected_file = f"{output_dir}/{test_file}.md"
    assert expected_file in logs, f"Expected file {expected_file} not generated"
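The container fixture consumed by test_nbconvert is not part of this diff; it presumably comes from the repository's shared conftest.py. The sketch below shows roughly how such a fixture can be built on docker-py. The TrackedContainer helper and the image name are assumptions for illustration, not the project's actual code.

import docker
import pytest


class TrackedContainer:
    """Start containers from a fixed image and keep a handle for the test (assumed helper)."""

    def __init__(self, docker_client, image_name):
        self.docker_client = docker_client
        self.image_name = image_name
        self.container = None

    def run(self, **kwargs):
        # detach=True so the test can later call wait() and logs() on the returned handle
        self.container = self.docker_client.containers.run(
            self.image_name, detach=True, **kwargs
        )
        return self.container

    def remove(self):
        if self.container is not None:
            self.container.remove(force=True)


@pytest.fixture(scope="function")
def container():
    client = docker.from_env()
    # The image name is an assumption; the real fixture is configured per docker-stacks image.
    tracked = TrackedContainer(client, "jupyter/pyspark-notebook:latest")
    yield tracked
    tracked.remove()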
