Skip to content

Added Jupyter notebook for release 1.1.0 #63

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
217 changes: 217 additions & 0 deletions examples/release-1.1-examples.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "e57c4da7",
"metadata": {},
"outputs": [],
"source": [
"# Run the following to install the MarkLogic Python client.\n",
"# %pip install marklogic_python_client\n",
"\n",
"# Create an instance of the MarkLogic Python client, pointing at the out-of-the-box Documents database.\n",
"\n",
"from marklogic import Client\n",
"client = Client(\"http://localhost:8000\", digest=(\"python-user\", \"pyth0n\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3872a3ae",
"metadata": {},
"outputs": [],
"source": [
"# Insert a MarkLogic TDE view to project rows from documents in the \"employee\" collection.\n",
"\n",
"from marklogic.documents import Document\n",
"tde_view = {\n",
" \"template\": {\n",
" \"context\": \"/\",\n",
" \"collections\": [\"employee\"],\n",
" \"rows\": [{\n",
" \"schemaName\": \"example\",\n",
" \"viewName\": \"employee\",\n",
" \"columns\": [\n",
" {\"name\": \"lastName\", \"scalarType\": \"string\", \"val\": \"Surname\"},\n",
" {\"name\": \"firstName\", \"scalarType\": \"string\", \"val\": \"GivenName\"},\n",
" {\"name\": \"state\", \"scalarType\": \"string\", \"val\": \"State\"},\n",
" {\"name\": \"department\", \"scalarType\": \"string\", \"val\": \"Department\"},\n",
" {\"name\": \"salary\", \"scalarType\": \"int\", \"val\": \"BaseSalary\"}\n",
" ]\n",
" }]\n",
" }\n",
"}\n",
"\n",
"client.documents.write(\n",
" Document(\n",
" \"/tde/employees.json\", tde_view, \n",
" permissions={\"rest-reader\": [\"read\", \"update\"]}, \n",
" collections=[\"http://marklogic.com/xdmp/tde\"]\n",
" ),\n",
" params={\"database\": \"Schemas\"}\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c72a2506",
"metadata": {},
"outputs": [],
"source": [
"# Load 500 JSON documents into the \"employee\" collection.\n",
"\n",
"from marklogic.documents import Document, DefaultMetadata\n",
"import requests\n",
"import json\n",
"r = requests.get('https://raw.githubusercontent.com/marklogic/marklogic-spark-connector/master/src/test/resources/500-employees.json')\n",
"\n",
"docs = [\n",
" DefaultMetadata(permissions={\"rest-reader\": [\"read\", \"update\"]}, collections=[\"employee\"])\n",
"]\n",
"\n",
"for employee in json.loads(r.text):\n",
" docs.append(Document(employee['uri'], json.dumps(employee['value'])))\n",
"\n",
"client.documents.write(docs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef22f938",
"metadata": {},
"outputs": [],
"source": [
"# Can use MarkLogic's Optic query language with the view.\n",
"\n",
"client.rows.query(\"op.fromView('example', 'employee', '').limit(3)\")[\"rows\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ae929676",
"metadata": {},
"outputs": [],
"source": [
"# Can use SQL queries with the view.\n",
"\n",
"client.rows.query(sql=\"select * from example.employee order by lastName limit 3\")[\"rows\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1905651c",
"metadata": {},
"outputs": [],
"source": [
"# Can use GraphQL queries with the view.\n",
"\n",
"client.rows.query(graphql=\"query myQuery { example_employee { lastName firstName } }\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3fb93d17",
"metadata": {},
"outputs": [],
"source": [
"# Can return data as CSV for integration with pandas.\n",
"\n",
"import io\n",
"import pandas\n",
"\n",
"csv_data = client.rows.query(\"op.fromView('example', 'employee', '')\", format=\"csv\")\n",
"df = pandas.read_csv(io.StringIO(csv_data))\n",
"df\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "711caba0",
"metadata": {},
"outputs": [],
"source": [
"# Install matplotlib to visualize data.\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "43532cff",
"metadata": {},
"outputs": [],
"source": [
"# Simple bar chart showing the count of each department.\n",
"\n",
"df['department'].value_counts().plot(kind='bar')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "473000f1",
"metadata": {},
"outputs": [],
"source": [
"# Can use MarkLogic Spark connector with Python.\n",
"# First create a Spark session that has access to the MarkLogic Spark connector jar file.\n",
"\n",
"import os\n",
"os.environ['PYSPARK_SUBMIT_ARGS'] = '--jars \"/Users/rudin/marklogic-spark-connector-2.2.0.jar\" pyspark-shell'\n",
"\n",
"%pip install pyspark\n",
"from pyspark.sql import SparkSession\n",
"spark = SparkSession.builder.master(\"local[*]\").appName('My Notebook').getOrCreate()\n",
"spark.sparkContext.setLogLevel(\"WARN\")\n",
"spark"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dfdd727d",
"metadata": {},
"outputs": [],
"source": [
"# Create a Spark DataFrame via a MarkLogic Optic query.\n",
"\n",
"df = spark.read.format(\"marklogic\") \\\n",
" .option(\"spark.marklogic.client.uri\", \"python-user:pyth0n@localhost:8000\") \\\n",
" .option(\"spark.marklogic.read.opticQuery\", \"op.fromView('example', 'employee', '')\") \\\n",
" .load()\n",
"\n",
"df.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading