28 changes: 28 additions & 0 deletions .devcontainer/Dockerfile
@@ -0,0 +1,28 @@
FROM mcr.microsoft.com/devcontainers/rust:1

# Install system dependencies
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
&& apt-get -y install --no-install-recommends \
sqlite3 \
pkg-config \
libclang-dev \
postgresql-client \
default-mysql-client \
&& apt-get clean -y \
&& rm -rf /var/lib/apt/lists/*

# Install Python and Poetry
ENV POETRY_HOME=/opt/poetry
ENV POETRY_VERSION=2.1.3
ENV PATH="/opt/poetry/bin:$PATH"
RUN curl -sSL https://install.python-poetry.org | python3 - \
&& poetry config virtualenvs.create false

# Install Rust components
RUN rustup component add rustfmt clippy \
&& cargo install cargo-watch \
&& cargo install just

# Set environment variables
ENV PATH="/home/vscode/.local/bin:${PATH}"
ENV PYTHONPATH="/workspaces/connector-x:${PYTHONPATH}"
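
The image can also be exercised outside VS Code. A minimal sketch, assuming Docker is installed, the commands run from the repository root, and the `connectorx-dev` tag is purely illustrative:

```bash
# Build with the repository root as context, matching the
# build.context/dockerfile pair in .devcontainer/docker-compose.yml.
docker build -f .devcontainer/Dockerfile -t connectorx-dev .

# Smoke-test the toolchain baked into the image.
docker run --rm connectorx-dev bash -lc \
    "cargo --version && poetry --version && sqlite3 --version"
```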
21 changes: 21 additions & 0 deletions .devcontainer/devcontainer.json
@@ -0,0 +1,21 @@
{
"name": "ConnectorX Development",
"dockerComposeFile": [
"docker-compose.yml"
],
"service": "connectorx",
"workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}",
"customizations": {
"vscode": {
"extensions": [
"rust-lang.rust-analyzer"
],
"settings": {
"rust-analyzer.checkOnSave.command": "clippy"
}
}
},
"features": {
"ghcr.io/devcontainers/features/rust:1": "latest"
}
}
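
The same configuration can be driven without the editor via the Dev Container CLI; a sketch assuming Node.js/npm is available on the host:

```bash
# One-time install of the Dev Container CLI.
npm install -g @devcontainers/cli

# Build and start the container described by .devcontainer/devcontainer.json.
devcontainer up --workspace-folder .

# Run a command inside the running dev container.
devcontainer exec --workspace-folder . cargo check
```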
48 changes: 48 additions & 0 deletions .devcontainer/docker-compose.yml
@@ -0,0 +1,48 @@
services:
connectorx:
build:
context: ..
dockerfile: .devcontainer/Dockerfile
volumes:
- ..:/workspaces/connectorx:cached
command: sleep infinity
depends_on:
- postgres
- mysql
networks:
- connectorx-network

postgres:
image: pgvector/pgvector:pg17
environment:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
POSTGRES_DB: connectorx
ports:
- "5433:5432"
volumes:
- postgres_data:/var/lib/postgresql/data
networks:
- connectorx-network

mysql:
image: ghcr.io/wangxiaoying/mysql:latest
environment:
MYSQL_DATABASE: mysql
MYSQL_ROOT_PASSWORD: mysql
LANG: C.UTF-8
ports:
- "3306:3306"
volumes:
- mysql_data:/var/lib/mysql
networks:
- connectorx-network

networks:
connectorx-network:
driver: bridge

volumes:
postgres_data:
mysql_data:
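
Once the stack is up, both databases are reachable from the host on the published ports. A quick connectivity check, assuming the `psql` and `mysql` clients are installed on the host (inside the container the services resolve by compose name instead, e.g. `postgres:5432` and `mysql:3306`):

```bash
# Postgres: host port 5433 maps to container port 5432.
psql postgresql://postgres:postgres@localhost:5433/connectorx -c 'SELECT version();'

# MySQL: published on the default port 3306; root password is "mysql".
mysql -h 127.0.0.1 -P 3306 -u root -pmysql -e 'SELECT VERSION();'
```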

4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
@@ -15,7 +15,7 @@ jobs:
container: ubuntu:24.04
services:
postgres:
image: postgres
image: pgvector/pgvector:pg17
env:
POSTGRES_PASSWORD: postgres
# Set health checks to wait until postgres has started
@@ -117,7 +117,7 @@ jobs:
# Label used to access the service container
postgres:
# Docker Hub image
image: postgres
image: pgvector/pgvector:pg17
env:
POSTGRES_PASSWORD: postgres
# Set health checks to wait until postgres has started
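Swapping `postgres` for `pgvector/pgvector:pg17` gives the CI Postgres service the `vector` extension that the new vector tests exercise. One way to confirm the extension ships with the image, assuming Docker is available locally (the extension directory path is an assumption and is globbed for that reason):

```bash
# List the extension control file baked into the image; the PostgreSQL
# major-version directory name may vary, hence the wildcard.
docker run --rm pgvector/pgvector:pg17 \
    bash -lc "ls /usr/share/postgresql/*/extension/vector.control"
```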
32 changes: 32 additions & 0 deletions CONTRIBUTING.md
@@ -9,6 +9,38 @@ This doc describes how you can get started at developing ConnectorX.
Please check out [here](https://sfu-db.github.io/connector-x/install.html#build-from-source-code)


### Run in VSCode Dev Container

1. Install required tools:
- [Docker](https://docs.docker.com/get-docker/)
- [VSCode](https://code.visualstudio.com/)
- [VSCode Remote - Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)

2. Clone the repository and open it in VSCode:
```bash
git clone https://github.com/sfu-db/connector-x.git
code connector-x
```

3. When prompted, click "Reopen in Container", or open the command palette (F1) and select "Remote-Containers: Reopen in Container".

4. The dev container includes:
- Rust development environment with rust-analyzer
- PostgreSQL (pgvector) running on port 5433
- MySQL running on port 3306
- All necessary build tools and dependencies

5. The container will automatically:
- Mount your local repository into the container
- Install Rust toolchain and dependencies
- Configure rust-analyzer with clippy for code analysis
- Set up the development workspace

6. You can now start developing with:
- Full Rust development support
- Integrated database services
- All development tools pre-configured (a quick smoke test is sketched below)
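
A minimal smoke test from a terminal inside the container, assuming the service names and credentials from `.devcontainer/docker-compose.yml`:

```bash
# Toolchain installed by the Dockerfile.
cargo clippy --version && poetry --version

# Databases resolve by compose service name on the shared network.
psql postgresql://postgres:postgres@postgres:5432/connectorx -c '\conninfo'
mysql -h mysql -u root -pmysql -e 'SELECT 1;'
```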

### Run tests

* Set up environment variables by creating a `.env` file under project directory. Here is an example:
Expand Down
12 changes: 12 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

20 changes: 20 additions & 0 deletions connectorx-python/Cargo.lock

Some generated files are not rendered by default.

83 changes: 83 additions & 0 deletions connectorx-python/connectorx/tests/test_postgres.py
@@ -4,6 +4,8 @@
import pytest
from pandas.testing import assert_frame_equal
import datetime
import numpy as np
import ast

from .. import read_sql

@@ -1058,3 +1060,84 @@ def test_postgres_partitioned_pre_execution_queries(postgres_url: str) -> None:
},
).sort_values(by=['name']).reset_index(drop=True)
assert_frame_equal(df, expected, check_names=True)

def test_postgres_inet_type(postgres_url: str) -> None:
query = "SELECT test_inet FROM test_types"
df = read_sql(postgres_url, query)
expected = pd.DataFrame(
data={
"test_inet": pd.Series(
["192.168.1.1", "10.0.0.0/24", "2001:db8::1", "2001:db8::/32", None],
dtype="object"
),
},
)
assert_frame_equal(df, expected, check_names=True)
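
# How to run this test in isolation (illustrative, not part of the suite):
# POSTGRES_URL is assumed to be the env var the test fixtures read, in line
# with the .env-based setup described in CONTRIBUTING.md, e.g. from the
# connectorx-python directory:
#
#   export POSTGRES_URL="postgresql://postgres:postgres@localhost:5433/connectorx"
#   poetry run pytest connectorx/tests/test_postgres.py -k test_postgres_inet_type -v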

def test_postgres_vector_types(postgres_url: str) -> None:
query = "SELECT dense_vector, half_vector, binary_vector, sparse_vector FROM vector_types"
df = read_sql(postgres_url, query)

# Parse string vectors into numpy arrays
def parse_vector(vec_str):
if vec_str is None:
return None
# Handle both string and list inputs
if isinstance(vec_str, str):
# Remove brackets and split string
vec_str = vec_str.strip('[]')
return np.array([float(x) for x in vec_str.split(',')])
elif isinstance(vec_str, list):
return np.array([float(x) for x in vec_str])
else:
raise TypeError(f"Unexpected type for vector: {type(vec_str)}")

# Convert dense_vector and half_vector to numpy arrays
df['dense_vector'] = df['dense_vector'].apply(parse_vector)
df['half_vector'] = df['half_vector'].apply(parse_vector)

# Verify dense_vector
expected_dense = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
assert df['dense_vector'].iloc[0] is not None
assert np.allclose(df['dense_vector'].iloc[0], expected_dense, rtol=1e-5)
assert df['dense_vector'].iloc[1] is None

# Verify half_vector
expected_half = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])
assert df['half_vector'].iloc[0] is not None
assert np.allclose(df['half_vector'].iloc[0], expected_half, rtol=1e-5)
assert df['half_vector'].iloc[1] is None

# Verify binary_vector and sparse_vector
# Convert binary_vector to string representation for comparison
def binary_to_string(binary):
if binary is None:
return None
# Convert binary to string of 1s and 0s
return ''.join(format(b, '08b') for b in binary)[:10] # Take first 10 bits

df['binary_vector'] = df['binary_vector'].apply(binary_to_string)

# Convert sparse vector array to string format
def sparse_to_string(sparse_vec):
if sparse_vec is None:
return None
# Convert array to sparse format string with integer values
non_zero = {i+1: int(val) for i, val in enumerate(sparse_vec) if val != 0}
return f"{non_zero}/{len(sparse_vec)}"

df['sparse_vector'] = df['sparse_vector'].apply(sparse_to_string)

expected = pd.DataFrame(
data={
"binary_vector": pd.Series(
["1010101010", None],
dtype="object"
),
"sparse_vector": pd.Series(
["{1: 1, 3: 2, 5: 3}/5", None],
dtype="object"
),
},
)
assert_frame_equal(df[['binary_vector', 'sparse_vector']], expected[['binary_vector', 'sparse_vector']], check_names=True)
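
The vector assertions presuppose a `vector_types` fixture table in a pgvector-enabled database (hence the `pgvector/pgvector:pg17` images above). The real fixture lives in the test database seed data; the sketch below only illustrates a table shape consistent with the assertions, and every column type and value in it is an assumption:

```bash
psql postgresql://postgres:postgres@localhost:5433/connectorx <<'SQL'
CREATE EXTENSION IF NOT EXISTS vector;
CREATE TABLE IF NOT EXISTS vector_types (
    dense_vector  vector(10),   -- single-precision pgvector column
    half_vector   halfvec(10),  -- half-precision pgvector column
    binary_vector bit(10),      -- binary vectors use the bit type
    sparse_vector sparsevec(5)  -- text form: {index:value,...}/dim
);
INSERT INTO vector_types VALUES
    ('[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]',
     '[1,2,3,4,5,6,7,8,9,10]',
     B'1010101010',
     '{1:1,3:2,5:3}/5'),
    (NULL, NULL, NULL, NULL);
SQL
```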