28 changes: 28 additions & 0 deletions .devcontainer/Dockerfile
@@ -0,0 +1,28 @@
FROM mcr.microsoft.com/devcontainers/rust:1

# Install system dependencies
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
&& apt-get -y install --no-install-recommends \
sqlite3 \
pkg-config \
libclang-dev \
postgresql-client \
default-mysql-client \
&& apt-get clean -y \
&& rm -rf /var/lib/apt/lists/*

# Install Python and Poetry
ENV POETRY_HOME=/opt/poetry
ENV POETRY_VERSION=2.1.3
ENV PATH="/opt/poetry/bin:$PATH"
RUN curl -sSL https://install.python-poetry.org | python3 - \
&& poetry config virtualenvs.create false

# Install Rust components
RUN rustup component add rustfmt clippy \
&& cargo install cargo-watch \
&& cargo install just

# Set environment variables
ENV PATH="/home/vscode/.local/bin:${PATH}"
ENV PYTHONPATH="/workspaces/connector-x:${PYTHONPATH}"
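
The image can also be exercised outside VS Code. A minimal sketch, assuming Docker is installed, the commands run from the repository root, and the `connectorx-dev` tag is purely illustrative:

```bash
# Build with the repository root as context, matching the
# build.context/dockerfile pair in .devcontainer/docker-compose.yml.
docker build -f .devcontainer/Dockerfile -t connectorx-dev .

# Smoke-test the toolchain baked into the image.
docker run --rm connectorx-dev bash -lc \
    "cargo --version && poetry --version && sqlite3 --version"
```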
21 changes: 21 additions & 0 deletions .devcontainer/devcontainer.json
@@ -0,0 +1,21 @@
{
"name": "ConnectorX Development",
"dockerComposeFile": [
"docker-compose.yml"
],
"service": "connectorx",
"workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}",
"customizations": {
"vscode": {
"extensions": [
"rust-lang.rust-analyzer"
],
"settings": {
"rust-analyzer.checkOnSave.command": "clippy"
}
}
},
"features": {
"ghcr.io/devcontainers/features/rust:1": "latest"
}
}
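
The same configuration can be driven without the editor via the Dev Container CLI; a sketch assuming Node.js/npm is available on the host:

```bash
# One-time install of the Dev Container CLI.
npm install -g @devcontainers/cli

# Build and start the container described by .devcontainer/devcontainer.json.
devcontainer up --workspace-folder .

# Run a command inside the running dev container.
devcontainer exec --workspace-folder . cargo check
```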
48 changes: 48 additions & 0 deletions .devcontainer/docker-compose.yml
@@ -0,0 +1,48 @@
services:
connectorx:
build:
context: ..
dockerfile: .devcontainer/Dockerfile
volumes:
- ..:/workspaces/connectorx:cached
command: sleep infinity
depends_on:
- postgres
- mysql
networks:
- connectorx-network

postgres:
image: pgvector/pgvector:pg17
environment:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
POSTGRES_DB: connectorx
ports:
- "5433:5432"
volumes:
- postgres_data:/var/lib/postgresql/data
networks:
- connectorx-network

mysql:
image: ghcr.io/wangxiaoying/mysql:latest
environment:
MYSQL_DATABASE: mysql
MYSQL_ROOT_PASSWORD: mysql
LANG: C.UTF-8
ports:
- "3306:3306"
volumes:
- mysql_data:/var/lib/mysql
networks:
- connectorx-network

networks:
connectorx-network:
driver: bridge

volumes:
postgres_data:
mysql_data:
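
Once the stack is up, both databases are reachable from the host on the published ports. A quick connectivity check, assuming the `psql` and `mysql` clients are installed on the host (inside the container the services resolve by compose name instead, e.g. `postgres:5432` and `mysql:3306`):

```bash
# Postgres: host port 5433 maps to container port 5432.
psql postgresql://postgres:postgres@localhost:5433/connectorx -c 'SELECT version();'

# MySQL: published on the default port 3306; root password is "mysql".
mysql -h 127.0.0.1 -P 3306 -u root -pmysql -e 'SELECT VERSION();'
```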

4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
@@ -15,7 +15,7 @@ jobs:
container: ubuntu:24.04
services:
postgres:
image: postgres
image: pgvector/pgvector:pg17
env:
POSTGRES_PASSWORD: postgres
# Set health checks to wait until postgres has started
@@ -117,7 +117,7 @@ jobs:
# Label used to access the service container
postgres:
# Docker Hub image
image: postgres
image: pgvector/pgvector:pg17
env:
POSTGRES_PASSWORD: postgres
# Set health checks to wait until postgres has started
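Swapping `postgres` for `pgvector/pgvector:pg17` gives the CI Postgres service the `vector` extension that the new vector tests exercise. One way to confirm the extension ships with the image, assuming Docker is available locally (the extension directory path is an assumption and is globbed for that reason):

```bash
# List the extension control file baked into the image; the PostgreSQL
# major-version directory name may vary, hence the wildcard.
docker run --rm pgvector/pgvector:pg17 \
    bash -lc "ls /usr/share/postgresql/*/extension/vector.control"
```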
32 changes: 32 additions & 0 deletions CONTRIBUTING.md
@@ -9,6 +9,38 @@ This doc describes how you can get started at developing ConnectorX.
Please check out [here](https://sfu-db.github.io/connector-x/install.html#build-from-source-code)


### Run in VSCode Dev Container

1. Install required tools:
- [Docker](https://docs.docker.com/get-docker/)
- [VSCode](https://code.visualstudio.com/)
- [VSCode Remote - Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)

2. Clone the repository and open it in VSCode:
```bash
git clone https://github.com/sfu-db/connector-x.git
code connector-x
```

3. When prompted, click "Reopen in Container", or open the command palette (F1) and select "Remote-Containers: Reopen in Container".

4. The dev container includes:
- Rust development environment with rust-analyzer
- PostgreSQL (pgvector) running on port 5433
- MySQL running on port 3306
- All necessary build tools and dependencies

5. The container will automatically:
- Mount your local repository into the container
- Install Rust toolchain and dependencies
- Configure rust-analyzer with clippy for code analysis
- Set up the development workspace

6. You can now start developing with:
- Full Rust development support
- Integrated database services
- All development tools pre-configured (a quick smoke test is sketched below)
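
A minimal smoke test from a terminal inside the container, assuming the service names and credentials from `.devcontainer/docker-compose.yml`:

```bash
# Toolchain installed by the Dockerfile.
cargo clippy --version && poetry --version

# Databases resolve by compose service name on the shared network.
psql postgresql://postgres:postgres@postgres:5432/connectorx -c '\conninfo'
mysql -h mysql -u root -pmysql -e 'SELECT 1;'
```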

### Run tests

* Set up environment variables by creating a `.env` file under project directory. Here is an example:
Expand Down
12 changes: 12 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

20 changes: 20 additions & 0 deletions connectorx-python/Cargo.lock

Some generated files are not rendered by default.

83 changes: 83 additions & 0 deletions connectorx-python/connectorx/tests/test_postgres.py
@@ -4,6 +4,8 @@
import pytest
from pandas.testing import assert_frame_equal
import datetime
import numpy as np
import ast

from .. import read_sql

@@ -1058,3 +1060,84 @@ def test_postgres_partitioned_pre_execution_queries(postgres_url: str) -> None:
},
).sort_values(by=['name']).reset_index(drop=True)
assert_frame_equal(df, expected, check_names=True)

def test_postgres_inet_type(postgres_url: str) -> None:
query = "SELECT test_inet FROM test_types"
df = read_sql(postgres_url, query)
expected = pd.DataFrame(
data={
"test_inet": pd.Series(
["192.168.1.1", "10.0.0.0/24", "2001:db8::1", "2001:db8::/32", None],
dtype="object"
),
},
)
assert_frame_equal(df, expected, check_names=True)
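
# How to run this test in isolation (illustrative, not part of the suite):
# POSTGRES_URL is assumed to be the env var the test fixtures read, in line
# with the .env-based setup described in CONTRIBUTING.md, e.g. from the
# connectorx-python directory:
#
#   export POSTGRES_URL="postgresql://postgres:postgres@localhost:5433/connectorx"
#   poetry run pytest connectorx/tests/test_postgres.py -k test_postgres_inet_type -v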

def test_postgres_vector_types(postgres_url: str) -> None:
query = "SELECT dense_vector, half_vector, binary_vector, sparse_vector FROM vector_types"
df = read_sql(postgres_url, query)

# Parse string vectors into numpy arrays
def parse_vector(vec_str):
if vec_str is None:
return None
# Handle both string and list inputs
if isinstance(vec_str, str):
# Remove brackets and split string
vec_str = vec_str.strip('[]')
return np.array([float(x) for x in vec_str.split(',')])
elif isinstance(vec_str, list):
return np.array([float(x) for x in vec_str])
else:
raise TypeError(f"Unexpected type for vector: {type(vec_str)}")

# Convert dense_vector and half_vector to numpy arrays
df['dense_vector'] = df['dense_vector'].apply(parse_vector)
df['half_vector'] = df['half_vector'].apply(parse_vector)

# Verify dense_vector
expected_dense = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
assert df['dense_vector'].iloc[0] is not None
assert np.allclose(df['dense_vector'].iloc[0], expected_dense, rtol=1e-5)
assert df['dense_vector'].iloc[1] is None

# Verify half_vector
expected_half = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])
assert df['half_vector'].iloc[0] is not None
assert np.allclose(df['half_vector'].iloc[0], expected_half, rtol=1e-5)
assert df['half_vector'].iloc[1] is None

# Verify binary_vector and sparse_vector
# Convert binary_vector to string representation for comparison
def binary_to_string(binary):
if binary is None:
return None
# Convert binary to string of 1s and 0s
return ''.join(format(b, '08b') for b in binary)[:10] # Take first 10 bits

df['binary_vector'] = df['binary_vector'].apply(binary_to_string)

# Convert sparse vector array to string format
def sparse_to_string(sparse_vec):
if sparse_vec is None:
return None
# Convert array to sparse format string with integer values
non_zero = {i+1: int(val) for i, val in enumerate(sparse_vec) if val != 0}
return f"{non_zero}/{len(sparse_vec)}"

df['sparse_vector'] = df['sparse_vector'].apply(sparse_to_string)

expected = pd.DataFrame(
data={
"binary_vector": pd.Series(
["1010101010", None],
dtype="object"
),
"sparse_vector": pd.Series(
["{1: 1, 3: 2, 5: 3}/5", None],
dtype="object"
),
},
)
assert_frame_equal(df[['binary_vector', 'sparse_vector']], expected[['binary_vector', 'sparse_vector']], check_names=True)
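
The vector assertions presuppose a `vector_types` fixture table in a pgvector-enabled database (hence the `pgvector/pgvector:pg17` images above). The real fixture lives in the test database seed data; the sketch below only illustrates a table shape consistent with the assertions, and every column type and value in it is an assumption:

```bash
psql postgresql://postgres:postgres@localhost:5433/connectorx <<'SQL'
CREATE EXTENSION IF NOT EXISTS vector;
CREATE TABLE IF NOT EXISTS vector_types (
    dense_vector  vector(10),   -- single-precision pgvector column
    half_vector   halfvec(10),  -- half-precision pgvector column
    binary_vector bit(10),      -- binary vectors use the bit type
    sparse_vector sparsevec(5)  -- text form: {index:value,...}/dim
);
INSERT INTO vector_types VALUES
    ('[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]',
     '[1,2,3,4,5,6,7,8,9,10]',
     B'1010101010',
     '{1:1,3:2,5:3}/5'),
    (NULL, NULL, NULL, NULL);
SQL
```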