Skip to content

Commit 286e0aa

Browse files
authored
Merge pull request #7 from msoedov/ralph/improvements
improvements
2 parents 31e375d + dc17d0e commit 286e0aa

File tree

15 files changed

+479
-873
lines changed

15 files changed

+479
-873
lines changed

.flake8

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
[flake8]
22
max-line-length = 160
3+
# E203 is whitespace before ':' - conflicts with black's slice formatting
4+
extend-ignore = E203
35
per-file-ignores =
46
# Ignore docstring lints (flake8-docstrings D-codes) for all files; the `*` pattern below matches every path, not only tests
57
*: D100, D101, D102, D103, D104, D107, D105, D401, D400, D205, D202, D209

.pre-commit-config.yaml

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,57 @@
1-
21
default_language_version:
3-
python: python3
2+
python: python3.11
43

54
repos:
65

76
- repo: https://github.com/asottile/pyupgrade
8-
rev: v2.31.1
7+
rev: v3.21.2
98
hooks:
109
- id: pyupgrade
11-
args: [--py39-plus]
10+
args: [--py311-plus]
1211

1312
- repo: https://github.com/psf/black
14-
rev: 22.8.0
13+
rev: 26.1.0
1514
hooks:
1615
- id: black
17-
language_version: python3.9
16+
language_version: python3.11
1817

1918
- repo: https://github.com/pycqa/flake8
20-
rev: 5.0.4
19+
rev: 7.3.0
2120
hooks:
2221
- id: flake8
2322
language_version: python3
2423
additional_dependencies: [flake8-docstrings]
2524

2625
- repo: https://github.com/PyCQA/isort
27-
rev: 5.12.0
26+
rev: 7.0.0
2827
hooks:
2928
- id: isort
3029
args: [--profile, black]
3130

3231
- repo: https://github.com/pre-commit/pre-commit-hooks
33-
rev: v4.3.0
32+
rev: v6.0.0
3433
hooks:
3534
- id: end-of-file-fixer
3635
- id: check-yaml
3736
- id: trailing-whitespace
3837
- id: check-merge-conflict
3938

4039
- repo: https://github.com/executablebooks/mdformat
41-
rev: 0.7.14
40+
rev: 1.0.0
4241
hooks:
4342
- id: mdformat
4443
name: mdformat
4544
entry: mdformat .
4645
language_version: python3
4746

48-
49-
- repo: https://github.com/myint/docformatter
50-
rev: v1.4
47+
- repo: https://github.com/PyCQA/docformatter
48+
rev: v1.7.7
5149
hooks:
5250
- id: docformatter
5351
args: [--in-place]
5452

5553
- repo: https://github.com/hadialqattan/pycln
56-
rev: v2.1.1 # Possible releases: https://github.com/hadialqattan/pycln/releases
54+
rev: v2.6.0
5755
hooks:
5856
- id: pycln
5957

pyproject.toml

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,33 +3,33 @@ name = "vector_lake"
33
version = "0.0.5"
44
description = "S3 vector database for bigdata"
55
readme = "Readme.md"
6-
requires-python = ">=3.9,<4.0"
6+
requires-python = ">=3.11,<4.0"
77
license = { text = "MIT" }
88
keywords = ["vector", "database", "bigdata"]
99
authors = [{ name = "Alexander Miasoiedov", email = "msoedov@gmail.com" }]
1010
maintainers = [{ name = "Alexander Miasoiedov", email = "msoedov@gmail.com" }]
1111
dependencies = [
12-
"fire>=0.5.0,<0.6.0",
13-
"loguru>=0.7.0,<0.8.0",
14-
"numpy>=1.26.4,<2.0.0",
15-
"pandas>=2.2.2,<3.0.0",
16-
"pyarrow>=16.1.0,<17.0.0",
17-
"pydantic>=1.10.15,<2.0.0",
18-
"pytz>=2024.1,<2025.0",
19-
"scikit-learn>=1.4.2,<2.0.0",
12+
"fire>=0.7.1,<0.8.0",
13+
"loguru>=0.7.3,<0.8.0",
14+
"numpy>=2.0.0,<3.0.0",
15+
"pandas>=3.0.0,<4.0.0",
16+
"pyarrow>=23.0.0,<24.0.0",
17+
"pydantic>=1.10.24,<2.0.0",
18+
"pytz>=2025.2",
19+
"scikit-learn>=1.8.0,<2.0.0",
2020
]
2121
urls = { "Repository" = "https://github.com/msoedov/vector_lake" }
2222

2323
[project.optional-dependencies]
24-
s3 = ["boto3>=1.34.115,<2.0.0"]
24+
s3 = ["boto3>=1.42.0,<2.0.0"]
2525
dev = [
26-
"black>=24.4.2,<25.0.0",
27-
"mypy>=1.10.0,<2.0.0",
28-
"httpx>=0.27.0,<0.28.0",
29-
"pytest>=8.2.0,<9.0.0",
30-
"types-requests>=2.31.0,<3.0.0",
31-
"pre-commit>=3.7.1,<4.0.0",
32-
"hypothesis>=6.99.13,<7.0.0",
26+
"black>=26.1.0,<27.0.0",
27+
"mypy>=1.19.0,<2.0.0",
28+
"httpx>=0.28.0,<0.29.0",
29+
"pytest>=9.0.0,<10.0.0",
30+
"types-requests>=2.32.0,<3.0.0",
31+
"pre-commit>=4.5.0,<5.0.0",
32+
"hypothesis>=6.151.0,<7.0.0",
3333
"pytest-benchmark==4.0.0",
3434
]
3535

@@ -54,25 +54,25 @@ packages = [{ include = "vector_lake", from = "." }]
5454
vector_lake = "vector_lake.__main__:entrypoint"
5555

5656
[tool.poetry.dependencies]
57-
python = "^3.9"
58-
fire = "^0.5.0"
59-
loguru = "^0.7.0"
60-
numpy = "^1.26.4"
61-
pandas = "^2.2.2"
62-
pyarrow = "^16.1.0"
63-
pydantic = "^1.10.15"
64-
pytz = "^2024.1"
65-
scikit-learn = "^1.4.2"
66-
boto3 = { version = "^1.34.115", optional = true }
57+
python = "^3.11"
58+
fire = "^0.7.1"
59+
loguru = "^0.7.3"
60+
numpy = "^2.0.0"
61+
pandas = "^3.0.0"
62+
pyarrow = "^23.0.0"
63+
pydantic = "^1.10.24"
64+
pytz = "^2025.2"
65+
scikit-learn = "^1.8.0"
66+
boto3 = { version = "^1.42.0", optional = true }
6767

6868
[tool.poetry.group.dev.dependencies]
69-
black = "^24.4.2"
70-
mypy = "^1.10.0"
71-
httpx = "^0.27.0"
72-
pytest = "^8.2.0"
73-
types-requests = "^2.31.0"
74-
pre-commit = "^3.7.1"
75-
hypothesis = "^6.99.13"
69+
black = "^26.1.0"
70+
mypy = "^1.19.0"
71+
httpx = "^0.28.0"
72+
pytest = "^9.0.0"
73+
types-requests = "^2.32.0"
74+
pre-commit = "^4.5.0"
75+
hypothesis = "^6.151.0"
7676
pytest-benchmark = "4.0.0"
7777

7878
[tool.poetry.extras]

tests/__init__.py

Whitespace-only changes.

tests/conftest.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ def index_factory():
3333
@pytest.fixture(scope="session")
3434
def s3_index_factory():
3535
if not os.environ.get("RUN_S3_TESTS"):
36-
pytest.skip("RUN_S3_TESTS is not set; skipping S3 integration tests by default.")
36+
pytest.skip(
37+
"RUN_S3_TESTS is not set; skipping S3 integration tests by default."
38+
)
3739
pytest.importorskip("boto3")
3840
os.environ.setdefault("LOCALSTACK_ENDPOINT_URL", "http://localhost:4566")
3941
os.environ.setdefault("AWS_ACCESS_KEY_ID", "foo")

tests/integration/__init__.py

Whitespace-only changes.

tests/integration/test_s3.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import numpy as np
2+
3+
4+
class TestS3Integration:
5+
def test_end_to_end_s3(self, s3_index_factory):
6+
index = s3_index_factory(
7+
location="s3://unittest-vector-lake",
8+
dimension=5,
9+
approx_shards=243,
10+
size=0,
11+
force_clean=True,
12+
)
13+
vector = np.random.rand(1, 5)[0]
14+
closest_vectors = index.query(vector, 4)
15+
assert len(closest_vectors) == 0
16+
index.add(vector, metadata={"id": 1}, document="unit test")
17+
closest_vectors = index.query(vector, 4)
18+
assert len(closest_vectors) == 1
19+
index.persist()
20+
# Reload the index
21+
index = s3_index_factory(
22+
location="s3://unittest-vector-lake",
23+
dimension=5,
24+
approx_shards=243,
25+
size=0,
26+
)
27+
closest_vectors = index.query(vector, 4)
28+
assert len(closest_vectors) == 1
29+
assert np.array_equal(closest_vectors[0]["vector"], vector)
30+
index.delete_remote()

tests/system/__init__.py

Whitespace-only changes.

tests/unit/__init__.py

Whitespace-only changes.
Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
from vector_lake.core.index import make_granularity, make_nodes
44

55

6-
class TestUnit:
7-
# Tests that the function works when D is 1
6+
class TestMakeGranularity:
87
def test_D_is_1(self):
98
D = 1
109
M = 10
@@ -16,7 +15,6 @@ def test_D_is_1(self):
1615
assert all(level <= M for level in levels)
1716
assert sum(levels) == M
1817

19-
# Tests that D and M are positive integers
2018
def test_positive_integers(self):
2119
D = 3
2220
M = 10
@@ -25,15 +23,15 @@ def test_positive_integers(self):
2523
assert all(isinstance(level, int) for level in levels)
2624
assert all(level > 0 for level in levels)
2725

28-
# Tests that the function works with levels and num_shards as 1
26+
27+
class TestMakeNodes:
2928
def test_levels_and_num_shards_as_1(self):
3029
levels = [1, 1]
3130
num_shards = 1
3231
nodes = make_nodes(levels, num_shards)
3332
assert nodes.shape == (1, 2)
3433
assert np.allclose(nodes, np.array([[0.0, 0.0]]))
3534

36-
# Tests that the function works with default values for levels and num_shards
3735
def test_default_values(self):
3836
levels = [2, 2]
3937
num_shards = 4
@@ -43,6 +41,8 @@ def test_default_values(self):
4341
nodes, np.array([[0.0, 0.0], [0.0, 0.5], [0.5, 0.0], [0.5, 0.5]])
4442
)
4543

44+
45+
class TestEndToEnd:
4646
def test_end_to_end(self, index_factory):
4747
index = index_factory(
4848
location="/tmp/cosine/empty",
@@ -68,8 +68,3 @@ def test_end_to_end(self, index_factory):
6868
closest_vectors = index.query(vector, 4)
6969
assert len(closest_vectors) == 1
7070
assert np.array_equal(closest_vectors[0]["vector"], vector)
71-
return index
72-
73-
def test_end_to_end_s3(self, s3_index_factory):
74-
index = self.test_end_to_end(s3_index_factory)
75-
index.delete_remote()

0 commit comments

Comments
 (0)