Skip to content

community[minor]: add mongodb byte store #23876

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions libs/community/langchain_community/storage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@
from langchain_community.storage.cassandra import (
CassandraByteStore,
)
from langchain_community.storage.mongodb import (
MongoDBStore,
)
from langchain_community.storage.mongodb import MongoDBByteStore, MongoDBStore
from langchain_community.storage.redis import (
RedisStore,
)
Expand All @@ -44,6 +42,7 @@
"AstraDBStore",
"CassandraByteStore",
"MongoDBStore",
"MongoDBByteStore",
"RedisStore",
"SQLStore",
"UpstashRedisByteStore",
Expand All @@ -55,6 +54,7 @@
"AstraDBStore": "langchain_community.storage.astradb",
"CassandraByteStore": "langchain_community.storage.cassandra",
"MongoDBStore": "langchain_community.storage.mongodb",
"MongoDBByteStore": "langchain_community.storage.mongodb",
"RedisStore": "langchain_community.storage.redis",
"SQLStore": "langchain_community.storage.sql",
"UpstashRedisByteStore": "langchain_community.storage.upstash_redis",
Expand Down
124 changes: 123 additions & 1 deletion libs/community/langchain_community/storage/mongodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,126 @@
from langchain_core.stores import BaseStore


class MongoDBByteStore(BaseStore[str, bytes]):
    """BaseStore implementation using MongoDB as the underlying store.

    Each entry is stored as one MongoDB document of the form
    ``{"_id": <key>, "value": <bytes>}``.

    Examples:
        Create a MongoDBByteStore instance and perform operations on it:

        .. code-block:: python

            # Instantiate the MongoDBByteStore with a MongoDB connection
            from langchain_community.storage import MongoDBByteStore

            mongo_conn_str = "mongodb://localhost:27017/"
            mongodb_store = MongoDBByteStore(mongo_conn_str, db_name="test-db",
                                             collection_name="test-collection")

            # Set values for keys
            mongodb_store.mset([("key1", b"hello"), ("key2", b"world")])

            # Get values for keys
            values = mongodb_store.mget(["key1", "key2"])
            # [b"hello", b"world"]

            # Iterate over keys
            for key in mongodb_store.yield_keys():
                print(key)

            # Delete keys
            mongodb_store.mdelete(["key1", "key2"])
    """

    def __init__(
        self,
        connection_string: str,
        db_name: str,
        collection_name: str,
        *,
        client_kwargs: Optional[dict] = None,
    ) -> None:
        """Initialize the MongoDBByteStore with a MongoDB connection string.

        Args:
            connection_string (str): MongoDB connection string
            db_name (str): database name to use
            collection_name (str): collection name to use
            client_kwargs (dict): Keyword arguments to pass to the Mongo client

        Raises:
            ImportError: If ``pymongo`` is not installed.
            ValueError: If any of ``connection_string``, ``db_name`` or
                ``collection_name`` is empty.
        """
        try:
            from pymongo import MongoClient
        except ImportError as e:
            raise ImportError(
                "The MongoDBByteStore requires the pymongo library to be "
                "installed. "
                "pip install pymongo"
            ) from e

        if not connection_string:
            raise ValueError("connection_string must be provided.")
        if not db_name:
            raise ValueError("db_name must be provided.")
        if not collection_name:
            raise ValueError("collection_name must be provided.")

        self.client: MongoClient = MongoClient(
            connection_string, **(client_kwargs or {})
        )
        self.collection = self.client[db_name][collection_name]

    def mget(self, keys: Sequence[str]) -> List[Optional[bytes]]:
        """Get the values associated with the given keys.

        Args:
            keys (list[str]): A list of keys to look up.

        Returns:
            list[Optional[bytes]]: One entry per requested key, in the same
                order as ``keys``; ``None`` where no document has that key.
        """
        # ``$in`` needs a BSON array; a list is always encoded as one, whereas
        # an arbitrary Sequence implementation may not be.
        result = self.collection.find({"_id": {"$in": list(keys)}})
        result_dict = {doc["_id"]: doc["value"] for doc in result}
        return [result_dict.get(key) for key in keys]

    def mset(self, key_value_pairs: Sequence[Tuple[str, bytes]]) -> None:
        """Set the given key-value pairs, overwriting existing keys.

        Args:
            key_value_pairs (list[tuple[str, bytes]]): A list of key-value
                pairs to upsert.
        """
        from pymongo import UpdateOne

        operations = [
            UpdateOne({"_id": k}, {"$set": {"_id": k, "value": v}}, upsert=True)
            for k, v in key_value_pairs
        ]
        # pymongo rejects an empty bulk write, so setting nothing is a no-op.
        if not operations:
            return
        self.collection.bulk_write(operations)

    def mdelete(self, keys: Sequence[str]) -> None:
        """Delete the given keys.

        Args:
            keys (list[str]): A list of keys to remove. Keys that are not
                present are ignored by the underlying ``delete_many`` filter.
        """
        self.collection.delete_many({"_id": {"$in": list(keys)}})

    def yield_keys(self, prefix: Optional[str] = None) -> Iterator[str]:
        """Yield keys in the store.

        Args:
            prefix (str): prefix of keys to retrieve. It is matched literally;
                regular-expression metacharacters in it have no special
                meaning. ``None`` yields every key.
        """
        import re

        if prefix is None:
            query: dict = {}
        else:
            # Escape the prefix so characters such as "." or "+" are compared
            # verbatim instead of being interpreted as regex syntax.
            query = {"_id": {"$regex": f"^{re.escape(prefix)}"}}

        for doc in self.collection.find(query, projection=["_id"]):
            yield doc["_id"]


class MongoDBStore(BaseStore[str, Document]):
"""BaseStore implementation using MongoDB as the underlying store.

Expand Down Expand Up @@ -68,7 +188,9 @@ def __init__(
if not collection_name:
raise ValueError("collection_name must be provided.")

self.client = MongoClient(connection_string, **(client_kwargs or {}))
self.client: MongoClient = MongoClient(
connection_string, **(client_kwargs or {})
)
self.collection = self.client[db_name][collection_name]

def mget(self, keys: Sequence[str]) -> List[Optional[Document]]:
Expand Down
21 changes: 19 additions & 2 deletions libs/community/tests/integration_tests/storage/test_mongodb.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from typing import Generator
from typing import Generator, Tuple

import pytest
from langchain_core.documents import Document
from langchain_standard_tests.integration_tests.base_store import BaseStoreSyncTests

from langchain_community.storage.mongodb import MongoDBStore
from langchain_community.storage.mongodb import MongoDBByteStore, MongoDBStore

pytest.importorskip("pymongo")

Expand Down Expand Up @@ -71,3 +72,19 @@ def test_mdelete(mongo_store: MongoDBStore) -> None:
def test_init_errors() -> None:
    """Constructing a ``MongoDBStore`` from empty arguments raises ``ValueError``."""
    empty_args = ("", "", "")
    with pytest.raises(ValueError):
        MongoDBStore(*empty_args)


class TestMongoDBStore(BaseStoreSyncTests):
    """Exercise ``MongoDBByteStore`` through the inherited ``BaseStoreSyncTests``."""

    @pytest.fixture
    def three_values(self) -> Tuple[bytes, bytes, bytes]:
        """Provide three byte values for the inherited tests to use."""
        samples = (b"foo", b"bar", b"buzz")
        return samples

    @pytest.fixture
    def kv_store(self) -> MongoDBByteStore:
        """Build a ``MongoDBByteStore`` while mongomock is patching pymongo."""
        import mongomock

        mock_servers = (("localhost", 27017),)
        # mongomock creates a mock MongoDB instance for testing purposes; the
        # store must be constructed while the patch is active.
        with mongomock.patch(servers=mock_servers):
            store = MongoDBByteStore(
                "mongodb://localhost:27017/", "test_db", "test_collection"
            )
        return store
1 change: 1 addition & 0 deletions libs/community/tests/unit_tests/storage/test_imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"AstraDBStore",
"AstraDBByteStore",
"CassandraByteStore",
"MongoDBByteStore",
"MongoDBStore",
"SQLStore",
"RedisStore",
Expand Down
Loading