|
17 | 17 | end
|
18 | 18 |
|
19 | 19 | describe "#add_texts" do
|
20 |
| - it "indexes data into elasticsearch" do |
| 20 | + it "indexes data into elasticsearch with metadata" do |
| 21 | + metadata = {lang: "en"} |
21 | 22 | es_body = [
|
22 | 23 | {index: {_index: "langchain"}},
|
23 |
| - {input: "simple text", input_vector: [0.1, 0.2, 0.3]} |
| 24 | + {input: "simple text", input_vector: [0.1, 0.2, 0.3], metadata: metadata} |
24 | 25 | ]
|
25 | 26 |
|
26 | 27 | allow_any_instance_of(::Elasticsearch::Client).to receive(:bulk).with(body: es_body)
|
27 | 28 | expect_any_instance_of(::Elasticsearch::Client).to receive(:bulk).with(body: es_body).once
|
28 | 29 |
|
29 |
| - subject.add_texts(texts: ["simple text"]) |
| 30 | + subject.add_texts(texts: ["simple text"], metadatas: [metadata]) |
| 31 | + end |
| 32 | + |
| 33 | + it "raises error when metadatas length mismatch" do |
| 34 | + expect { |
| 35 | + subject.add_texts(texts: ["t1", "t2"], metadatas: [{foo: 1}]) |
| 36 | + }.to raise_error(ArgumentError) |
30 | 37 | end
|
31 | 38 | end
|
32 | 39 |
|
|
38 | 45 | .and_return([0.1, 0.2, 0.3, 0.4])
|
39 | 46 | end
|
40 | 47 |
|
41 |
| - it "updates respective document" do |
| 48 | + it "updates respective document with metadata" do |
| 49 | + metadata = {version: 2} |
42 | 50 | es_body = [
|
43 | 51 | {index: {_index: "langchain", _id: 1}},
|
44 |
| - {input: "updated text", input_vector: [0.1, 0.2, 0.3, 0.4]} |
| 52 | + {input: "updated text", input_vector: [0.1, 0.2, 0.3, 0.4], metadata: metadata} |
45 | 53 | ]
|
46 | 54 |
|
47 | 55 | allow_any_instance_of(::Elasticsearch::Client).to receive(:bulk).with(body: es_body)
|
48 | 56 | expect_any_instance_of(::Elasticsearch::Client).to receive(:bulk).with(body: es_body).once
|
49 | 57 |
|
50 |
| - subject.update_texts(texts: ["updated text"], ids: [1]) |
| 58 | + subject.update_texts(texts: ["updated text"], ids: [1], metadatas: [metadata]) |
51 | 59 | end
|
52 | 60 | end
|
53 | 61 |
|
|
100 | 108 | input: {
|
101 | 109 | type: "text"
|
102 | 110 | },
|
103 |
| - input_vector: {type: "dense_vector", dims: 384} |
| 111 | + input_vector: {type: "dense_vector", dims: 384}, |
| 112 | + metadata: {type: "object", dynamic: true} |
104 | 113 | }
|
105 | 114 | }
|
106 | 115 | }
|
|
117 | 126 | input: {
|
118 | 127 | type: "text"
|
119 | 128 | },
|
120 |
| - input_vector: {type: "dense_vector", dims: 500} |
| 129 | + input_vector: {type: "dense_vector", dims: 500}, |
| 130 | + metadata: {type: "object", dynamic: true} |
121 | 131 | }
|
122 | 132 | }
|
123 | 133 | }
|
|
145 | 155 | end
|
146 | 156 |
|
147 | 157 | describe "#similarity_search" do
|
148 |
| - it "should return similar documents" do |
| 158 | + it "should return similar documents with metadata filter" do |
| 159 | + filter = {term: {"metadata.lang": "en"}} |
149 | 160 | response = [
|
150 | 161 | {_id: 1, input: "simple text", input_vector: [0.1, 0.5, 0.6]},
|
151 | 162 | {_id: 2, input: "update text", input_vector: [0.5, 0.3, 0.1]}
|
|
154 | 165 |
|
155 | 166 | allow(es_response).to receive(:body).and_return(response)
|
156 | 167 | allow_any_instance_of(::Elasticsearch::Client)
|
157 |
| - .to receive(:search).with(body: {query: subject.default_query([0.1, 0.2, 0.3]), size: 5}).and_return(es_response) |
| 168 | + .to receive(:search).with(body: {query: {bool: {must: subject.default_query([0.1, 0.2, 0.3]), filter: filter}}, size: 5}).and_return(es_response) |
158 | 169 |
|
159 | 170 | expect_any_instance_of(::Elasticsearch::Client)
|
160 |
| - .to receive(:search).with(body: {query: subject.default_query([0.1, 0.2, 0.3]), size: 5}) |
| 171 | + .to receive(:search).with(body: {query: {bool: {must: subject.default_query([0.1, 0.2, 0.3]), filter: filter}}, size: 5}) |
161 | 172 | expect(es_response).to receive(:body)
|
162 | 173 |
|
163 |
| - expect(subject.similarity_search(text: "simple", k: 5)).to eq(response) |
| 174 | + expect(subject.similarity_search(text: "simple", k: 5, filter: filter)).to eq(response) |
164 | 175 | end
|
165 | 176 |
|
166 | 177 | it "able to search with custom query" do
|
|
197 | 208 | end
|
198 | 209 |
|
199 | 210 | describe "#similarity_search_by_vector" do
|
200 |
| - it "should return similar documents" do |
| 211 | + it "should return similar documents with metadata filter" do |
| 212 | + filter = {term: {"metadata.lang": "en"}} |
201 | 213 | response = [
|
202 | 214 | {_id: 1, input: "simple text", input_vector: [0.1, 0.5, 0.6]},
|
203 | 215 | {_id: 2, input: "update text", input_vector: [0.5, 0.3, 0.1]}
|
|
206 | 218 |
|
207 | 219 | allow(es_response).to receive(:body).and_return(response)
|
208 | 220 | allow_any_instance_of(::Elasticsearch::Client)
|
209 |
| - .to receive(:search).with(body: {query: subject.default_query([0.5, 0.6, 0.7]), size: 5}).and_return(es_response) |
| 221 | + .to receive(:search).with(body: {query: {bool: {must: subject.default_query([0.5, 0.6, 0.7]), filter: filter}}, size: 5}).and_return(es_response) |
210 | 222 |
|
211 | 223 | expect_any_instance_of(::Elasticsearch::Client)
|
212 |
| - .to receive(:search).with(body: {query: subject.default_query([0.5, 0.6, 0.7]), size: 5}) |
| 224 | + .to receive(:search).with(body: {query: {bool: {must: subject.default_query([0.5, 0.6, 0.7]), filter: filter}}, size: 5}) |
213 | 225 | expect(es_response).to receive(:body)
|
214 | 226 |
|
215 |
| - expect(subject.similarity_search_by_vector(embedding: [0.5, 0.6, 0.7], k: 5)).to eq(response) |
| 227 | + expect(subject.similarity_search_by_vector(embedding: [0.5, 0.6, 0.7], k: 5, filter: filter)).to eq(response) |
216 | 228 | end
|
217 | 229 |
|
218 | 230 | it "able to search with custom query" do
|
|
0 commit comments