-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexample.jl
46 lines (40 loc) · 1.38 KB
/
example.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
using HTTP, JSON, LibPQ, Tables
# Open the database connection used by the rest of the script.
conn = LibPQ.Connection("dbname=pgvector_example host=localhost")

# Run the schema statements in order: make sure the `vector` extension exists,
# then drop and recreate the `documents` table so every run starts empty.
for statement in (
    "CREATE EXTENSION IF NOT EXISTS vector",
    "DROP TABLE IF EXISTS documents",
    "CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding bit(1536))",
)
    execute(conn, statement)
end
"""
    embed(texts, type)

POST `texts` to the Cohere v2 embed endpoint, requesting `"ubinary"` embeddings
from the `"embed-v4.0"` model, and return one bit string per embedding.

# Arguments
- `texts`: the value sent as the `"texts"` field of the request body.
- `type`: the value sent as the `"input_type"` field of the request body.

# Returns
A vector of strings made of `0`/`1` characters. Every byte value in a returned
embedding is converted to `UInt8` and expanded to its 8-character `bitstring`,
and the pieces are concatenated in order.

# Throws
- `KeyError` if the `CO_API_KEY` environment variable is not set.
"""
function embed(texts, type)
    endpoint = "https://api.cohere.com/v2/embed"

    request_headers = [
        "authorization" => "Bearer " * ENV["CO_API_KEY"],
        "content-type" => "application/json",
    ]
    payload = Dict(
        "texts" => texts,
        "model" => "embed-v4.0",
        "input_type" => type,
        "embedding_types" => ["ubinary"],
    )

    response = HTTP.request("POST", endpoint, request_headers, JSON.json(payload))
    parsed = JSON.parse(String(response.body))
    packed = parsed["embeddings"]["ubinary"]

    # Each embedding arrives as a list of byte values; expand every byte to 8 bits.
    return [join(bitstring(UInt8(byte)) for byte in bytes) for bytes in packed]
end
# Sample documents to index.
input = ["The dog is barking", "The cat is purring", "The bear is growling"]

# Embed the documents and insert one row per (content, embedding) pair.
embeddings = embed(input, "search_document")
LibPQ.load!(
    (content = input, embedding = embeddings),
    conn,
    "INSERT INTO documents (content, embedding) VALUES (\$1, \$2)",
)

# Embed the search text and fetch up to five rows ordered by `embedding <~> $1`.
query = "forest"
embedding = embed([query], "search_query")[1]
result = execute(
    conn,
    "SELECT content FROM documents ORDER BY embedding <~> \$1 LIMIT 5",
    [embedding],
)

# Print the first (and only selected) column of every returned row.
rows = Tables.rows(columntable(result))
foreach(row -> println(Tables.getcolumn(row, 1)), rows)

close(conn)