File size: 3,433 Bytes
b3f9415
 
 
 
2834b30
b3f9415
 
2834b30
b3f9415
2834b30
b3f9415
 
 
 
2834b30
 
b3f9415
2834b30
 
 
 
 
 
 
b3f9415
2834b30
 
b3f9415
 
 
 
2834b30
b3f9415
 
2834b30
 
 
 
 
 
 
 
 
 
 
 
b3f9415
2834b30
 
b3f9415
 
 
 
 
2834b30
 
b3f9415
2834b30
 
 
 
b3f9415
2834b30
b3f9415
 
 
 
 
 
2834b30
b3f9415
 
 
 
2834b30
 
 
 
b3f9415
 
 
 
 
 
 
 
 
 
 
 
 
2834b30
b3f9415
 
 
 
 
 
 
 
 
 
 
 
 
2834b30
b3f9415
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a9f7a25f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load environment variables, the project config, the Supabase client and the embedding model\n",
    "import os\n",
    "import json\n",
    "from dotenv import load_dotenv\n",
    "from supabase.client import Client, create_client\n",
    "from sentence_transformers import SentenceTransformer\n",
    "from utils import load_config\n",
    "\n",
    "# Must run before the credentials below are read from the environment.\n",
    "load_dotenv()\n",
    "\n",
    "# `data` is the path that the next cell opens as the JSONL input.\n",
    "config = load_config()\n",
    "data = config[\"data\"]\n",
    "\n",
    "# Credentials come from the environment; .get() yields None for an unset variable.\n",
    "supabase_url = os.environ.get(\"SUPABASE_URL\")\n",
    "supabase_key = os.environ.get(\"SUPABASE_SERVICE_KEY\")\n",
    "supabase: Client = create_client(supabase_url, supabase_key)\n",
    "\n",
    "# Model name and cache directory are both taken from the config.\n",
    "embeddings = SentenceTransformer(\n",
    "    model_name_or_path=config[\"vector_store\"][\"embedding_model_name\"],\n",
    "    cache_folder=config[\"models\"][\"cache_folder\"],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f2c5492b",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/kpatelis/projects/gaia/.venv/lib/python3.13/site-packages/torch/_dynamo/guards.py:1114: RuntimeWarning: Guards may run slower on Python 3.13.0. Consider upgrading to Python 3.13.1+.\n",
      "  warnings.warn(\n",
      "/home/kpatelis/projects/gaia/.venv/lib/python3.13/site-packages/torch/_dynamo/guards.py:1114: RuntimeWarning: Guards may run slower on Python 3.13.0. Consider upgrading to Python 3.13.1+.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "# Reading JSONL file and creating documents with embeddings\n",
    "#\n",
    "# Every non-blank line of the file at `data` is parsed as one JSON object. Its\n",
    "# 'Question' value becomes the document content and is embedded; 'task_id' is\n",
    "# kept in the metadata.\n",
    "documents = []\n",
    "# UTF-8 is requested explicitly so decoding does not depend on the platform default.\n",
    "with open(data, 'r', encoding='utf-8') as jsonl_file:\n",
    "    for line_number, json_str in enumerate(jsonl_file, start=1):\n",
    "        # Blank lines (e.g. a stray newline at the end of the file) hold no record.\n",
    "        if not json_str.strip():\n",
    "            continue\n",
    "        try:\n",
    "            json_data = json.loads(json_str)\n",
    "        except json.JSONDecodeError as error:\n",
    "            # Report which line is malformed; JSONDecodeError is itself a ValueError.\n",
    "            raise ValueError(f\"Invalid JSON on line {line_number} of {data}: {error}\") from error\n",
    "        content = f\"{json_data['Question']}\"\n",
    "        # encode() is asked to normalize the vector; .tolist() makes it a plain Python list.\n",
    "        embedding = embeddings.encode(content, normalize_embeddings=True).tolist()\n",
    "        documents.append({\n",
    "            \"content\": content,\n",
    "            \"metadata\": {\n",
    "                \"source\": \"vector_search\",\n",
    "                \"task_id\": json_data['task_id']\n",
    "            },\n",
    "            \"embedding\": embedding,\n",
    "        })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "26ddbafd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inserting documents into Supabase\n",
    "\n",
    "# Note1: The pgvector extension needs to be enabled so the database can act as a vector database\n",
    "# Note2: The table needs to be created beforehand in Supabase, with the column types\n",
    "# Note3: This is a plain insert, so re-running the cell sends the same rows again\n",
    "if not documents:\n",
    "    # Nothing to send; skip the request rather than posting an empty list.\n",
    "    print(\"No documents to insert into Supabase.\")\n",
    "else:\n",
    "    try:\n",
    "        response = (\n",
    "            supabase.table(\"gaia_documents\")\n",
    "            .insert(documents)\n",
    "            .execute()\n",
    "        )\n",
    "    except Exception as exception:\n",
    "        # Keep the original message, then re-raise so the failure is not mistaken for success.\n",
    "        print(\"Error inserting data into Supabase:\", exception)\n",
    "        raise"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "gaia",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}