{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a9f7a25f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load environment variables, read the project config, and initialise the\n",
    "# Supabase client and the SentenceTransformer embedding model.\n",
    "import os\n",
    "import json\n",
    "from dotenv import load_dotenv\n",
    "from supabase.client import Client, create_client\n",
    "from sentence_transformers import SentenceTransformer\n",
    "from utils import load_config\n",
    "\n",
    "load_dotenv()\n",
    "\n",
    "config = load_config()\n",
    "# Path of the JSONL file to index; it is opened by the next cell.\n",
    "data = config[\"data\"]\n",
    "\n",
    "supabase_url = os.getenv(\"SUPABASE_URL\")\n",
    "supabase_key = os.getenv(\"SUPABASE_SERVICE_KEY\")\n",
    "\n",
    "# Fail early with an actionable message instead of handing None to create_client.\n",
    "required_env = {\"SUPABASE_URL\": supabase_url, \"SUPABASE_SERVICE_KEY\": supabase_key}\n",
    "missing_env = [name for name, value in required_env.items() if not value]\n",
    "if missing_env:\n",
    "    raise RuntimeError(f\"Missing required environment variable(s): {', '.join(missing_env)}\")\n",
    "\n",
    "supabase: Client = create_client(supabase_url, supabase_key)\n",
    "embeddings = SentenceTransformer(\n",
    "    model_name_or_path=config[\"vector_store\"][\"embedding_model_name\"],\n",
    "    cache_folder=config[\"models\"][\"cache_folder\"],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2c5492b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the JSONL file and build one document (content, metadata, embedding) per record.\n",
    "with open(data, \"r\", encoding=\"utf-8\") as jsonl_file:\n",
    "    # Skip blank lines (e.g. a trailing newline), which json.loads would reject.\n",
    "    records = [json.loads(line) for line in jsonl_file if line.strip()]\n",
    "\n",
    "# str() coerces non-string question values to text before encoding.\n",
    "contents = [str(record[\"Question\"]) for record in records]\n",
    "\n",
    "# Encode every question in a single call so the model can batch the work\n",
    "# instead of running one forward pass per question.\n",
    "if contents:\n",
    "    vectors = embeddings.encode(contents, normalize_embeddings=True).tolist()\n",
    "else:\n",
    "    vectors = []\n",
    "\n",
    "documents = [\n",
    "    {\n",
    "        \"content\": content,\n",
    "        \"metadata\": {\n",
    "            \"source\": \"vector_search\",\n",
    "            \"task_id\": record[\"task_id\"],\n",
    "        },\n",
    "        \"embedding\": vector,\n",
    "    }\n",
    "    for record, content, vector in zip(records, contents, vectors)\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "26ddbafd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Insert the documents into Supabase.\n",
    "#\n",
    "# Note 1: the pgvector extension must be enabled so the database can store vectors.\n",
    "# Note 2: the table must be created beforehand in Supabase, with matching column types.\n",
    "# NOTE(review): insert() adds new rows, so re-running this cell presumably stores the\n",
    "# same documents again unless the table enforces uniqueness - confirm against the schema.\n",
    "if not documents:\n",
    "    print(\"No documents to insert; skipping Supabase insert.\")\n",
    "else:\n",
    "    try:\n",
    "        response = (\n",
    "            supabase.table(\"gaia_documents\")\n",
    "            .insert(documents)\n",
    "            .execute()\n",
    "        )\n",
    "    except Exception as exception:\n",
    "        # Report the failure without raising, so the error message stays readable.\n",
    "        print(\"Error inserting data into Supabase:\", exception)\n",
    "    else:\n",
    "        print(f\"Inserted {len(response.data)} documents into gaia_documents.\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "gaia",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}