Spaces:
Sleeping
Sleeping
File size: 3,433 Bytes
b3f9415 2834b30 b3f9415 2834b30 b3f9415 2834b30 b3f9415 2834b30 b3f9415 2834b30 b3f9415 2834b30 b3f9415 2834b30 b3f9415 2834b30 b3f9415 2834b30 b3f9415 2834b30 b3f9415 2834b30 b3f9415 2834b30 b3f9415 2834b30 b3f9415 2834b30 b3f9415 2834b30 b3f9415 2834b30 b3f9415 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 | {
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "a9f7a25f",
"metadata": {},
"outputs": [],
"source": [
"# Load environment variables, then initialize the Supabase client and the SentenceTransformer model\n",
"import os\n",
"import json\n",
"from dotenv import load_dotenv\n",
"from supabase.client import Client, create_client\n",
"from sentence_transformers import SentenceTransformer\n",
"from utils import load_config\n",
"\n",
"# Make variables from a local .env file (if any) visible through os.getenv\n",
"load_dotenv()\n",
"\n",
"config = load_config()\n",
"# Path of the JSONL dataset; it is opened by the cell that builds the documents\n",
"data = config[\"data\"]\n",
"\n",
"supabase_url = os.getenv(\"SUPABASE_URL\")\n",
"supabase_key = os.getenv(\"SUPABASE_SERVICE_KEY\")\n",
"\n",
"# os.getenv returns None for unset variables: stop here and name the missing ones\n",
"# instead of handing None/empty credentials to create_client\n",
"missing_vars = [\n",
"    name\n",
"    for name, value in ((\"SUPABASE_URL\", supabase_url), (\"SUPABASE_SERVICE_KEY\", supabase_key))\n",
"    if not value\n",
"]\n",
"if missing_vars:\n",
"    raise RuntimeError(\"Missing required environment variable(s): \" + \", \".join(missing_vars))\n",
"\n",
"supabase: Client = create_client(supabase_url, supabase_key)\n",
"\n",
"# Model name and cache directory both come from the project config\n",
"embeddings = SentenceTransformer(\n",
"    model_name_or_path=config[\"vector_store\"][\"embedding_model_name\"],\n",
"    cache_folder=config[\"models\"][\"cache_folder\"],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "f2c5492b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/kpatelis/projects/gaia/.venv/lib/python3.13/site-packages/torch/_dynamo/guards.py:1114: RuntimeWarning: Guards may run slower on Python 3.13.0. Consider upgrading to Python 3.13.1+.\n",
" warnings.warn(\n",
"/home/kpatelis/projects/gaia/.venv/lib/python3.13/site-packages/torch/_dynamo/guards.py:1114: RuntimeWarning: Guards may run slower on Python 3.13.0. Consider upgrading to Python 3.13.1+.\n",
" warnings.warn(\n"
]
}
],
"source": [
"# Read the JSONL file and build one document (content, metadata, embedding) per record\n",
"\n",
"# Explicit UTF-8 so decoding does not depend on the platform's locale encoding\n",
"with open(data, 'r', encoding='utf-8') as jsonl_file:\n",
"    # Drop whitespace-only lines (e.g. a trailing blank line), which json.loads would reject\n",
"    json_list = [line for line in jsonl_file if line.strip()]\n",
"\n",
"documents = []\n",
"for json_str in json_list:\n",
"    json_data = json.loads(json_str)\n",
"    # Only the question text is embedded and stored as the document content\n",
"    content = f\"{json_data['Question']}\"\n",
"    # Request a normalized embedding and convert it to a plain list for the insert payload\n",
"    embedding = embeddings.encode(content, normalize_embeddings=True).tolist()\n",
"    document = {\n",
"        \"content\": content,\n",
"        \"metadata\": {\n",
"            \"source\": \"vector_search\",\n",
"            \"task_id\": json_data['task_id']\n",
"        },\n",
"        \"embedding\": embedding,\n",
"    }\n",
"    documents.append(document)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "26ddbafd",
"metadata": {},
"outputs": [],
"source": [
"# Insert the documents into the Supabase table \"gaia_documents\"\n",
"\n",
"# Note 1: the pgvector extension must be enabled so the database can act as a vector store\n",
"# Note 2: the table must be created beforehand in Supabase, with the proper column types\n",
"try:\n",
"    response = (\n",
"        supabase.table(\"gaia_documents\")\n",
"        .insert(documents)\n",
"        .execute()\n",
"    )\n",
"except Exception as exception:\n",
"    print(\"Error inserting data into Supabase:\", exception)\n",
"    # Re-raise so the cell is marked as failed; printing alone would let a\n",
"    # failed insert look like a successful run\n",
"    raise"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "gaia",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
|