ALM_LLM / tools /sql_tool.py
AshenH's picture
Update tools/sql_tool.py
a325dbc verified
from langchain_core.tools import tool
import os
import duckdb
import pandas as pd
import warnings
# Suppress warnings that might clutter the output
warnings.filterwarnings("ignore")
# --- Database Connection Setup ---
def get_md_connection() -> duckdb.DuckDBPyConnection:
"""
Establishes a connection to MotherDuck using the MOTHERDUCK_TOKEN environment variable.
"""
# 1. Get the connection token
token = os.environ.get('MOTHERDUCK_TOKEN')
if not token:
raise ConnectionError(
"MOTHERDUCK_TOKEN environment variable is not set. "
"Please ensure it is configured in your secrets to connect to the database."
)
# 2. Connect to the MotherDuck service
# Note: If you have a specific database name, you can adjust the connection string here.
conn = duckdb.connect(f'md:?motherduck_token={token}')
return conn
# --- SQL Tools ---
@tool
def run_duckdb_query(query: str) -> str:
"""
Runs a read-only SQL query against the connected MotherDuck database and returns the results as a string.
The query must be valid DuckDB SQL. This tool only supports SELECT queries.
"""
conn = None
try:
conn = get_md_connection()
# Enforce read-only constraint
if not query.strip().lower().startswith('select'):
return "Error: Only read-only SELECT queries are allowed."
# Execute the query and fetch the results into a pandas DataFrame
result_df = conn.execute(query).fetchdf()
if result_df.empty:
return "Query executed successfully, but no rows were returned."
# Return the DataFrame as a string
return result_df.to_string(index=False)
except ConnectionError as e:
# Re-raise or handle specific connection errors
return f"Connection Error: {e}"
except Exception as e:
# Catch all other DuckDB or SQL execution errors
return f"DuckDB Query Error: {e}"
finally:
# Always close the connection if it was successfully opened
if conn:
conn.close()
@tool
def get_table_schema(table_name: str = "my_db.main.masterdataset_v") -> str:
"""
Returns the schema (column names and data types) for the specified table in the MotherDuck database.
Defaults to the 'my_db.main.masterdataset_v' table.
"""
conn = None
try:
conn = get_md_connection()
# Use PRAGMA table_info to get the schema details dynamically
# This is a standard DuckDB/SQLite way to get table schema
query = f"PRAGMA table_info('{table_name}')"
schema_df = conn.execute(query).fetchdf()
if schema_df.empty:
# Also fetch available table names for better error reporting
available_tables = conn.execute('SHOW TABLES;').fetchnames()
return f"Error: Table '{table_name}' not found. Available tables: {available_tables}"
# Format the schema into a simple string: name TYPE, name TYPE, ...
schema_parts = [f"{row['name']} {row['type']}" for index, row in schema_df.iterrows()]
return ", ".join(schema_parts)
except ConnectionError as e:
return f"Connection Error: {e}"
except Exception as e:
return f"DuckDB Schema Error: {e}"
finally:
if conn:
conn.close()