Spaces:
Runtime error
Runtime error
Partial code clean-up and logging enablement
Browse files
- .dockerignore +3 -3
- .env.template +27 -11
- .gitignore +3 -3
- README.MD +7 -0
- pyproject.toml +1 -0
- src/ctp_slack_bot/api/main.py +21 -13
- src/ctp_slack_bot/core/config.py +32 -47
- src/ctp_slack_bot/core/response_rendering.py +13 -0
.dockerignore
CHANGED
|
@@ -59,11 +59,11 @@ venv.bak/
|
|
| 59 |
# PyCharm
|
| 60 |
.idea/
|
| 61 |
|
| 62 |
-
# Jupyter notebooks
|
| 63 |
-
notebooks/
|
| 64 |
-
|
| 65 |
# Documentation
|
| 66 |
docs/
|
| 67 |
|
| 68 |
# MacOS
|
| 69 |
.DS_Store
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
# PyCharm
|
| 60 |
.idea/
|
| 61 |
|
|
|
|
|
|
|
|
|
|
| 62 |
# Documentation
|
| 63 |
docs/
|
| 64 |
|
| 65 |
# MacOS
|
| 66 |
.DS_Store
|
| 67 |
+
|
| 68 |
+
# Application logs
|
| 69 |
+
/logs
|
.env.template
CHANGED
|
@@ -1,25 +1,41 @@
|
|
| 1 |
# Copy this file and modify. Do not save or commit the secrets!
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
# API Configuration
|
| 4 |
API_HOST=0.0.0.0
|
| 5 |
API_PORT=8000
|
| 6 |
-
DEBUG=false
|
| 7 |
-
|
| 8 |
-
# MongoDB Configuration
|
| 9 |
-
MONGODB_URI=mongodb+srv://username:password@cluster.mongodb.net/database?retryWrites=true&w=majority
|
| 10 |
-
MONGODB_DB_NAME=ctp_slack_bot
|
| 11 |
|
| 12 |
# Slack Configuration
|
| 13 |
SLACK_BOT_TOKEN=🪙
|
| 14 |
SLACK_SIGNING_SECRET=🔏
|
| 15 |
SLACK_APP_TOKEN=🦥
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
# Hugging Face Configuration
|
| 18 |
HF_API_TOKEN=🤗
|
| 19 |
|
| 20 |
-
#
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
| 1 |
# Copy this file and modify. Do not save or commit the secrets!
|
| 2 |
|
| 3 |
+
# Application Configuration
|
| 4 |
+
DEBUG=false
|
| 5 |
+
|
| 6 |
+
# Logging Configuration
|
| 7 |
+
LOG_LEVEL=INFO
|
| 8 |
+
LOG_FORMAT=text
|
| 9 |
+
|
| 10 |
+
# APScheduler Configuration
|
| 11 |
+
SCHEDULER_TIMEZONE=UTC
|
| 12 |
+
|
| 13 |
# API Configuration
|
| 14 |
API_HOST=0.0.0.0
|
| 15 |
API_PORT=8000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# Slack Configuration
|
| 18 |
SLACK_BOT_TOKEN=🪙
|
| 19 |
SLACK_SIGNING_SECRET=🔏
|
| 20 |
SLACK_APP_TOKEN=🦥
|
| 21 |
|
| 22 |
+
# Vectorization Configuration
|
| 23 |
+
EMBEDDING_MODEL=🌮
|
| 24 |
+
VECTOR_DIMENSION=9001
|
| 25 |
+
CHUNK_SIZE=42
|
| 26 |
+
CHUNK_OVERLAP=37
|
| 27 |
+
TOP_K_MATCHES=1
|
| 28 |
+
|
| 29 |
+
# MongoDB Configuration
|
| 30 |
+
MONGODB_URI=mongodb+srv://username:password@cluster.mongodb.net/database?retryWrites=true&w=majority
|
| 31 |
+
MONGODB_NAME=ctp_slack_bot
|
| 32 |
+
|
| 33 |
# Hugging Face Configuration
|
| 34 |
HF_API_TOKEN=🤗
|
| 35 |
|
| 36 |
+
# OpenAI Configuration
|
| 37 |
+
OPENAI_API_KEY=😐
|
| 38 |
+
CHAT_MODEL=🙊
|
| 39 |
+
MAX_TOKENS=42
|
| 40 |
+
TEMPERATURE=0.5
|
| 41 |
+
SYSTEM_PROMPT="You are a helpful teaching assistant for a data science class.\nBased on the student's question, you will be given context retrieved from class transcripts and materials to answer their question.\nYour responses should be:\n\n1. Accurate and based on the class content\n2. Clear and educational\n3. Concise but complete\nIf you're unsure about something, acknowledge it and suggest asking the professor."
|
.gitignore
CHANGED
|
@@ -91,8 +91,8 @@ dmypy.json
|
|
| 91 |
# PyCharm
|
| 92 |
.idea/
|
| 93 |
|
| 94 |
-
# Jupyter notebooks
|
| 95 |
-
notebooks/
|
| 96 |
-
|
| 97 |
# MacOS
|
| 98 |
.DS_Store
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
# PyCharm
|
| 92 |
.idea/
|
| 93 |
|
|
|
|
|
|
|
|
|
|
| 94 |
# MacOS
|
| 95 |
.DS_Store
|
| 96 |
+
|
| 97 |
+
# Application logs
|
| 98 |
+
/logs
|
README.MD
CHANGED
|
@@ -14,6 +14,7 @@
|
|
| 14 |
* `src/`
|
| 15 |
* `ctp_slack_bot/`
|
| 16 |
* `api/`: FastAPI application structure
|
|
|
|
| 17 |
* `core/`: fundamental components like configuration (using pydantic), logging setup (loguru), and custom exceptions
|
| 18 |
* `db/`: database connection
|
| 19 |
* `repositories/`: repository pattern implementation
|
|
@@ -23,7 +24,9 @@
|
|
| 23 |
* `utils/`: reusable utilities
|
| 24 |
* `tests/`: unit tests
|
| 25 |
* `scripts/`: utility scripts for development, deployment, etc.
|
|
|
|
| 26 |
* `notebooks/`: Jupyter notebooks for exploration and model development
|
|
|
|
| 27 |
|
| 28 |
## How to Run the Application
|
| 29 |
|
|
@@ -41,6 +44,8 @@ First, make sure you are set up with a Python virtual environment created by the
|
|
| 41 |
pip3 install -e .
|
| 42 |
```
|
| 43 |
|
|
|
|
|
|
|
| 44 |
If `localhost` port `8000` is free, running the following will make the application available on that port:
|
| 45 |
|
| 46 |
```sh
|
|
@@ -54,4 +59,6 @@ $ curl http://localhost:8000/health
|
|
| 54 |
{"status":"healthy"}
|
| 55 |
```
|
| 56 |
|
|
|
|
|
|
|
| 57 |
Uvicorn will restart the application automatically when any source files are changed.
|
|
|
|
| 14 |
* `src/`
|
| 15 |
* `ctp_slack_bot/`
|
| 16 |
* `api/`: FastAPI application structure
|
| 17 |
+
* `routes.py`: API endpoint definitions
|
| 18 |
* `core/`: fundamental components like configuration (using pydantic), logging setup (loguru), and custom exceptions
|
| 19 |
* `db/`: database connection
|
| 20 |
* `repositories/`: repository pattern implementation
|
|
|
|
| 24 |
* `utils/`: reusable utilities
|
| 25 |
* `tests/`: unit tests
|
| 26 |
* `scripts/`: utility scripts for development, deployment, etc.
|
| 27 |
+
* `run-dev.sh`: script to run the application locally
|
| 28 |
* `notebooks/`: Jupyter notebooks for exploration and model development
|
| 29 |
+
* `.env`: local environment variables for development purposes
|
| 30 |
|
| 31 |
## How to Run the Application
|
| 32 |
|
|
|
|
| 44 |
pip3 install -e .
|
| 45 |
```
|
| 46 |
|
| 47 |
+
Make a copy of `.env.template` as `.env` and define the environment variables. (You can also define them by other means, but this has the least friction.) This file should not be committed and is excluded by `.gitignore`!
|
| 48 |
+
|
| 49 |
If `localhost` port `8000` is free, running the following will make the application available on that port:
|
| 50 |
|
| 51 |
```sh
|
|
|
|
| 59 |
{"status":"healthy"}
|
| 60 |
```
|
| 61 |
|
| 62 |
+
In debug mode (`DEBUG=true`), [http://localhost:8000/env](http://localhost:8000/env) will pretty-print the non-sensitive environment variables as JSON.
|
| 63 |
+
|
| 64 |
Uvicorn will restart the application automatically when any source files are changed.
|
pyproject.toml
CHANGED
|
@@ -43,6 +43,7 @@ dev = [
|
|
| 43 |
"pytest>=7.3.1",
|
| 44 |
"pytest-cov>=4.1.0",
|
| 45 |
"mypy>=1.3.0",
|
|
|
|
| 46 |
"black>=23.3.0",
|
| 47 |
"isort>=5.12.0",
|
| 48 |
"ruff>=0.0.270",
|
|
|
|
| 43 |
"pytest>=7.3.1",
|
| 44 |
"pytest-cov>=4.1.0",
|
| 45 |
"mypy>=1.3.0",
|
| 46 |
+
"types-pytz>=2025.2",
|
| 47 |
"black>=23.3.0",
|
| 48 |
"isort>=5.12.0",
|
| 49 |
"ruff>=0.0.270",
|
src/ctp_slack_bot/api/main.py
CHANGED
|
@@ -1,23 +1,23 @@
|
|
| 1 |
-
import logging
|
| 2 |
from contextlib import asynccontextmanager
|
| 3 |
-
|
| 4 |
-
from fastapi import FastAPI
|
| 5 |
from loguru import logger
|
|
|
|
| 6 |
|
| 7 |
from ctp_slack_bot.api.routes import router
|
| 8 |
-
from ctp_slack_bot.core.config import settings
|
| 9 |
from ctp_slack_bot.core.logging import setup_logging
|
|
|
|
| 10 |
from ctp_slack_bot.tasks.scheduler import start_scheduler, stop_scheduler
|
| 11 |
|
| 12 |
|
| 13 |
@asynccontextmanager
|
| 14 |
-
async def lifespan(app: FastAPI):
|
| 15 |
"""
|
| 16 |
Lifespan context manager for FastAPI application.
|
| 17 |
Handles startup and shutdown events.
|
| 18 |
"""
|
| 19 |
# Setup logging
|
| 20 |
-
|
| 21 |
logger.info("Starting application")
|
| 22 |
|
| 23 |
# Start scheduler
|
|
@@ -42,11 +42,19 @@ app = FastAPI(
|
|
| 42 |
# Include routers
|
| 43 |
app.include_router(router)
|
| 44 |
|
| 45 |
-
|
| 46 |
@app.get("/health")
|
| 47 |
-
async def
|
| 48 |
-
"""Health check
|
| 49 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
|
| 52 |
if __name__ == "__main__":
|
|
@@ -54,7 +62,7 @@ if __name__ == "__main__":
|
|
| 54 |
|
| 55 |
uvicorn.run(
|
| 56 |
"main:app",
|
| 57 |
-
host=
|
| 58 |
-
port=
|
| 59 |
-
reload=
|
| 60 |
)
|
|
|
|
|
|
|
| 1 |
from contextlib import asynccontextmanager
|
| 2 |
+
from fastapi import FastAPI, HTTPException
|
|
|
|
| 3 |
from loguru import logger
|
| 4 |
+
from typing import AsyncGenerator, Never
|
| 5 |
|
| 6 |
from ctp_slack_bot.api.routes import router
|
| 7 |
+
from ctp_slack_bot.core.config import Settings, settings
|
| 8 |
from ctp_slack_bot.core.logging import setup_logging
|
| 9 |
+
from ctp_slack_bot.core.response_rendering import PrettyJSONResponse
|
| 10 |
from ctp_slack_bot.tasks.scheduler import start_scheduler, stop_scheduler
|
| 11 |
|
| 12 |
|
| 13 |
@asynccontextmanager
|
| 14 |
+
async def lifespan(app: FastAPI) -> AsyncGenerator:
|
| 15 |
"""
|
| 16 |
Lifespan context manager for FastAPI application.
|
| 17 |
Handles startup and shutdown events.
|
| 18 |
"""
|
| 19 |
# Setup logging
|
| 20 |
+
setup_logging()
|
| 21 |
logger.info("Starting application")
|
| 22 |
|
| 23 |
# Start scheduler
|
|
|
|
| 42 |
# Include routers
|
| 43 |
app.include_router(router)
|
| 44 |
|
|
|
|
| 45 |
@app.get("/health")
|
| 46 |
+
async def health() -> dict[str, str]:
    """Report liveness by returning a fixed ``{"status": "healthy"}`` payload."""
    return dict(status="healthy")
|
| 51 |
+
|
| 52 |
+
@app.get("/env", response_class=PrettyJSONResponse)
|
| 53 |
+
async def env() -> Settings:
    """Return the module-level ``settings`` object when debug mode is on.

    Raises:
        HTTPException: with status code 404 whenever ``settings.DEBUG`` is
            falsy, so the endpoint looks nonexistent outside debug mode.
    """
    if settings.DEBUG:
        return settings
    raise HTTPException(status_code=404)
|
| 58 |
|
| 59 |
|
| 60 |
if __name__ == "__main__":
|
|
|
|
| 62 |
|
| 63 |
uvicorn.run(
|
| 64 |
"main:app",
|
| 65 |
+
host=settings.API_HOST,
|
| 66 |
+
port=settings.API_PORT,
|
| 67 |
+
reload=settings.DEBUG
|
| 68 |
)
|
src/ctp_slack_bot/core/config.py
CHANGED
|
@@ -1,69 +1,54 @@
|
|
| 1 |
from functools import lru_cache
|
| 2 |
from typing import Literal, Optional
|
| 3 |
|
| 4 |
-
from pydantic import Field,
|
| 5 |
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 6 |
|
| 7 |
|
| 8 |
-
class Settings(BaseSettings):
|
| 9 |
"""
|
| 10 |
Application settings loaded from environment variables.
|
| 11 |
"""
|
| 12 |
-
#
|
| 13 |
-
API_HOST: str = "0.0.0.0"
|
| 14 |
-
API_PORT: int = 8000
|
| 15 |
DEBUG: bool = False
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
# Vectorization Configuration
|
| 18 |
-
EMBEDDING_MODEL: str
|
| 19 |
-
VECTOR_DIMENSION:
|
| 20 |
-
CHUNK_SIZE:
|
| 21 |
-
CHUNK_OVERLAP:
|
| 22 |
-
TOP_K_MATCHES:
|
| 23 |
|
| 24 |
# MongoDB Configuration
|
| 25 |
-
MONGODB_URI:
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
# Slack Configuration
|
| 29 |
-
SLACK_BOT_TOKEN: Optional[SecretStr] = None # TODO: Remove optionality
|
| 30 |
-
SLACK_SIGNING_SECRET: Optional[SecretStr] = None # TODO: Remove optionality
|
| 31 |
-
SLACK_APP_TOKEN: Optional[SecretStr] = None
|
| 32 |
-
|
| 33 |
# Hugging Face Configuration
|
| 34 |
HF_API_TOKEN: Optional[SecretStr] = None
|
| 35 |
|
| 36 |
# OpenAI Configuration
|
| 37 |
OPENAI_API_KEY: Optional[SecretStr] = None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
# Chat Model Configuration
|
| 40 |
-
CHAT_MODEL: str = "gpt-3.5-turbo"
|
| 41 |
-
MAX_TOKENS: int = 150
|
| 42 |
-
TEMPERATURE: float = 0.8 # Maximum tokens for response generation
|
| 43 |
-
SYSTEM_PROMPT: str = """
|
| 44 |
-
You are a helpful teaching assistant for a data science class.
|
| 45 |
-
Based on the students question, you will be given context retreived from class transcripts and materials to answer their question.
|
| 46 |
-
Your responses should be:
|
| 47 |
-
1. Accurate and based on the class content
|
| 48 |
-
2. Clear and educational
|
| 49 |
-
3. Concise but complete
|
| 50 |
-
If you're unsure about something, acknowledge it and suggest asking the professor.
|
| 51 |
-
"""
|
| 52 |
-
|
| 53 |
-
# Logging Configuration
|
| 54 |
-
LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"
|
| 55 |
-
LOG_FORMAT: Literal["text", "json"] = "json"
|
| 56 |
-
|
| 57 |
-
# APScheduler Configuration
|
| 58 |
-
SCHEDULER_TIMEZONE: str = "UTC"
|
| 59 |
-
|
| 60 |
-
@validator("MONGODB_URI")
|
| 61 |
-
def validate_mongodb_uri(cls, v):
|
| 62 |
-
"""Validate MongoDB URI format"""
|
| 63 |
-
#if not v.get_secret_value().startswith("mongodb"):
|
| 64 |
-
# raise ValueError("MONGODB_URI must be a valid MongoDB connection string")
|
| 65 |
-
return v
|
| 66 |
-
|
| 67 |
model_config = SettingsConfigDict(
|
| 68 |
env_file=".env",
|
| 69 |
env_file_encoding="utf-8",
|
|
@@ -76,7 +61,7 @@ def get_settings() -> Settings:
|
|
| 76 |
"""
|
| 77 |
Get cached settings instance.
|
| 78 |
"""
|
| 79 |
-
return Settings()
|
| 80 |
|
| 81 |
|
| 82 |
settings = get_settings()
|
|
|
|
| 1 |
from functools import lru_cache
|
| 2 |
from typing import Literal, Optional
|
| 3 |
|
| 4 |
+
from pydantic import Field, MongoDsn, NonNegativeFloat, NonNegativeInt, PositiveInt, SecretStr
|
| 5 |
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 6 |
|
| 7 |
|
| 8 |
+
class Settings(BaseSettings): # TODO: Strong guarantees of validity, because garbage in = garbage out, and settings flow into all the nooks and crannies
|
| 9 |
"""
|
| 10 |
Application settings loaded from environment variables.
|
| 11 |
"""
|
| 12 |
+
# Application Configuration
|
|
|
|
|
|
|
| 13 |
DEBUG: bool = False
|
| 14 |
|
| 15 |
+
# Logging Configuration
|
| 16 |
+
LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(default_factory=lambda data: "DEBUG" if data.get("DEBUG", False) else "INFO")
|
| 17 |
+
LOG_FORMAT: Literal["text", "json"] = "json"
|
| 18 |
+
|
| 19 |
+
# APScheduler Configuration
|
| 20 |
+
SCHEDULER_TIMEZONE: str = "UTC"
|
| 21 |
+
|
| 22 |
+
# API Configuration
|
| 23 |
+
API_HOST: str
|
| 24 |
+
API_PORT: PositiveInt
|
| 25 |
+
|
| 26 |
+
# Slack Configuration
|
| 27 |
+
SLACK_BOT_TOKEN: SecretStr
|
| 28 |
+
SLACK_SIGNING_SECRET: SecretStr
|
| 29 |
+
SLACK_APP_TOKEN: SecretStr
|
| 30 |
+
|
| 31 |
# Vectorization Configuration
|
| 32 |
+
EMBEDDING_MODEL: str
|
| 33 |
+
VECTOR_DIMENSION: PositiveInt
|
| 34 |
+
CHUNK_SIZE: PositiveInt
|
| 35 |
+
CHUNK_OVERLAP: NonNegativeInt
|
| 36 |
+
TOP_K_MATCHES: PositiveInt
|
| 37 |
|
| 38 |
# MongoDB Configuration
|
| 39 |
+
MONGODB_URI: SecretStr # TODO: Contemplate switching to MongoDsn type for the main URL, and separate out the credentials to SecretStr variables.
|
| 40 |
+
MONGODB_NAME: str
|
| 41 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
# Hugging Face Configuration
|
| 43 |
HF_API_TOKEN: Optional[SecretStr] = None
|
| 44 |
|
| 45 |
# OpenAI Configuration
|
| 46 |
OPENAI_API_KEY: Optional[SecretStr] = None
|
| 47 |
+
CHAT_MODEL: str
|
| 48 |
+
MAX_TOKENS: PositiveInt
|
| 49 |
+
TEMPERATURE: NonNegativeFloat
|
| 50 |
+
SYSTEM_PROMPT: str
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
model_config = SettingsConfigDict(
|
| 53 |
env_file=".env",
|
| 54 |
env_file_encoding="utf-8",
|
|
|
|
| 61 |
"""
|
| 62 |
Get cached settings instance.
|
| 63 |
"""
|
| 64 |
+
return Settings() # type: ignore
|
| 65 |
|
| 66 |
|
| 67 |
settings = get_settings()
|
src/ctp_slack_bot/core/response_rendering.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from json import dumps
|
| 2 |
+
from starlette.responses import JSONResponse
|
| 3 |
+
from typing import Any, Self
|
| 4 |
+
|
| 5 |
+
class PrettyJSONResponse(JSONResponse):
    """JSON response whose body is indented for human readability."""

    def render(self: Self, content: Any) -> bytes:
        """Serialize *content* to indented, UTF-8 encoded JSON.

        Args:
            content: The JSON-serializable payload to render.

        Returns:
            The JSON text, indented by four spaces per level, encoded as UTF-8.
            Non-ASCII characters are emitted as-is rather than escaped.

        Raises:
            ValueError: If *content* contains NaN or an infinity, because
                ``allow_nan=False`` rejects values that are not valid JSON.
        """
        return dumps(
            content,
            ensure_ascii=False,
            allow_nan=False,
            indent=4,
            # With ``indent`` set, each item separator is immediately followed
            # by a newline, so a trailing space in it would leave whitespace at
            # the end of every line. Use a bare comma instead.
            separators=(",", ": "),
        ).encode("utf-8")
|