Spaces:

KingZack
/

ctp-slack-bot

Runtime error

App Files Files Community

LiKenun committed on Apr 5

Commit

64566ca

1 Parent(s): b6ce87e

Partial code clean-up and logging enablement

Browse files

Files changed (8) hide show

.dockerignore +3 -3
.env.template +27 -11
.gitignore +3 -3
README.MD +7 -0
pyproject.toml +1 -0
src/ctp_slack_bot/api/main.py +21 -13
src/ctp_slack_bot/core/config.py +32 -47
src/ctp_slack_bot/core/response_rendering.py +13 -0

.dockerignore CHANGED Viewed

@@ -59,11 +59,11 @@ venv.bak/
 # PyCharm
 .idea/
-# Jupyter notebooks
-notebooks/
 # Documentation
 docs/
 # MacOS
 .DS_Store

 # PyCharm
 .idea/
 # Documentation
 docs/
 # MacOS
 .DS_Store
+# Application logs
+/logs

.env.template CHANGED Viewed

@@ -1,25 +1,41 @@
 # Copy this file and modify. Do not save or commit the secrets!
 # API Configuration
 API_HOST=0.0.0.0
 API_PORT=8000
-DEBUG=false
-# MongoDB Configuration
-MONGODB_URI=mongodb+srv://username:password@cluster.mongodb.net/database?retryWrites=true&w=majority
-MONGODB_DB_NAME=ctp_slack_bot
 # Slack Configuration
 SLACK_BOT_TOKEN=🪙
 SLACK_SIGNING_SECRET=🔏
 SLACK_APP_TOKEN=🦥
 # Hugging Face Configuration
 HF_API_TOKEN=🤗
-# Logging Configuration
-LOG_LEVEL=INFO
-LOG_FORMAT=json
-# APScheduler Configuration
-SCHEDULER_TIMEZONE=UTC

 # Copy this file and modify. Do not save or commit the secrets!
+# Application Configuration
+DEBUG=false
+# Logging Configuration
+LOG_LEVEL=INFO
+LOG_FORMAT=text
+# APScheduler Configuration
+SCHEDULER_TIMEZONE=UTC
 # API Configuration
 API_HOST=0.0.0.0
 API_PORT=8000
 # Slack Configuration
 SLACK_BOT_TOKEN=🪙
 SLACK_SIGNING_SECRET=🔏
 SLACK_APP_TOKEN=🦥
+# Vectorization Configuration
+EMBEDDING_MODEL=🌮
+VECTOR_DIMENSION=9001
+CHUNK_SIZE=42
+CHUNK_OVERLAP=37
+TOP_K_MATCHES=1
+# MongoDB Configuration
+MONGODB_URI=mongodb+srv://username:password@cluster.mongodb.net/database?retryWrites=true&w=majority
+MONGODB_NAME=ctp_slack_bot
 # Hugging Face Configuration
 HF_API_TOKEN=🤗
+# OpenAI Configuration
+OPENAI_API_KEY=😐
+CHAT_MODEL=🙊
+MAX_TOKENS=42
+TEMPERATURE=0.5
+SYSTEM_PROMPT="You are a helpful teaching assistant for a data science class.\nBased on the student's question, you will be given context retrieved from class transcripts and materials to answer their question.\nYour responses should be:\n\n1. Accurate and based on the class content\n2. Clear and educational\n3. Concise but complete\nIf you're unsure about something, acknowledge it and suggest asking the professor."

.gitignore CHANGED Viewed

@@ -91,8 +91,8 @@ dmypy.json
 # PyCharm
 .idea/
-# Jupyter notebooks
-notebooks/
 # MacOS
 .DS_Store

 # PyCharm
 .idea/
 # MacOS
 .DS_Store
+# Application logs
+/logs

README.MD CHANGED Viewed

@@ -14,6 +14,7 @@
 * `src/`
     * `ctp_slack_bot/`
         * `api/`: FastAPI application structure
         * `core/`: fundamental components like configuration (using pydantic), logging setup (loguru), and custom exceptions
         * `db/`: database connection
             * `repositories/`: repository pattern implementation
@@ -23,7 +24,9 @@
         * `utils/`: reusable utilities
 * `tests/`: unit tests
 * `scripts/`: utility scripts for development, deployment, etc.
 * `notebooks/`: Jupyter notebooks for exploration and model development
 ## How to Run the Application
@@ -41,6 +44,8 @@ First, make sure you are set up with a Python virtual environment created by the
 pip3 install -e .
 ```
 If `localhost` port `8000` is free, running the following will make the application available on that port:
 ```sh
@@ -54,4 +59,6 @@ $ curl http://localhost:8000/health
 {"status":"healthy"}
 ```
 Uvicorn will restart the application automatically when any source files are changed.

 * `src/`
     * `ctp_slack_bot/`
         * `api/`: FastAPI application structure
+            * `routes.py`: API endpoint definitions
         * `core/`: fundamental components like configuration (using pydantic), logging setup (loguru), and custom exceptions
         * `db/`: database connection
             * `repositories/`: repository pattern implementation
         * `utils/`: reusable utilities
 * `tests/`: unit tests
 * `scripts/`: utility scripts for development, deployment, etc.
+    * `run-dev.sh`: script to run the application locally
 * `notebooks/`: Jupyter notebooks for exploration and model development
+* `.env`: local environment variables for development purposes
 ## How to Run the Application
 pip3 install -e .
 ```
+Make a copy of `.env.template` as `.env` and define the environment variables. (You can also define them by other means, but this has the least friction.) This file should not be committed and is excluded by `.gitignore`!
 If `localhost` port `8000` is free, running the following will make the application available on that port:
 ```sh
 {"status":"healthy"}
 ```
+In debug mode (`DEBUG=true`), [http://localhost:8000/env](http://localhost:8000/env) will pretty-print the non-sensitive environment variables as JSON.
 Uvicorn will restart the application automatically when any source files are changed.

pyproject.toml CHANGED Viewed

@@ -43,6 +43,7 @@ dev = [
     "pytest>=7.3.1",
     "pytest-cov>=4.1.0",
     "mypy>=1.3.0",
     "black>=23.3.0",
     "isort>=5.12.0",
     "ruff>=0.0.270",

     "pytest>=7.3.1",
     "pytest-cov>=4.1.0",
     "mypy>=1.3.0",
+    "types-pytz>=2025.2",
     "black>=23.3.0",
     "isort>=5.12.0",
     "ruff>=0.0.270",

src/ctp_slack_bot/api/main.py CHANGED Viewed

@@ -1,23 +1,23 @@
-import logging
 from contextlib import asynccontextmanager
-from fastapi import FastAPI
 from loguru import logger
 from ctp_slack_bot.api.routes import router
-from ctp_slack_bot.core.config import settings
 from ctp_slack_bot.core.logging import setup_logging
 from ctp_slack_bot.tasks.scheduler import start_scheduler, stop_scheduler
 @asynccontextmanager
-async def lifespan(app: FastAPI):
     """
     Lifespan context manager for FastAPI application.
     Handles startup and shutdown events.
     """
     # Setup logging
-    #setup_logging()
     logger.info("Starting application")
     # Start scheduler
@@ -42,11 +42,19 @@ app = FastAPI(
 # Include routers
 app.include_router(router)
 @app.get("/health")
-async def health_check():
-    """Health check endpoint"""
-    return {"status": "healthy"}
 if __name__ == "__main__":
@@ -54,7 +62,7 @@ if __name__ == "__main__":
     uvicorn.run(
         "main:app",
-        host="localhost", #settings.API_HOST,
-        port=8000, #settings.API_PORT,
-        reload=True #settings.DEBUG,
     )

 from contextlib import asynccontextmanager
+from fastapi import FastAPI, HTTPException
 from loguru import logger
+from typing import AsyncGenerator, Never
 from ctp_slack_bot.api.routes import router
+from ctp_slack_bot.core.config import Settings, settings
 from ctp_slack_bot.core.logging import setup_logging
+from ctp_slack_bot.core.response_rendering import PrettyJSONResponse
 from ctp_slack_bot.tasks.scheduler import start_scheduler, stop_scheduler
 @asynccontextmanager
+async def lifespan(app: FastAPI) -> AsyncGenerator:
     """
     Lifespan context manager for FastAPI application.
     Handles startup and shutdown events.
     """
     # Setup logging
+    setup_logging()
     logger.info("Starting application")
     # Start scheduler
 # Include routers
 app.include_router(router)
 @app.get("/health")
+async def health() -> dict[str, str]:
+    """Health check"""
+    return {
+        "status": "healthy"
+    }
+@app.get("/env", response_class=PrettyJSONResponse)
+async def env() -> Settings:
+    """Server-internal environment variables"""
+    if not settings.DEBUG:
+        raise HTTPException(status_code=404)
+    return settings
 if __name__ == "__main__":
     uvicorn.run(
         "main:app",
+        host=settings.API_HOST,
+        port=settings.API_PORT,
+        reload=settings.DEBUG
     )

src/ctp_slack_bot/core/config.py CHANGED Viewed

@@ -1,69 +1,54 @@
 from functools import lru_cache
 from typing import Literal, Optional
-from pydantic import Field, SecretStr, validator
 from pydantic_settings import BaseSettings, SettingsConfigDict
-class Settings(BaseSettings):
     """
     Application settings loaded from environment variables.
     """
-    # API Configuration
-    API_HOST: str = "0.0.0.0"
-    API_PORT: int = 8000
     DEBUG: bool = False
     # Vectorization Configuration
-    EMBEDDING_MODEL: str = "text-embedding-3-small"
-    VECTOR_DIMENSION: int = 1536
-    CHUNK_SIZE: int = 1000
-    CHUNK_OVERLAP: int = 200
-    TOP_K_MATCHES: int = 5
     # MongoDB Configuration
-    MONGODB_URI: Optional[SecretStr] = None # TODO: Remove optionality
-    MONGODB_DB_NAME: str = "ctp_slack_bot"
-    # Slack Configuration
-    SLACK_BOT_TOKEN: Optional[SecretStr] = None # TODO: Remove optionality
-    SLACK_SIGNING_SECRET: Optional[SecretStr] = None # TODO: Remove optionality
-    SLACK_APP_TOKEN: Optional[SecretStr] = None
     # Hugging Face Configuration
     HF_API_TOKEN: Optional[SecretStr] = None
     # OpenAI Configuration
     OPENAI_API_KEY: Optional[SecretStr] = None
-    # Chat Model Configuration
-    CHAT_MODEL: str = "gpt-3.5-turbo"
-    MAX_TOKENS: int = 150
-    TEMPERATURE: float = 0.8 # Maximum tokens for response generation
-    SYSTEM_PROMPT: str = """
-    You are a helpful teaching assistant for a data science class.
-    Based on the students question, you will be given context retreived from class transcripts and materials to answer their question.
-    Your responses should be:
-        1. Accurate and based on the class content
-        2. Clear and educational
-        3. Concise but complete
-    If you're unsure about something, acknowledge it and suggest asking the professor.
-    """
-    # Logging Configuration
-    LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"
-    LOG_FORMAT: Literal["text", "json"] = "json"
-    # APScheduler Configuration
-    SCHEDULER_TIMEZONE: str = "UTC"
-    @validator("MONGODB_URI")
-    def validate_mongodb_uri(cls, v):
-        """Validate MongoDB URI format"""
-        #if not v.get_secret_value().startswith("mongodb"):
-        #    raise ValueError("MONGODB_URI must be a valid MongoDB connection string")
-        return v
     model_config = SettingsConfigDict(
         env_file=".env",
         env_file_encoding="utf-8",
@@ -76,7 +61,7 @@ def get_settings() -> Settings:
     """
     Get cached settings instance.
     """
-    return Settings()
 settings = get_settings()

 from functools import lru_cache
 from typing import Literal, Optional
+from pydantic import Field, MongoDsn, NonNegativeFloat, NonNegativeInt, PositiveInt, SecretStr
 from pydantic_settings import BaseSettings, SettingsConfigDict
+class Settings(BaseSettings): # TODO: Strong guarantees of validity, because garbage in = garbage out, and settings flow into all the nooks and crannies
     """
     Application settings loaded from environment variables.
     """
+    # Application Configuration
     DEBUG: bool = False
+    # Logging Configuration
+    LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(default_factory=lambda data: "DEBUG" if data.get("DEBUG", False) else "INFO")
+    LOG_FORMAT: Literal["text", "json"] = "json"
+    # APScheduler Configuration
+    SCHEDULER_TIMEZONE: str = "UTC"
+    # API Configuration
+    API_HOST: str
+    API_PORT: PositiveInt
+    # Slack Configuration
+    SLACK_BOT_TOKEN: SecretStr
+    SLACK_SIGNING_SECRET: SecretStr
+    SLACK_APP_TOKEN: SecretStr
     # Vectorization Configuration
+    EMBEDDING_MODEL: str
+    VECTOR_DIMENSION: PositiveInt
+    CHUNK_SIZE: PositiveInt
+    CHUNK_OVERLAP: NonNegativeInt
+    TOP_K_MATCHES: PositiveInt
     # MongoDB Configuration
+    MONGODB_URI: SecretStr # TODO: Contemplate switching to MongoDsn type for the main URL, and separate out the credentials to SecretStr variables.
+    MONGODB_NAME: str
     # Hugging Face Configuration
     HF_API_TOKEN: Optional[SecretStr] = None
     # OpenAI Configuration
     OPENAI_API_KEY: Optional[SecretStr] = None
+    CHAT_MODEL: str
+    MAX_TOKENS: PositiveInt
+    TEMPERATURE: NonNegativeFloat
+    SYSTEM_PROMPT: str
     model_config = SettingsConfigDict(
         env_file=".env",
         env_file_encoding="utf-8",
     """
     Get cached settings instance.
     """
+    return Settings() # type: ignore
 settings = get_settings()

src/ctp_slack_bot/core/response_rendering.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from json import dumps
+from starlette.responses import JSONResponse
+from typing import Any, Self
+class PrettyJSONResponse(JSONResponse):
+    def render(self: Self, content: Any) -> bytes:
+        return dumps(
+            content,
+            ensure_ascii=False,
+            allow_nan=False,
+            indent=4,
+            separators=(", ", ": "),
+        ).encode("utf-8")