Spaces:
Sleeping
Sleeping
Kyryll Kochkin
committed on
Commit
·
f65a41f
1
Parent(s):
f64f759
Add endpoint health monitoring to root status
Browse files- app/main.py +100 -3
app/main.py
CHANGED
|
@@ -1,13 +1,18 @@
|
|
| 1 |
"""FastAPI application entrypoint."""
|
| 2 |
from __future__ import annotations
|
| 3 |
|
|
|
|
|
|
|
| 4 |
import logging
|
|
|
|
| 5 |
from logging.config import dictConfig
|
| 6 |
-
from typing import Any, Dict
|
| 7 |
|
|
|
|
| 8 |
from fastapi import FastAPI, HTTPException, Request
|
| 9 |
from fastapi.middleware.cors import CORSMiddleware
|
| 10 |
from fastapi.responses import JSONResponse
|
|
|
|
| 11 |
|
| 12 |
from .core.settings import get_settings
|
| 13 |
from .routers import chat, completions, embeddings, models
|
|
@@ -38,6 +43,75 @@ logger = logging.getLogger(__name__)
|
|
| 38 |
|
| 39 |
app = FastAPI(title="GPT3dev OpenAI-Compatible API", version="1.0.0")
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
if settings.cors_allow_origins:
|
| 42 |
app.add_middleware(
|
| 43 |
CORSMiddleware,
|
|
@@ -59,9 +133,21 @@ async def healthcheck() -> Dict[str, str]:
|
|
| 59 |
|
| 60 |
|
| 61 |
@app.get("/")
|
| 62 |
-
async def root() -> Dict[str,
|
| 63 |
"""Root endpoint used by platform health checks (e.g., HF Spaces)."""
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
|
| 67 |
@app.on_event("startup")
|
|
@@ -74,6 +160,17 @@ async def on_startup() -> None:
|
|
| 74 |
except Exception: # pragma: no cover - defensive logging only
|
| 75 |
models = "(unavailable)"
|
| 76 |
logger.info("API startup complete. Log level=%s. Models=[%s]", settings.log_level, models)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
|
| 79 |
@app.exception_handler(HTTPException)
|
|
|
|
| 1 |
"""FastAPI application entrypoint."""
|
| 2 |
from __future__ import annotations
|
| 3 |
|
| 4 |
+
import asyncio
|
| 5 |
+
import contextlib
|
| 6 |
import logging
|
| 7 |
+
from datetime import datetime, timezone
|
| 8 |
from logging.config import dictConfig
|
| 9 |
+
from typing import Any, Dict, List, Optional
|
| 10 |
|
| 11 |
+
import httpx
|
| 12 |
from fastapi import FastAPI, HTTPException, Request
|
| 13 |
from fastapi.middleware.cors import CORSMiddleware
|
| 14 |
from fastapi.responses import JSONResponse
|
| 15 |
+
from fastapi.routing import APIRoute
|
| 16 |
|
| 17 |
from .core.settings import get_settings
|
| 18 |
from .routers import chat, completions, embeddings, models
|
|
|
|
| 43 |
|
| 44 |
app = FastAPI(title="GPT3dev OpenAI-Compatible API", version="1.0.0")
|
| 45 |
|
| 46 |
+
CHECK_INTERVAL_SECONDS = 60
|
| 47 |
+
IGNORED_MONITOR_PATHS = {"/"}
|
| 48 |
+
|
| 49 |
+
EndpointStatus = Dict[str, Dict[str, Any]]
|
| 50 |
+
|
| 51 |
+
_endpoint_status: Dict[str, Any] = {"failures": {}, "last_checked": None}
|
| 52 |
+
_endpoint_monitor_task: Optional[asyncio.Task[None]] = None
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _monitored_endpoints() -> List[str]:
    """Return the sorted, de-duplicated paths the health monitor should probe.

    A route registered on ``app`` is selected only when it is an ``APIRoute``,
    accepts ``GET``, is not listed in ``IGNORED_MONITOR_PATHS``, declares no
    path parameters, and is included in the schema.
    """
    probe_paths = {
        candidate.path
        for candidate in app.routes
        if isinstance(candidate, APIRoute)
        and "GET" in (candidate.methods or set())
        and candidate.path not in IGNORED_MONITOR_PATHS
        and not candidate.dependant.path_params
        and candidate.include_in_schema
    }
    return sorted(probe_paths)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
async def _poll_endpoint_health() -> None:
    """Probe every monitored endpoint in a loop and record which ones fail.

    Each cycle sends a GET request for every path returned by
    ``_monitored_endpoints()`` to ``app`` through httpx's ASGI transport,
    stores the failures in ``_endpoint_status["failures"]`` together with a
    UTC ISO-8601 ``last_checked`` timestamp, logs when the set of failing
    paths changes, and then sleeps for ``CHECK_INTERVAL_SECONDS``.

    The coroutine loops until it is cancelled.

    Raises:
        asyncio.CancelledError: re-raised unchanged so that task cancellation
            (see the shutdown handler) actually stops the loop.
    """
    previous_failures: set[str] = set()
    # The ``app=`` shortcut on AsyncClient was deprecated in httpx 0.27 and
    # removed in 0.28; passing an explicit ASGITransport works on both.
    transport = httpx.ASGITransport(app=app)
    async with httpx.AsyncClient(
        transport=transport, base_url="http://status-check", timeout=10.0
    ) as client:
        while True:
            try:
                failures: Dict[str, Dict[str, Any]] = {}
                for path in _monitored_endpoints():
                    try:
                        response = await client.get(path)
                    except Exception as exc:
                        # Covers httpx.HTTPError as well as any other error
                        # surfacing from the request. Some exceptions have an
                        # empty message, so fall back to the class name.
                        failures[path] = {"error": str(exc) or type(exc).__name__}
                        continue
                    if not 200 <= response.status_code < 400:
                        failures[path] = {
                            "status_code": response.status_code,
                            # Truncated so the status payload stays small.
                            "detail": response.text[:200],
                        }

                _endpoint_status["failures"] = failures
                _endpoint_status["last_checked"] = datetime.now(timezone.utc).isoformat()

                # Log only on transitions to avoid repeating the same warning
                # every cycle while an endpoint stays down.
                current_failures = set(failures)
                if current_failures != previous_failures:
                    if current_failures:
                        logger.warning(
                            "Endpoint monitor detected failures: %s",
                            sorted(current_failures),
                        )
                    elif previous_failures:
                        logger.info("All monitored endpoints restored")
                    previous_failures = current_failures

                await asyncio.sleep(CHECK_INTERVAL_SECONDS)
            except asyncio.CancelledError:  # pragma: no cover - shutdown handling
                raise
            except Exception:  # pragma: no cover - defensive logging only
                # Keep the monitor alive; retry after the usual interval.
                logger.exception("Unexpected error during endpoint monitoring")
                await asyncio.sleep(CHECK_INTERVAL_SECONDS)
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def _ensure_monitor_task() -> None:
    """Start the endpoint polling task unless one is already running.

    A new task is created when no task exists yet or the previous one has
    finished; the handle is stored in ``_endpoint_monitor_task``.
    """
    global _endpoint_monitor_task
    existing = _endpoint_monitor_task
    if existing is not None and not existing.done():
        return
    _endpoint_monitor_task = asyncio.create_task(_poll_endpoint_health())
|
| 114 |
+
|
| 115 |
if settings.cors_allow_origins:
|
| 116 |
app.add_middleware(
|
| 117 |
CORSMiddleware,
|
|
|
|
| 133 |
|
| 134 |
|
| 135 |
@app.get("/")
async def root() -> Dict[str, Any]:
    """Root endpoint used by platform health checks (e.g., HF Spaces).

    Returns an "ok" payload while the monitor has recorded no failures.
    Otherwise the status becomes "degraded" and the payload additionally
    lists one issue per failing endpoint (sorted by path) plus, when
    available, the timestamp of the last monitor cycle.
    """
    payload: Dict[str, Any] = {"status": "ok", "message": "GPT3dev API is running"}
    failing: EndpointStatus = _endpoint_status.get("failures", {})
    if failing:
        payload["status"] = "degraded"
        issues = []
        for endpoint_path, info in sorted(failing.items()):
            issues.append({"endpoint": endpoint_path, **info})
        payload["issues"] = issues
        checked_at = _endpoint_status.get("last_checked")
        if checked_at:
            payload["last_checked"] = checked_at
    return payload
|
| 151 |
|
| 152 |
|
| 153 |
@app.on_event("startup")
|
|
|
|
| 160 |
except Exception: # pragma: no cover - defensive logging only
|
| 161 |
models = "(unavailable)"
|
| 162 |
logger.info("API startup complete. Log level=%s. Models=[%s]", settings.log_level, models)
|
| 163 |
+
_ensure_monitor_task()
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
@app.on_event("shutdown")
async def on_shutdown() -> None:
    """Cancel the endpoint polling task, wait for it to stop, and clear its handle."""
    global _endpoint_monitor_task
    if _endpoint_monitor_task is None:
        return
    _endpoint_monitor_task.cancel()
    try:
        await _endpoint_monitor_task
    except asyncio.CancelledError:
        # Expected outcome of the cancel() above.
        pass
    _endpoint_monitor_task = None
|
| 174 |
|
| 175 |
|
| 176 |
@app.exception_handler(HTTPException)
|