Spaces:
Sleeping
Sleeping
File size: 8,111 Bytes
f87d4de 259fc98 f87d4de 259fc98 f87d4de 3fcc1a9 f87d4de 3fcc1a9 259fc98 3fcc1a9 b29148a 2b2b073 259fc98 f87d4de 3fcc1a9 f87d4de 3fcc1a9 f87d4de 3fcc1a9 259fc98 3fcc1a9 f87d4de 3fcc1a9 f87d4de 3fcc1a9 f87d4de 3fcc1a9 f87d4de 3fcc1a9 259fc98 f87d4de |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 |
from fastapi import FastAPI
from pydantic import BaseModel
import os
from typing import List, Literal, Optional
import joblib
import numpy as np
import pandas as pd
import requests
import shap
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
# =====================================================
# CONFIG
# =====================================================
# Replace these with your NoCoDB API details
# Base endpoint of the NoCoDB v2 records API for table m39a8axnn3980w9.
NOCO_API_URL = "https://dun3co-sdc-nocodb.hf.space/api/v2/tables/m39a8axnn3980w9/records"
# View queried by fetch_test_data() below.
NOCO_VIEW_ID = "vwjuv5jnaet9npuu"
# Read the API token from the environment. NOTE(review): if NOCODB_TOKEN is
# unset this is None and requests will be sent without valid credentials.
NOCO_API_TOKEN = os.getenv("NOCODB_TOKEN")
# NoCoDB expects the token in the "xc-token" request header.
HEADERS = {"xc-token": NOCO_API_TOKEN}
# =====================================================
# MODEL LOADING
# =====================================================
# Deserialize the trained model at import time. Elsewhere in this file the
# model is treated as a scikit-learn Pipeline with "preprocessor" and
# "classifier" steps (see the pipeline handling in /explain and
# /coefficients), though /predict works with any estimator exposing
# predict/predict_proba. NOTE(review): the path is relative to the working
# directory; a missing file aborts app startup.
model = joblib.load("model_1mvp.pkl")
app = FastAPI(title="Logistic Regression API 2")
# =====================================================
# DATA SCHEMAS
# =====================================================
class InputData(BaseModel):
    """One prediction record.

    Field names form the API contract and must match the column names the
    model's preprocessor was fitted on — do not rename them.
    """

    age: int
    balance: float
    day: int
    campaign: int
    job: str
    education: str
    default: Literal["yes", "no", "unknown"]
    housing: Literal["yes", "no", "unknown"]
    loan: Literal["yes", "no", "unknown"]
    # NOTE(review): the two fields below look numeric but are declared str —
    # presumably the preprocessor expects string-encoded categories; confirm.
    months_since_previous_contact: str
    n_previous_contacts: str
    poutcome: str
    had_contact: bool
    is_single: bool
    # NOTE(review): "uknown" is a typo for "unknown", but renaming would break
    # both the request schema and the trained model's column name — left as-is.
    uknown_contact: bool
class BatchInputData(BaseModel):
    """Request body wrapping a batch of InputData records for the POST endpoints."""

    data: List[InputData]
# =====================================================
# HEALTH CHECK
# =====================================================
@app.get("/health")
def health():
    """Liveness probe: unconditionally reports the service as healthy."""
    payload = {"status": "ok"}
    return payload
# =====================================================
# NOCODB DATA FETCHING
# =====================================================
def fetch_test_data(limit: int = 100, timeout: float = 30.0) -> pd.DataFrame:
    """Fetch test or sample data from the NoCoDB view.

    Parameters
    ----------
    limit : int
        Maximum number of records to request from the view.
    timeout : float
        Seconds to wait for the NoCoDB server. New, defaulted parameter:
        previously no timeout was set, so a stalled server could block the
        worker indefinitely.

    Returns
    -------
    pd.DataFrame
        One row per record from the response's "list" field (may be empty).

    Raises
    ------
    requests.HTTPError
        If NoCoDB responds with a non-2xx status (via raise_for_status).
    """
    params = {"offset": 0, "limit": limit, "viewId": NOCO_VIEW_ID}
    res = requests.get(NOCO_API_URL, headers=HEADERS, params=params, timeout=timeout)
    res.raise_for_status()
    # The payload nests the records under "list"; a KeyError here means the
    # API contract changed and should surface loudly.
    return pd.DataFrame(res.json()["list"])
# =====================================================
# PREDICTION ENDPOINT
# =====================================================
@app.post("/predict")
def predict(batch: BatchInputData):
    """Score a batch of records.

    Returns predicted class labels and the positive-class probabilities,
    or an {"error", "trace"} payload if anything goes wrong.
    """
    try:
        frame = pd.DataFrame([record.dict() for record in batch.data])
        labels = model.predict(frame).tolist()
        positive_probs = model.predict_proba(frame)[:, 1].tolist()
        return {
            "predictions": labels,
            "probabilities": positive_probs
        }
    except Exception as exc:
        # NOTE(review): returning the full traceback to API clients leaks
        # internals; preserved here because the whole file relies on it for
        # debugging.
        import traceback
        return {"error": str(exc), "trace": traceback.format_exc()}
# =====================================================
# EXPLAINABILITY ENDPOINT
# =====================================================
@app.post("/explain")
def explain(batch: Optional[BatchInputData] = None, limit: int = 100):
    """Generate SHAP values either from provided data or from NoCoDB test data.

    Args:
        batch: Optional client-supplied records; when omitted, up to ``limit``
            rows are fetched from the NoCoDB test view instead.
        limit: Row cap for the NoCoDB fetch (ignored when ``batch`` is given).

    Returns:
        ``{"n_samples": ..., "shap_summary": [...]}`` where shap_summary lists
        features sorted by mean absolute SHAP value (descending), or an
        ``{"error", "trace"}`` payload on failure.
    """
    try:
        if batch:
            X = pd.DataFrame([item.dict() for item in batch.data])
            source = "client batch"
        else:
            X = fetch_test_data(limit=limit)
            source = f"NoCoDB (limit={limit})"
        print(f"[DEBUG] SHAP explain called using {source} | shape={X.shape} | cols={list(X.columns)}")
        # Remove ID and target columns if they exist — NoCoDB rows carry them,
        # but the model was not trained on them.
        drop_cols = [c for c in ["Id", "y", "target"] if c in X.columns]
        if drop_cols:
            print(f"[DEBUG] Dropping columns not used for prediction: {drop_cols}")
            X = X.drop(columns=drop_cols)
        # Handle pipelines correctly: SHAP must see the classifier's input
        # space, so transform first and explain only the final estimator.
        if hasattr(model, "named_steps"):
            preprocessor = model.named_steps["preprocessor"]
            classifier = model.named_steps["classifier"]
            X_transformed = preprocessor.transform(X)
            feature_names = preprocessor.get_feature_names_out()
            print(f"[DEBUG] Transformed shape: {X_transformed.shape} | n_features={len(feature_names)}")
            # Same data doubles as background for the explainer.
            explainer = shap.Explainer(classifier, X_transformed)
            shap_values = explainer(X_transformed)
            # Mean |SHAP| per feature over the sample = global importance.
            shap_summary = pd.DataFrame({
                "feature": feature_names,
                "mean_abs_shap": np.abs(shap_values.values).mean(axis=0)
            }).sort_values("mean_abs_shap", ascending=False)
        else:
            # If model is not a pipeline, explain it on the raw columns.
            explainer = shap.Explainer(model, X)
            shap_values = explainer(X)
            shap_summary = pd.DataFrame({
                "feature": X.columns,
                "mean_abs_shap": np.abs(shap_values.values).mean(axis=0)
            }).sort_values("mean_abs_shap", ascending=False)
        print(f"[DEBUG] SHAP summary created successfully with {len(shap_summary)} features.")
        return {"n_samples": len(X), "shap_summary": shap_summary.to_dict(orient="records")}
    except Exception as e:
        import traceback
        print("[ERROR] SHAP explain failed:", e)
        print(traceback.format_exc())
        # NOTE(review): the traceback is echoed back to the client — handy for
        # debugging, but an internals leak in production.
        return {"error": str(e), "trace": traceback.format_exc()}
# =====================================================
# METRICS ENDPOINT
# =====================================================
@app.post("/metrics")
def metrics(batch: Optional[BatchInputData] = None, limit: int = 100):
    """
    Compute ROC AUC and precision/recall threshold analysis.

    Uses the posted batch when provided, otherwise fetches test data from
    NoCoDB. The dataset must contain a target column 'y' whose values are
    boolean-ish (True/"True"/"true"/1 count as positive).

    Returns:
        ROC AUC, PR AUC, and the first 20 points of the precision-recall
        curve, or an {"error", ...} payload on failure.
    """
    try:
        # Fetch data from the client batch or from NoCoDB
        if batch:
            X = pd.DataFrame([item.dict() for item in batch.data])
            source = "client batch"
        else:
            X = fetch_test_data(limit=limit)
            source = f"NoCoDB (limit={limit})"
        print(f"[DEBUG] Metrics called using {source} | shape={X.shape}")
        # Ensure target 'y' exists before doing any work
        if "y" not in X.columns:
            return {"error": "No target column 'y' found in dataset."}
        # Robust conversion of the target to 0/1 (handles bools and strings)
        y_true = X["y"].map(lambda v: 1 if v in (True, "True", "true", 1) else 0).tolist()
        print(f"[DEBUG] Found {sum(y_true)} positive cases out of {len(y_true)}")
        # Drop the target plus any bookkeeping columns the model was not
        # trained on — consistent with /explain, which also drops "target".
        drop_cols = ["y"] + [c for c in ["Id", "target"] if c in X.columns]
        X = X.drop(columns=drop_cols)
        # Probability of the positive class
        y_prob = model.predict_proba(X)[:, 1]
        # Compute metrics
        roc_auc = roc_auc_score(y_true, y_prob)
        precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
        pr_auc = auc(recall, precision)
        print(f"[DEBUG] ROC AUC={roc_auc:.3f} | PR AUC={pr_auc:.3f}")
        return {
            "roc_auc": roc_auc,
            "pr_auc": pr_auc,
            # Truncated to the first 20 curve points to keep the payload small.
            "thresholds": thresholds.tolist()[:20],
            "precision": precision.tolist()[:20],
            "recall": recall.tolist()[:20]
        }
    except Exception as e:
        import traceback
        print("[ERROR] Metrics failed:", e)
        print(traceback.format_exc())
        return {"error": str(e), "trace": traceback.format_exc()}
@app.get("/coefficients")
def coefficients():
    """
    Return logistic regression coefficients paired with their feature names.

    Requires the model to be a pipeline with 'preprocessor' and 'classifier'
    steps; otherwise the error/trace payload is returned.
    """
    try:
        # Pull both pipeline steps; KeyError/AttributeError lands in the
        # except branch if the model is not the expected pipeline.
        clf = model.named_steps["classifier"]
        prep = model.named_steps["preprocessor"]
        # Feature names in the classifier's (post-transform) input space
        names = prep.get_feature_names_out()
        # Binary logistic regression: single row of weights
        weights = clf.coef_[0]
        table = pd.DataFrame({
            "feature": names,
            "coefficient": weights.tolist()
        })
        return {"coefficients": table.to_dict(orient="records")}
    except Exception as exc:
        import traceback
        return {"error": str(exc), "trace": traceback.format_exc()}
|