Spaces:

Dun3Co
/

LogRegModel

Sleeping

App Files Files Community

Dun3Co committed on Oct 9

Commit

2f872a2

verified ·

1 Parent(s): d2ca5be

Upload app.py

Browse files

Files changed (1) hide show

app.py +200 -0

app.py ADDED Viewed

	@@ -0,0 +1,200 @@

+from fastapi import FastAPI
+from pydantic import BaseModel
+import os
+from typing import List, Literal, Optional
+import joblib
+import numpy as np
+import pandas as pd
+import requests
+import shap
+from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
+# =====================================================
+# CONFIG
+# =====================================================
+# NoCoDB connection settings. Replace these with your own NoCoDB API details.
+# Records endpoint of the table that is queried.
+NOCO_API_URL = "https://dun3co-sdc-nocodb.hf.space/api/v2/tables/m39a8axnn3980w9/records"
+# Sent as the "viewId" query parameter when fetching records.
+NOCO_VIEW_ID = "vwjuv5jnaet9npuu"
+# Read from the NOCODB_TOKEN environment variable; None when it is not set.
+NOCO_API_TOKEN = os.getenv("NOCODB_TOKEN")
+# Headers passed to requests.get when querying NoCoDB.
+HEADERS = {"xc-token": NOCO_API_TOKEN}
+# =====================================================
+# MODEL LOADING
+# =====================================================
+# Load the serialized estimator once at import time; the predict, explain,
+# metrics and coefficients endpoints all read this module-level object.
+# NOTE(review): joblib.load unpickles the file, so model_1mvp.pkl must come
+# from a trusted source. The path is relative to the working directory.
+model = joblib.load("model_1mvp.pkl")
+# Application object that the route decorators in this file register against.
+app = FastAPI(title="Logistic Regression API 2")
+# =====================================================
+# DATA SCHEMAS
+# =====================================================
+class InputData(BaseModel):
+    age: int
+    balance: float
+    day: int
+    campaign: int
+    job: str
+    education: str
+    default: Literal["yes", "no", "unknown"]
+    housing: Literal["yes", "no", "unknown"]
+    loan: Literal["yes", "no", "unknown"]
+    months_since_previous_contact: str
+    n_previous_contacts: str
+    poutcome: str
+    had_contact: bool
+    is_single: bool
+    uknown_contact: bool
+class BatchInputData(BaseModel):
+    # Request body wrapper: the records to process together, each one
+    # validated as an InputData instance.
+    data: List[InputData]
+# =====================================================
+# HEALTH CHECK
+# =====================================================
+@app.get("/health")
+def health():
+    return {"status": "ok"}
+# =====================================================
+# NOCODB DATA FETCHING
+# =====================================================
+def fetch_test_data(limit: int = 100):
+    """Fetch test or sample data from NoCoDB view."""
+    params = {"offset": 0, "limit": limit, "viewId": NOCO_VIEW_ID}
+    res = requests.get(NOCO_API_URL, headers=HEADERS, params=params)
+    res.raise_for_status()
+    data = res.json()["list"]
+    return pd.DataFrame(data)
+# =====================================================
+# PREDICTION ENDPOINT
+# =====================================================
+@app.post("/predict")
+def predict(batch: BatchInputData):
+    try:
+        X = pd.DataFrame([item.dict() for item in batch.data])
+        preds = model.predict(X)
+        probs = model.predict_proba(X)[:, 1]
+        return {
+            "predictions": preds.tolist(),
+            "probabilities": probs.tolist()
+        }
+    except Exception as e:
+        import traceback
+        return {"error": str(e), "trace": traceback.format_exc()}
+# =====================================================
+# EXPLAINABILITY ENDPOINT
+# =====================================================
+@app.post("/explain")
+def explain(batch: Optional[BatchInputData] = None, limit: int = 100):
+    """Generate SHAP values either from provided data or from NoCoDB test data."""
+    try:
+        if batch:
+            X = pd.DataFrame([item.dict() for item in batch.data])
+            source = "client batch"
+        else:
+            X = fetch_test_data(limit=limit)
+            source = f"NoCoDB (limit={limit})"
+        print(f"[DEBUG] SHAP explain called using {source} | shape={X.shape} | cols={list(X.columns)}")
+        # Ensure model is a pipeline
+        if hasattr(model, "named_steps"):
+            explainer = shap.Explainer(model.named_steps["classifier"], model.named_steps["preprocessor"].transform(X))
+        else:
+            explainer = shap.Explainer(model, X)
+        shap_values = explainer(X)
+        shap_summary = pd.DataFrame({
+            "feature": X.columns,
+            "mean_abs_shap": np.abs(shap_values.values).mean(axis=0)
+        }).sort_values("mean_abs_shap", ascending=False)
+        print(f"[DEBUG] SHAP summary created successfully with {len(shap_summary)} features.")
+        return {
+            "n_samples": len(X),
+            "shap_summary": shap_summary.to_dict(orient="records")
+        }
+    except Exception as e:
+        import traceback
+        print("[ERROR] SHAP explain failed:", e)
+        print(traceback.format_exc())
+        return {"error": str(e), "trace": traceback.format_exc()}
+# =====================================================
+# METRICS ENDPOINT
+# =====================================================
+@app.post("/metrics")
+def metrics(batch: Optional[BatchInputData] = None, y_true: Optional[List[int]] = None, limit: int = 100):
+    """Compute ROC AUC and threshold analysis, using input or NoCoDB test data."""
+    try:
+        # Use provided data or fallback to test data from NoCoDB
+        if batch:
+            X = pd.DataFrame([item.dict() for item in batch.data])
+        else:
+            X = fetch_test_data(limit=limit)
+        if y_true is None:
+            # Look for 'y_true' column in NoCoDB data
+            if "y_true" in X.columns:
+                y_true = X["y_true"].astype(int).tolist()
+                X = X.drop(columns=["y_true"])
+            else:
+                return {"error": "y_true values not provided or found in dataset"}
+        y_prob = model.predict_proba(X)[:, 1]
+        roc_auc = roc_auc_score(y_true, y_prob)
+        precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
+        pr_auc = auc(recall, precision)
+        return {
+            "roc_auc": roc_auc,
+            "pr_auc": pr_auc,
+            "thresholds": thresholds.tolist()[:20],  # limit output size
+            "precision": precision.tolist()[:20],
+            "recall": recall.tolist()[:20]
+        }
+    except Exception as e:
+        import traceback
+        return {"error": str(e), "trace": traceback.format_exc()}
+@app.get("/coefficients")
+def coefficients():
+    """
+    Return logistic regression coefficients and feature names.
+    Works if your model is a pipeline with 'preprocessor' and 'classifier' steps.
+    """
+    try:
+        # Extract classifier and preprocessor
+        classifier = model.named_steps["classifier"]
+        preprocessor = model.named_steps["preprocessor"]
+        # Get feature names after preprocessing
+        feature_names = preprocessor.get_feature_names_out()
+        # Get coefficients
+        coefficients = classifier.coef_[0]
+        df = pd.DataFrame({
+            "feature": feature_names,
+            "coefficient": coefficients.tolist()
+        })
+        return {"coefficients": df.to_dict(orient="records")}
+    except Exception as e:
+        import traceback
+        return {"error": str(e), "trace": traceback.format_exc()}