Spaces:

Dun3Co
/

LogRegModel

Sleeping

App Files Files Community

Dun3Co committed on Oct 9

Commit

f87d4de

verified ·

1 Parent(s): 2f872a2

Update app.py

Browse files

Files changed (1) hide show

app.py +216 -200

app.py CHANGED Viewed

@@ -1,200 +1,216 @@
-from fastapi import FastAPI
-from pydantic import BaseModel
-import os
-from typing import List, Literal, Optional
-import joblib
-import numpy as np
-import pandas as pd
-import requests
-import shap
-from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
-# =====================================================
-# CONFIG
-# =====================================================
-# Replace these with your NoCoDB API details
-NOCO_API_URL = "https://dun3co-sdc-nocodb.hf.space/api/v2/tables/m39a8axnn3980w9/records"
-NOCO_VIEW_ID = "vwjuv5jnaet9npuu"
-NOCO_API_TOKEN = os.getenv("NOCODB_TOKEN")
-HEADERS = {"xc-token": NOCO_API_TOKEN}
-# =====================================================
-# MODEL LOADING
-# =====================================================
-model = joblib.load("model_1mvp.pkl")
-app = FastAPI(title="Logistic Regression API 2")
-# =====================================================
-# DATA SCHEMAS
-# =====================================================
-class InputData(BaseModel):
-    age: int
-    balance: float
-    day: int
-    campaign: int
-    job: str
-    education: str
-    default: Literal["yes", "no", "unknown"]
-    housing: Literal["yes", "no", "unknown"]
-    loan: Literal["yes", "no", "unknown"]
-    months_since_previous_contact: str
-    n_previous_contacts: str
-    poutcome: str
-    had_contact: bool
-    is_single: bool
-    uknown_contact: bool
-class BatchInputData(BaseModel):
-    data: List[InputData]
-# =====================================================
-# HEALTH CHECK
-# =====================================================
-@app.get("/health")
-def health():
-    return {"status": "ok"}
-# =====================================================
-# NOCODB DATA FETCHING
-# =====================================================
-def fetch_test_data(limit: int = 100):
-    """Fetch test or sample data from NoCoDB view."""
-    params = {"offset": 0, "limit": limit, "viewId": NOCO_VIEW_ID}
-    res = requests.get(NOCO_API_URL, headers=HEADERS, params=params)
-    res.raise_for_status()
-    data = res.json()["list"]
-    return pd.DataFrame(data)
-# =====================================================
-# PREDICTION ENDPOINT
-# =====================================================
-@app.post("/predict")
-def predict(batch: BatchInputData):
-    try:
-        X = pd.DataFrame([item.dict() for item in batch.data])
-        preds = model.predict(X)
-        probs = model.predict_proba(X)[:, 1]
-        return {
-            "predictions": preds.tolist(),
-            "probabilities": probs.tolist()
-        }
-    except Exception as e:
-        import traceback
-        return {"error": str(e), "trace": traceback.format_exc()}
-# =====================================================
-# EXPLAINABILITY ENDPOINT
-# =====================================================
-@app.post("/explain")
-def explain(batch: Optional[BatchInputData] = None, limit: int = 100):
-    """Generate SHAP values either from provided data or from NoCoDB test data."""
-    try:
-        if batch:
-            X = pd.DataFrame([item.dict() for item in batch.data])
-            source = "client batch"
-        else:
-            X = fetch_test_data(limit=limit)
-            source = f"NoCoDB (limit={limit})"
-        print(f"[DEBUG] SHAP explain called using {source} | shape={X.shape} | cols={list(X.columns)}")
-        # Ensure model is a pipeline
-        if hasattr(model, "named_steps"):
-            explainer = shap.Explainer(model.named_steps["classifier"], model.named_steps["preprocessor"].transform(X))
-        else:
-            explainer = shap.Explainer(model, X)
-        shap_values = explainer(X)
-        shap_summary = pd.DataFrame({
-            "feature": X.columns,
-            "mean_abs_shap": np.abs(shap_values.values).mean(axis=0)
-        }).sort_values("mean_abs_shap", ascending=False)
-        print(f"[DEBUG] SHAP summary created successfully with {len(shap_summary)} features.")
-        return {
-            "n_samples": len(X),
-            "shap_summary": shap_summary.to_dict(orient="records")
-        }
-    except Exception as e:
-        import traceback
-        print("[ERROR] SHAP explain failed:", e)
-        print(traceback.format_exc())
-        return {"error": str(e), "trace": traceback.format_exc()}
-# =====================================================
-# METRICS ENDPOINT
-# =====================================================
-@app.post("/metrics")
-def metrics(batch: Optional[BatchInputData] = None, y_true: Optional[List[int]] = None, limit: int = 100):
-    """Compute ROC AUC and threshold analysis, using input or NoCoDB test data."""
-    try:
-        # Use provided data or fallback to test data from NoCoDB
-        if batch:
-            X = pd.DataFrame([item.dict() for item in batch.data])
-        else:
-            X = fetch_test_data(limit=limit)
-        if y_true is None:
-            # Look for 'y_true' column in NoCoDB data
-            if "y_true" in X.columns:
-                y_true = X["y_true"].astype(int).tolist()
-                X = X.drop(columns=["y_true"])
-            else:
-                return {"error": "y_true values not provided or found in dataset"}
-        y_prob = model.predict_proba(X)[:, 1]
-        roc_auc = roc_auc_score(y_true, y_prob)
-        precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
-        pr_auc = auc(recall, precision)
-        return {
-            "roc_auc": roc_auc,
-            "pr_auc": pr_auc,
-            "thresholds": thresholds.tolist()[:20],  # limit output size
-            "precision": precision.tolist()[:20],
-            "recall": recall.tolist()[:20]
-        }
-    except Exception as e:
-        import traceback
-        return {"error": str(e), "trace": traceback.format_exc()}
-@app.get("/coefficients")
-def coefficients():
-    """
-    Return logistic regression coefficients and feature names.
-    Works if your model is a pipeline with 'preprocessor' and 'classifier' steps.
-    """
-    try:
-        # Extract classifier and preprocessor
-        classifier = model.named_steps["classifier"]
-        preprocessor = model.named_steps["preprocessor"]
-        # Get feature names after preprocessing
-        feature_names = preprocessor.get_feature_names_out()
-        # Get coefficients
-        coefficients = classifier.coef_[0]
-        df = pd.DataFrame({
-            "feature": feature_names,
-            "coefficient": coefficients.tolist()
-        })
-        return {"coefficients": df.to_dict(orient="records")}
-    except Exception as e:
-        import traceback
-        return {"error": str(e), "trace": traceback.format_exc()}

+from fastapi import FastAPI
+from pydantic import BaseModel
+import os
+from typing import List, Literal, Optional
+import joblib
+import numpy as np
+import pandas as pd
+import requests
+import shap
+from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
+# =====================================================
+# CONFIG
+# =====================================================
+# Replace these with your NoCoDB API details
+NOCO_API_URL = "https://dun3co-sdc-nocodb.hf.space/api/v2/tables/m39a8axnn3980w9/records"
+NOCO_VIEW_ID = "vwjuv5jnaet9npuu"
+NOCO_API_TOKEN = os.getenv("NOCODB_TOKEN")
+HEADERS = {"xc-token": NOCO_API_TOKEN}
+# =====================================================
+# MODEL LOADING
+# =====================================================
+model = joblib.load("model_1mvp.pkl")
+app = FastAPI(title="Logistic Regression API 2")
+# =====================================================
+# DATA SCHEMAS
+# =====================================================
+class InputData(BaseModel):
+    age: int
+    balance: float
+    day: int
+    campaign: int
+    job: str
+    education: str
+    default: Literal["yes", "no", "unknown"]
+    housing: Literal["yes", "no", "unknown"]
+    loan: Literal["yes", "no", "unknown"]
+    months_since_previous_contact: str
+    n_previous_contacts: str
+    poutcome: str
+    had_contact: bool
+    is_single: bool
+    uknown_contact: bool
+class BatchInputData(BaseModel):
+    data: List[InputData]
+# =====================================================
+# HEALTH CHECK
+# =====================================================
+@app.get("/health")
+def health():
+    return {"status": "ok"}
+# =====================================================
+# NOCODB DATA FETCHING
+# =====================================================
+def fetch_test_data(limit: int = 100, timeout: float = 30.0):
+    """Fetch test or sample data from NoCoDB view.
+    Requests up to ``limit`` records, starting at offset 0, from the view
+    ``NOCO_VIEW_ID`` and returns the ``list`` field of the JSON response as a
+    DataFrame. ``timeout`` is the number of seconds to wait on the HTTP
+    request, so a stalled NoCoDB instance cannot block the caller forever.
+    Raises ``requests.HTTPError`` on a 4xx/5xx response and
+    ``requests.Timeout`` when the request times out.
+    """
+    params = {"offset": 0, "limit": limit, "viewId": NOCO_VIEW_ID}
+    # Without an explicit timeout, requests waits indefinitely for a response.
+    res = requests.get(NOCO_API_URL, headers=HEADERS, params=params, timeout=timeout)
+    res.raise_for_status()
+    data = res.json()["list"]
+    return pd.DataFrame(data)
+# =====================================================
+# PREDICTION ENDPOINT
+# =====================================================
+@app.post("/predict")
+def predict(batch: BatchInputData):
+    try:
+        X = pd.DataFrame([item.dict() for item in batch.data])
+        preds = model.predict(X)
+        probs = model.predict_proba(X)[:, 1]
+        return {
+            "predictions": preds.tolist(),
+            "probabilities": probs.tolist()
+        }
+    except Exception as e:
+        import traceback
+        return {"error": str(e), "trace": traceback.format_exc()}
+# =====================================================
+# EXPLAINABILITY ENDPOINT
+# =====================================================
+@app.post("/explain")
+def explain(batch: Optional[BatchInputData] = None, limit: int = 100):
+    """Generate SHAP values either from provided data or from NoCoDB test data."""
+    try:
+        if batch:
+            X = pd.DataFrame([item.dict() for item in batch.data])
+            source = "client batch"
+        else:
+            X = fetch_test_data(limit=limit)
+            source = f"NoCoDB (limit={limit})"
+        print(f"[DEBUG] SHAP explain called using {source} | shape={X.shape} | cols={list(X.columns)}")
+        # Remove ID and target columns if they exist
+        drop_cols = [c for c in ["Id", "y", "target"] if c in X.columns]
+        if drop_cols:
+            print(f"[DEBUG] Dropping columns not used for prediction: {drop_cols}")
+            X = X.drop(columns=drop_cols)
+        # Handle pipelines correctly
+        if hasattr(model, "named_steps"):
+            preprocessor = model.named_steps["preprocessor"]
+            classifier = model.named_steps["classifier"]
+            X_transformed = preprocessor.transform(X)
+            feature_names = preprocessor.get_feature_names_out()
+            print(f"[DEBUG] Transformed shape: {X_transformed.shape} | n_features={len(feature_names)}")
+            explainer = shap.Explainer(classifier, X_transformed)
+            shap_values = explainer(X_transformed)
+            shap_summary = pd.DataFrame({
+                "feature": feature_names,
+                "mean_abs_shap": np.abs(shap_values.values).mean(axis=0)
+            }).sort_values("mean_abs_shap", ascending=False)
+        else:
+            # If model is not a pipeline
+            explainer = shap.Explainer(model, X)
+            shap_values = explainer(X)
+            shap_summary = pd.DataFrame({
+                "feature": X.columns,
+                "mean_abs_shap": np.abs(shap_values.values).mean(axis=0)
+            }).sort_values("mean_abs_shap", ascending=False)
+        print(f"[DEBUG] SHAP summary created successfully with {len(shap_summary)} features.")
+        return {"n_samples": len(X), "shap_summary": shap_summary.to_dict(orient="records")}
+    except Exception as e:
+        import traceback
+        print("[ERROR] SHAP explain failed:", e)
+        print(traceback.format_exc())
+        return {"error": str(e), "trace": traceback.format_exc()}
+# =====================================================
+# METRICS ENDPOINT
+# =====================================================
+@app.post("/metrics")
+def metrics(batch: Optional[BatchInputData] = None, y: Optional[List[int]] = None, limit: int = 100):
+    """Compute ROC AUC, PR AUC and threshold analysis, using input or NoCoDB test data.
+    Features come from ``batch`` when it is supplied, otherwise from up to
+    ``limit`` rows returned by ``fetch_test_data``. Labels come from ``y`` when
+    it is supplied, otherwise from a ``y`` column of the feature frame, which
+    is removed before scoring.
+    Returns a dict with ``roc_auc``, ``pr_auc`` and the first 20 entries of the
+    precision-recall ``thresholds``, ``precision`` and ``recall`` arrays. When
+    no labels are available, or any step raises, a dict with an ``error`` key
+    (plus ``trace`` for exceptions) is returned instead.
+    """
+    try:
+        # Use provided data or fallback to test data from NoCoDB
+        if batch:
+            X = pd.DataFrame([item.dict() for item in batch.data])
+        else:
+            X = fetch_test_data(limit=limit)
+        if y is None:
+            # Look for the 'y' label column in the data; the membership test
+            # must use the same column name that is read and dropped below.
+            if "y" in X.columns:
+                y = X["y"].astype(int).tolist()
+                X = X.drop(columns=["y"])
+            else:
+                return {"error": "y values not provided or found in dataset"}
+        # Probability of the positive class (second column of predict_proba)
+        y_prob = model.predict_proba(X)[:, 1]
+        roc_auc = roc_auc_score(y, y_prob)
+        precision, recall, thresholds = precision_recall_curve(y, y_prob)
+        pr_auc = auc(recall, precision)
+        return {
+            "roc_auc": roc_auc,
+            "pr_auc": pr_auc,
+            "thresholds": thresholds.tolist()[:20],  # limit output size
+            "precision": precision.tolist()[:20],
+            "recall": recall.tolist()[:20]
+        }
+    except Exception as e:
+        import traceback
+        return {"error": str(e), "trace": traceback.format_exc()}
+@app.get("/coefficients")
+def coefficients():
+    """
+    Return logistic regression coefficients and feature names.
+    Works if your model is a pipeline with 'preprocessor' and 'classifier' steps.
+    """
+    try:
+        # Extract classifier and preprocessor
+        classifier = model.named_steps["classifier"]
+        preprocessor = model.named_steps["preprocessor"]
+        # Get feature names after preprocessing
+        feature_names = preprocessor.get_feature_names_out()
+        # Get coefficients
+        coefficients = classifier.coef_[0]
+        df = pd.DataFrame({
+            "feature": feature_names,
+            "coefficient": coefficients.tolist()
+        })
+        return {"coefficients": df.to_dict(orient="records")}
+    except Exception as e:
+        import traceback
+        return {"error": str(e), "trace": traceback.format_exc()}