# FastAPI scoring service (exported from a Hugging Face Space).
| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| import os | |
| from typing import List, Literal, Optional | |
| import joblib | |
| import numpy as np | |
| import pandas as pd | |
| import requests | |
| import shap | |
| from sklearn.metrics import roc_auc_score, precision_recall_curve, auc | |
# =====================================================
# CONFIG
# =====================================================
# Replace these with your NoCoDB API details
# REST endpoint of the NoCoDB records table used as evaluation/test data.
NOCO_API_URL = "https://dun3co-sdc-nocodb.hf.space/api/v2/tables/m39a8axnn3980w9/records"
# View that scopes which records and columns the API returns.
NOCO_VIEW_ID = "vwjuv5jnaet9npuu"
# Token is read from the environment; if unset this is None and
# authenticated requests to NoCoDB will fail.
NOCO_API_TOKEN = os.getenv("NOCODB_TOKEN")
# Auth header expected by NoCoDB's v2 API.
HEADERS = {"xc-token": NOCO_API_TOKEN}
# =====================================================
# MODEL LOADING
# =====================================================
# Trained scikit-learn estimator loaded at import time; later code
# treats it as a Pipeline with "preprocessor" and "classifier" steps
# when available (see the hasattr(model, "named_steps") checks).
model = joblib.load("model_1mvp.pkl")
app = FastAPI(title="Logistic Regression API 2")
# =====================================================
# DATA SCHEMAS
# =====================================================
class InputData(BaseModel):
    """One observation to score.

    Field names must match the feature columns the fitted pipeline was
    trained on, so they cannot be renamed without retraining the model
    and updating API clients.
    """

    age: int
    balance: float
    day: int
    campaign: int
    job: str
    education: str
    default: Literal["yes", "no", "unknown"]
    housing: Literal["yes", "no", "unknown"]
    loan: Literal["yes", "no", "unknown"]
    # NOTE(review): typed as str rather than int — presumably binned or
    # categorical values; confirm against the training data.
    months_since_previous_contact: str
    n_previous_contacts: str
    poutcome: str
    had_contact: bool
    is_single: bool
    # NOTE(review): "uknown" is a typo for "unknown", but the name must
    # stay as-is to match the model's training columns / existing clients.
    uknown_contact: bool
class BatchInputData(BaseModel):
    """Request wrapper for a batch of observations: {"data": [{...}, ...]}."""

    # Non-empty list is not enforced; an empty list yields an empty frame.
    data: List[InputData]
# =====================================================
# HEALTH CHECK
# =====================================================
def health():
    """Liveness probe; always reports the service as up."""
    # NOTE(review): no route decorator is visible in this file — confirm
    # this handler is registered on `app` (e.g. @app.get("/health")).
    return {"status": "ok"}
# =====================================================
# NOCODB DATA FETCHING
# =====================================================
def fetch_test_data(limit: int = 100):
    """Fetch up to `limit` rows from the configured NoCoDB view.

    Args:
        limit: maximum number of records to request (offset fixed at 0).

    Returns:
        pandas.DataFrame with one column per NoCoDB field.

    Raises:
        requests.HTTPError: on a non-2xx response.
        requests.Timeout: if NoCoDB does not answer within 30 seconds.
    """
    params = {"offset": 0, "limit": limit, "viewId": NOCO_VIEW_ID}
    # Fix: explicit timeout so a hung NoCoDB instance cannot block a
    # worker forever (requests has no default timeout).
    res = requests.get(NOCO_API_URL, headers=HEADERS, params=params, timeout=30)
    res.raise_for_status()
    # NoCoDB v2 wraps the records in a "list" key.
    data = res.json()["list"]
    return pd.DataFrame(data)
# =====================================================
# PREDICTION ENDPOINT
# =====================================================
def predict(batch: BatchInputData):
    """Score a batch of observations with the loaded model.

    Returns hard class predictions plus positive-class probabilities;
    on any failure returns an error payload with the traceback instead
    of raising.
    """
    try:
        frame = pd.DataFrame([record.dict() for record in batch.data])
        labels = model.predict(frame).tolist()
        scores = model.predict_proba(frame)[:, 1].tolist()
        return {
            "predictions": labels,
            "probabilities": scores
        }
    except Exception as err:
        import traceback
        return {"error": str(err), "trace": traceback.format_exc()}
# =====================================================
# EXPLAINABILITY ENDPOINT
# =====================================================
def explain(batch: Optional[BatchInputData] = None, limit: int = 100):
    """Generate SHAP values either from provided data or from NoCoDB test data.

    Returns {"n_samples": ..., "shap_summary": [...]} where the summary
    ranks features by mean absolute SHAP value; on any failure returns an
    error payload with the traceback instead of raising.
    """
    try:
        # Data source: an explicit client batch takes precedence over NoCoDB.
        if batch:
            X = pd.DataFrame([item.dict() for item in batch.data])
            source = "client batch"
        else:
            X = fetch_test_data(limit=limit)
            source = f"NoCoDB (limit={limit})"
        print(f"[DEBUG] SHAP explain called using {source} | shape={X.shape} | cols={list(X.columns)}")
        # Remove ID and target columns if they exist — they are not model inputs.
        drop_cols = [c for c in ["Id", "y", "target"] if c in X.columns]
        if drop_cols:
            print(f"[DEBUG] Dropping columns not used for prediction: {drop_cols}")
            X = X.drop(columns=drop_cols)
        # Handle pipelines correctly: explain the classifier on the already
        # preprocessed feature space so SHAP sees numeric inputs.
        if hasattr(model, "named_steps"):
            preprocessor = model.named_steps["preprocessor"]
            classifier = model.named_steps["classifier"]
            X_transformed = preprocessor.transform(X)
            feature_names = preprocessor.get_feature_names_out()
            print(f"[DEBUG] Transformed shape: {X_transformed.shape} | n_features={len(feature_names)}")
            # X_transformed doubles as the background dataset for the explainer.
            explainer = shap.Explainer(classifier, X_transformed)
            shap_values = explainer(X_transformed)
            # Global importance: mean |SHAP| per feature, largest first.
            shap_summary = pd.DataFrame({
                "feature": feature_names,
                "mean_abs_shap": np.abs(shap_values.values).mean(axis=0)
            }).sort_values("mean_abs_shap", ascending=False)
        else:
            # If model is not a pipeline, explain it directly on the raw frame.
            explainer = shap.Explainer(model, X)
            shap_values = explainer(X)
            shap_summary = pd.DataFrame({
                "feature": X.columns,
                "mean_abs_shap": np.abs(shap_values.values).mean(axis=0)
            }).sort_values("mean_abs_shap", ascending=False)
        print(f"[DEBUG] SHAP summary created successfully with {len(shap_summary)} features.")
        return {"n_samples": len(X), "shap_summary": shap_summary.to_dict(orient="records")}
    except Exception as e:
        import traceback
        print("[ERROR] SHAP explain failed:", e)
        print(traceback.format_exc())
        return {"error": str(e), "trace": traceback.format_exc()}
# =====================================================
# METRICS ENDPOINT
# =====================================================
def metrics(batch: Optional[BatchInputData] = None, limit: int = 100):
    """
    Compute ROC AUC and threshold analysis using input or NoCoDB test data.

    Assumes the target column 'y' is boolean-like (True/False, "True"/"true", 1).
    Returns {"roc_auc", "pr_auc", "thresholds", "precision", "recall"} with the
    curve arrays truncated to their first 20 entries; on any failure returns an
    error payload with the traceback instead of raising.
    """
    # Fix: removed the dead pre-initialized defaults (roc_auc/pr_auc/
    # thresholds/precision/recall) — they were never read on any path:
    # the success path reassigns them and the failure path returns the
    # error dict from the except block.
    try:
        # Fetch data from batch or NoCoDB
        if batch:
            X = pd.DataFrame([item.dict() for item in batch.data])
            source = "client batch"
        else:
            X = fetch_test_data(limit=limit)
            source = f"NoCoDB (limit={limit})"
        print(f"[DEBUG] Metrics called using {source} | shape={X.shape}")
        # Ground truth is required. Note: frames built from InputData have
        # no 'y' field, so in practice this path serves NoCoDB-sourced data.
        if "y" not in X.columns:
            return {"error": "No target column 'y' found in dataset."}
        # Robust conversion to int (True == 1 in Python, so 1 is redundant
        # but harmless; string variants cover JSON/NoCoDB encodings).
        y_true = X["y"].map(lambda v: 1 if v in [True, "True", "true", 1] else 0).tolist()
        print(f"[DEBUG] Found {sum(y_true)} positive cases out of {len(y_true)}")
        X = X.drop(columns=["y"])
        # Drop ID if it exists — not a model feature.
        if "Id" in X.columns:
            X = X.drop(columns=["Id"])
        # Positive-class probabilities from the fitted model.
        y_prob = model.predict_proba(X)[:, 1]
        # Compute metrics.
        roc_auc = roc_auc_score(y_true, y_prob)
        precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
        pr_auc = auc(recall, precision)
        print(f"[DEBUG] ROC AUC={roc_auc:.3f} | PR AUC={pr_auc:.3f}")
        # Truncate so the JSON payload stays small. NOTE: thresholds has
        # one fewer element than precision/recall by sklearn's contract.
        return {
            "roc_auc": roc_auc,
            "pr_auc": pr_auc,
            "thresholds": thresholds.tolist()[:20],
            "precision": precision.tolist()[:20],
            "recall": recall.tolist()[:20]
        }
    except Exception as e:
        import traceback
        print("[ERROR] Metrics failed:", e)
        print(traceback.format_exc())
        return {"error": str(e), "trace": traceback.format_exc()}
def coefficients():
    """
    Return logistic regression coefficients and feature names.
    Works if your model is a pipeline with 'preprocessor' and 'classifier' steps.
    """
    try:
        # Pull the two pipeline steps (same lookup order as before so the
        # error message on a missing step is unchanged).
        clf = model.named_steps["classifier"]
        pre = model.named_steps["preprocessor"]
        # Post-preprocessing feature names align 1:1 with the weights.
        names = pre.get_feature_names_out()
        weights = clf.coef_[0]
        table = pd.DataFrame({
            "feature": names,
            "coefficient": weights.tolist()
        })
        return {"coefficients": table.to_dict(orient="records")}
    except Exception as err:
        import traceback
        return {"error": str(err), "trace": traceback.format_exc()}