import os
from typing import List, Literal, Optional

import joblib
import numpy as np
import pandas as pd
import requests
import shap
from fastapi import FastAPI
from pydantic import BaseModel
from sklearn.metrics import auc, precision_recall_curve, roc_auc_score

# =====================================================
# CONFIG
# =====================================================
# Replace these with your NoCoDB API details
NOCO_API_URL = "https://dun3co-sdc-nocodb.hf.space/api/v2/tables/m39a8axnn3980w9/records"
NOCO_VIEW_ID = "vwjuv5jnaet9npuu"
# Read from the environment so the token never lives in source control.
# os.getenv returns None when NOCODB_TOKEN is unset.
NOCO_API_TOKEN = os.getenv("NOCODB_TOKEN")
HEADERS = {"xc-token": NOCO_API_TOKEN}
# Upper bound (seconds) on a single NoCoDB request. Without a timeout,
# requests waits indefinitely and a stalled upstream would hang the endpoint.
NOCO_TIMEOUT_SECONDS = 30

# =====================================================
# MODEL LOADING
# =====================================================
# NOTE: joblib.load unpickles the file, which can execute arbitrary code.
# Only ever point this at a model artifact you produced or fully trust.
model = joblib.load("model_1mvp.pkl")

app = FastAPI(title="Logistic Regression API 2")


# =====================================================
# DATA SCHEMAS
# =====================================================
class InputData(BaseModel):
    """One record to score; field names become the DataFrame column names."""

    age: int
    balance: float
    day: int
    campaign: int
    job: str
    education: str
    default: Literal["yes", "no", "unknown"]
    housing: Literal["yes", "no", "unknown"]
    loan: Literal["yes", "no", "unknown"]
    # NOTE(review): the next two are declared as str although the names sound
    # numeric - presumably pre-binned/categorical values; confirm.
    months_since_previous_contact: str
    n_previous_contacts: str
    poutcome: str
    had_contact: bool
    is_single: bool
    # NOTE(review): spelled "uknown" (sic). Presumably this matches the column
    # name the model was trained with - do not rename without confirming.
    uknown_contact: bool


class BatchInputData(BaseModel):
    """Request body wrapper: a list of records under the ``data`` key."""

    data: List[InputData]


# =====================================================
# HEALTH CHECK
# =====================================================
@app.get("/health")
def health():
    """Liveness probe; always returns ``{"status": "ok"}``."""
    return {"status": "ok"}


# =====================================================
# NOCODB DATA FETCHING
# =====================================================
def fetch_test_data(limit: int = 100, timeout: float = NOCO_TIMEOUT_SECONDS) -> pd.DataFrame:
    """Fetch test or sample data from the configured NoCoDB view.

    Args:
        limit: Maximum number of records requested (sent as the ``limit``
            query parameter; the offset is always 0).
        timeout: Seconds to wait for NoCoDB before giving up.

    Returns:
        A DataFrame built from the ``"list"`` entry of the JSON response.

    Raises:
        requests.HTTPError: If NoCoDB answers with an error status.
        requests.Timeout: If NoCoDB does not answer within ``timeout``.
        KeyError: If the response JSON has no ``"list"`` key.
    """
    params = {"offset": 0, "limit": limit, "viewId": NOCO_VIEW_ID}
    res = requests.get(NOCO_API_URL, headers=HEADERS, params=params, timeout=timeout)
    res.raise_for_status()
    return pd.DataFrame(res.json()["list"])
# =====================================================
# SHARED HELPERS
# =====================================================
# Columns that may be present in fetched data but are not model features.
_NON_FEATURE_COLUMNS = ("Id", "y", "target")
# String spellings of the target that count as the positive class
# (compared after stripping whitespace and lower-casing).
_POSITIVE_LABEL_STRINGS = frozenset({"true", "1", "yes"})
# The /metrics response only carries the first N points of the PR curve.
_MAX_CURVE_POINTS = 20


def _load_frame(batch, limit):
    """Return ``(DataFrame, source description)`` from the batch or NoCoDB."""
    if batch is not None:
        frame = pd.DataFrame([item.dict() for item in batch.data])
        return frame, "client batch"
    return fetch_test_data(limit=limit), f"NoCoDB (limit={limit})"


def _present_non_feature_columns(frame):
    """List the non-feature columns that actually exist in ``frame``."""
    return [c for c in _NON_FEATURE_COLUMNS if c in frame.columns]


def _is_positive_label(value):
    """Tell whether one raw ``y`` value denotes the positive class."""
    if isinstance(value, str):
        return value.strip().lower() in _POSITIVE_LABEL_STRINGS
    # True, 1 and 1.0 all compare equal to 1; NaN and None do not.
    return value in (True, 1)


def _error_response(exc, log_label=None):
    """Build the error payload; must be called from inside an ``except`` block.

    When ``log_label`` is given, the error and traceback are also printed.
    """
    import traceback

    trace = traceback.format_exc()
    if log_label is not None:
        print(f"[ERROR] {log_label} failed:", exc)
        print(trace)
    # NOTE(review): the traceback is returned to the client, which exposes
    # internals; consider dropping "trace" outside of debugging.
    return {"error": str(exc), "trace": trace}


# =====================================================
# PREDICTION ENDPOINT
# =====================================================
@app.post("/predict")
def predict(batch: BatchInputData):
    """Score every record in the batch.

    Returns:
        ``{"predictions": [...], "probabilities": [...]}`` where the
        probabilities are column 1 of ``model.predict_proba``. An empty batch
        yields two empty lists. On failure, ``{"error": ..., "trace": ...}``.
    """
    try:
        if not batch.data:
            # An empty DataFrame has no columns, so the model would reject it.
            return {"predictions": [], "probabilities": []}

        X = pd.DataFrame([item.dict() for item in batch.data])
        preds = model.predict(X)
        probs = model.predict_proba(X)[:, 1]
        return {
            "predictions": preds.tolist(),
            "probabilities": probs.tolist(),
        }
    except Exception as e:
        return _error_response(e)


# =====================================================
# EXPLAINABILITY ENDPOINT
# =====================================================
@app.post("/explain")
def explain(batch: Optional[BatchInputData] = None, limit: int = 100):
    """Generate SHAP values either from provided data or from NoCoDB test data.

    Args:
        batch: Records to explain. When omitted, data is fetched from NoCoDB.
        limit: Number of NoCoDB records to fetch; ignored when ``batch`` is given.

    Returns:
        ``{"n_samples": int, "shap_summary": [{"feature", "mean_abs_shap"}, ...]}``
        sorted by descending mean absolute SHAP value, or an ``{"error": ...}``
        payload when there is nothing to explain or the computation fails.
    """
    try:
        X, source = _load_frame(batch, limit)
        print(f"[DEBUG] SHAP explain called using {source} | shape={X.shape} | cols={list(X.columns)}")

        # Remove ID and target columns if they exist
        drop_cols = _present_non_feature_columns(X)
        if drop_cols:
            print(f"[DEBUG] Dropping columns not used for prediction: {drop_cols}")
            X = X.drop(columns=drop_cols)

        if X.empty:
            return {"error": "No rows available to explain."}

        if hasattr(model, "named_steps"):
            # Pipeline: explain the classifier in the preprocessed feature
            # space so SHAP values line up with the transformed feature names.
            preprocessor = model.named_steps["preprocessor"]
            classifier = model.named_steps["classifier"]
            X_transformed = preprocessor.transform(X)
            feature_names = preprocessor.get_feature_names_out()
            print(f"[DEBUG] Transformed shape: {X_transformed.shape} | n_features={len(feature_names)}")
            explainer = shap.Explainer(classifier, X_transformed)
            shap_values = explainer(X_transformed)
        else:
            # If model is not a pipeline
            feature_names = X.columns
            explainer = shap.Explainer(model, X)
            shap_values = explainer(X)

        shap_summary = pd.DataFrame({
            "feature": feature_names,
            "mean_abs_shap": np.abs(shap_values.values).mean(axis=0),
        }).sort_values("mean_abs_shap", ascending=False)

        print(f"[DEBUG] SHAP summary created successfully with {len(shap_summary)} features.")
        return {"n_samples": len(X), "shap_summary": shap_summary.to_dict(orient="records")}
    except Exception as e:
        return _error_response(e, log_label="SHAP explain")


# =====================================================
# METRICS ENDPOINT
# =====================================================
@app.post("/metrics")
def metrics(batch: Optional[BatchInputData] = None, limit: int = 100):
    """
    Compute ROC AUC and precision/recall analysis using input or NoCoDB test data.

    The target column 'y' is read as boolean: True, 1, and the strings
    "true", "1", "yes" (case-insensitive) are positives; anything else is 0.

    Note that InputData declares no 'y' field, so a client batch always ends
    in the "No target column" error; only NoCoDB data can be evaluated.

    Returns:
        ``{"roc_auc", "pr_auc", "thresholds", "precision", "recall"}`` with the
        three curve lists truncated to their first 20 points, or an
        ``{"error": ...}`` payload.
    """
    try:
        X, source = _load_frame(batch, limit)
        print(f"[DEBUG] Metrics called using {source} | shape={X.shape}")

        # Ensure target 'y' exists
        if "y" not in X.columns:
            return {"error": "No target column 'y' found in dataset."}

        # Robust conversion to int
        y_true = [int(_is_positive_label(v)) for v in X["y"]]
        print(f"[DEBUG] Found {sum(y_true)} positive cases out of {len(y_true)}")

        # ROC AUC needs both classes. Depending on the scikit-learn version the
        # call raises or yields NaN (not JSON-serialisable), so fail clearly.
        if len(set(y_true)) < 2:
            return {"error": "Cannot compute ROC AUC: target column 'y' contains only one class."}

        # Drop the target and any other non-feature columns before predicting.
        X = X.drop(columns=_present_non_feature_columns(X))

        # Predict probabilities
        y_prob = model.predict_proba(X)[:, 1]

        # Compute metrics
        roc_auc = roc_auc_score(y_true, y_prob)
        precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
        pr_auc = auc(recall, precision)
        print(f"[DEBUG] ROC AUC={roc_auc:.3f} | PR AUC={pr_auc:.3f}")

        return {
            "roc_auc": float(roc_auc),
            "pr_auc": float(pr_auc),
            "thresholds": thresholds.tolist()[:_MAX_CURVE_POINTS],
            "precision": precision.tolist()[:_MAX_CURVE_POINTS],
            "recall": recall.tolist()[:_MAX_CURVE_POINTS],
        }
    except Exception as e:
        return _error_response(e, log_label="Metrics")


# =====================================================
# COEFFICIENTS ENDPOINT
# =====================================================
@app.get("/coefficients")
def coefficients():
    """
    Return logistic regression coefficients and feature names.

    Works if your model is a pipeline with 'preprocessor' and 'classifier'
    steps; otherwise the lookup fails and an ``{"error", "trace"}`` payload
    is returned.
    """
    try:
        # Extract classifier and preprocessor
        classifier = model.named_steps["classifier"]
        preprocessor = model.named_steps["preprocessor"]

        # Feature names after preprocessing, paired with the first row of coef_.
        df = pd.DataFrame({
            "feature": preprocessor.get_feature_names_out(),
            "coefficient": classifier.coef_[0].tolist(),
        })
        return {"coefficients": df.to_dict(orient="records")}
    except Exception as e:
        return _error_response(e)