Dun3Co committed on
Commit
2f872a2
·
verified ·
1 Parent(s): d2ca5be

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +200 -0
app.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ import os
4
+ from typing import List, Literal, Optional
5
+ import joblib
6
+ import numpy as np
7
+ import pandas as pd
8
+ import requests
9
+ import shap
10
+ from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
11
+
12
+ # =====================================================
13
+ # CONFIG
14
+ # =====================================================
15
+
16
# NoCoDB connection settings -- replace these with your own NoCoDB API details.
# The records endpoint and view id are hard-coded; the API token is read from
# the NOCODB_TOKEN environment variable (None when the variable is unset).
NOCO_API_URL = "https://dun3co-sdc-nocodb.hf.space/api/v2/tables/m39a8axnn3980w9/records"
NOCO_VIEW_ID = "vwjuv5jnaet9npuu"
NOCO_API_TOKEN = os.environ.get("NOCODB_TOKEN")

# Headers sent with every NoCoDB request; the token travels in "xc-token".
HEADERS = {"xc-token": NOCO_API_TOKEN}
22
+
23
+ # =====================================================
24
+ # MODEL LOADING
25
+ # =====================================================
26
+
27
# Deserialize the trained model once at import time so every request reuses it.
# The path is relative to the process working directory.
model = joblib.load("model_1mvp.pkl")

# FastAPI application object that the route decorators below attach to.
app = FastAPI(title="Logistic Regression API 2")
29
+
30
+ # =====================================================
31
+ # DATA SCHEMAS
32
+ # =====================================================
33
+
34
# Shared type for the three yes/no/unknown flags on InputData.
_YesNoUnknown = Literal["yes", "no", "unknown"]


class InputData(BaseModel):
    """One feature record accepted by the prediction endpoints.

    The endpoints turn each record into a DataFrame row via ``.dict()``,
    so the field names here are the column names handed to the model.
    """

    # Numeric features.
    age: int
    balance: float
    day: int
    campaign: int

    # Free-form categorical features (no value validation is applied).
    job: str
    education: str

    # Categorical flags restricted to "yes" / "no" / "unknown".
    default: _YesNoUnknown
    housing: _YesNoUnknown
    loan: _YesNoUnknown

    # Declared as strings, not numbers -- presumably pre-binned categories;
    # TODO confirm against the training pipeline.
    months_since_previous_contact: str
    n_previous_contacts: str
    poutcome: str

    # Boolean indicator features.
    had_contact: bool
    is_single: bool
    # NOTE: the spelling "uknown_contact" is part of the request schema;
    # renaming it would break existing clients.
    uknown_contact: bool
50
+
51
class BatchInputData(BaseModel):
    """Request body wrapping a list of ``InputData`` records under ``data``."""

    # Records to process, in the order the client supplied them.
    data: List[InputData]
53
+
54
+ # =====================================================
55
+ # HEALTH CHECK
56
+ # =====================================================
57
+
58
@app.get("/health")
def health():
    """Health check: return a constant ``{"status": "ok"}`` payload."""
    payload = {"status": "ok"}
    return payload
61
+
62
+ # =====================================================
63
+ # NOCODB DATA FETCHING
64
+ # =====================================================
65
+
66
def fetch_test_data(limit: int = 100, timeout: float = 30.0):
    """Fetch test or sample data from the NoCoDB view.

    Args:
        limit: Maximum number of records to request (sent as ``limit``;
            the request always starts at ``offset`` 0).
        timeout: Seconds to wait for NoCoDB before giving up. Without a
            timeout ``requests`` waits indefinitely, so a stalled NoCoDB
            host would hang the calling endpoint.

    Returns:
        A ``pandas.DataFrame`` built from the ``"list"`` entry of the JSON
        response.

    Raises:
        requests.HTTPError: If NoCoDB answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError: If the JSON response has no ``"list"`` entry.
    """
    params = {"offset": 0, "limit": limit, "viewId": NOCO_VIEW_ID}
    res = requests.get(
        NOCO_API_URL,
        headers=HEADERS,
        params=params,
        timeout=timeout,
    )
    res.raise_for_status()
    data = res.json()["list"]
    return pd.DataFrame(data)
73
+
74
+ # =====================================================
75
+ # PREDICTION ENDPOINT
76
+ # =====================================================
77
+
78
@app.post("/predict")
def predict(batch: BatchInputData):
    """Score a batch of records with the loaded model.

    Args:
        batch: Request body whose ``data`` list holds the records to score.

    Returns:
        ``{"predictions": [...], "probabilities": [...]}`` where
        ``probabilities`` is column 1 of ``model.predict_proba``. An empty
        batch yields two empty lists. On failure, a dict with ``"error"``
        and ``"trace"`` keys is returned instead of raising.
    """
    # An empty batch would build a DataFrame with no columns at all, leaving
    # the model nothing to score; answer with empty results instead of a trace.
    if not batch.data:
        return {"predictions": [], "probabilities": []}

    try:
        X = pd.DataFrame([item.dict() for item in batch.data])
        preds = model.predict(X)
        probs = model.predict_proba(X)[:, 1]
        return {
            "predictions": preds.tolist(),
            "probabilities": probs.tolist(),
        }
    except Exception as e:
        import traceback

        # NOTE(review): the full traceback is returned to the client, which
        # exposes internals; kept because callers may rely on the "trace" key.
        return {"error": str(e), "trace": traceback.format_exc()}
91
+
92
+ # =====================================================
93
+ # EXPLAINABILITY ENDPOINT
94
+ # =====================================================
95
+
96
@app.post("/explain")
def explain(batch: Optional[BatchInputData] = None, limit: int = 100):
    """Generate SHAP values either from provided data or from NoCoDB test data.

    Args:
        batch: Optional request body; when given, its records are explained.
        limit: Number of NoCoDB records to fetch when ``batch`` is omitted.

    Returns:
        ``{"n_samples": ..., "shap_summary": [...]}`` where each summary entry
        holds a ``feature`` name and its ``mean_abs_shap`` (mean absolute SHAP
        value over the samples), sorted in descending order. On failure, a
        dict with ``"error"`` and ``"trace"`` keys is returned instead.
    """
    try:
        if batch is not None:
            X = pd.DataFrame([item.dict() for item in batch.data])
            source = "client batch"
        else:
            X = fetch_test_data(limit=limit)
            source = f"NoCoDB (limit={limit})"

        print(f"[DEBUG] SHAP explain called using {source} | shape={X.shape} | cols={list(X.columns)}")

        if hasattr(model, "named_steps"):
            # Pipeline: the classifier only understands preprocessed features,
            # so the SAME transformed matrix must be used both as background
            # data and as the rows being explained. Feature names must come
            # from the preprocessor too, since the transformed column count
            # generally differs from the raw one.
            preprocessor = model.named_steps["preprocessor"]
            X_explain = preprocessor.transform(X)
            if hasattr(X_explain, "toarray"):
                # Densify a sparse preprocessor output before handing it on.
                X_explain = X_explain.toarray()
            feature_names = list(preprocessor.get_feature_names_out())
            explainer = shap.Explainer(model.named_steps["classifier"], X_explain)
        else:
            # Bare estimator: raw columns are the model's features.
            X_explain = X
            feature_names = list(X.columns)
            explainer = shap.Explainer(model, X)

        shap_values = explainer(X_explain)

        shap_summary = pd.DataFrame({
            "feature": feature_names,
            "mean_abs_shap": np.abs(shap_values.values).mean(axis=0),
        }).sort_values("mean_abs_shap", ascending=False)

        print(f"[DEBUG] SHAP summary created successfully with {len(shap_summary)} features.")

        return {
            "n_samples": len(X),
            "shap_summary": shap_summary.to_dict(orient="records"),
        }

    except Exception as e:
        import traceback

        print("[ERROR] SHAP explain failed:", e)
        print(traceback.format_exc())
        return {"error": str(e), "trace": traceback.format_exc()}
134
+
135
+ # =====================================================
136
+ # METRICS ENDPOINT
137
+ # =====================================================
138
+
139
@app.post("/metrics")
def metrics(batch: Optional[BatchInputData] = None, y_true: Optional[List[int]] = None, limit: int = 100):
    """Compute ROC AUC and threshold analysis, using input or NoCoDB test data.

    Features come from ``batch`` when supplied, otherwise from NoCoDB. Labels
    come from ``y_true`` when supplied, otherwise from a ``y_true`` column in
    the feature frame (which is then dropped before scoring).

    Returns:
        A dict with ``roc_auc``, ``pr_auc`` and the first 20 entries of the
        precision-recall ``thresholds``, ``precision`` and ``recall`` arrays;
        or a dict with an ``"error"`` key (plus ``"trace"`` for exceptions).
    """
    try:
        # Client-supplied records win; otherwise fall back to NoCoDB.
        X = (
            pd.DataFrame([item.dict() for item in batch.data])
            if batch
            else fetch_test_data(limit=limit)
        )

        if y_true is None:
            # No explicit labels: they must be present as a column.
            if "y_true" not in X.columns:
                return {"error": "y_true values not provided or found in dataset"}
            y_true = X["y_true"].astype(int).tolist()
            X = X.drop(columns=["y_true"])

        # Column 1 of predict_proba is used as the positive-class score.
        y_prob = model.predict_proba(X)[:, 1]
        roc_auc = roc_auc_score(y_true, y_prob)
        precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
        pr_auc = auc(recall, precision)

        # Only the first 20 points of each curve are returned to bound the
        # response size.
        head = slice(None, 20)
        return {
            "roc_auc": roc_auc,
            "pr_auc": pr_auc,
            "thresholds": thresholds.tolist()[head],
            "precision": precision.tolist()[head],
            "recall": recall.tolist()[head],
        }

    except Exception as e:
        import traceback

        return {"error": str(e), "trace": traceback.format_exc()}
173
+
174
@app.get("/coefficients")
def coefficients():
    """Return the classifier's coefficients paired with feature names.

    Requires ``model`` to be a pipeline exposing ``named_steps`` with
    ``"classifier"`` and ``"preprocessor"`` entries; anything else ends up in
    the error response.

    Returns:
        ``{"coefficients": [{"feature": ..., "coefficient": ...}, ...]}`` built
        from the preprocessor's output feature names and the first row of the
        classifier's ``coef_``; or a dict with ``"error"`` and ``"trace"``.
    """
    try:
        steps = model.named_steps
        classifier = steps["classifier"]
        preprocessor = steps["preprocessor"]

        # Names of the columns as the classifier sees them (post-preprocessing).
        names = preprocessor.get_feature_names_out()

        # First row of coef_, converted to plain Python floats.
        weights = classifier.coef_[0].tolist()

        table = pd.DataFrame({"feature": names, "coefficient": weights})
        return {"coefficients": table.to_dict(orient="records")}

    except Exception as e:
        import traceback

        return {"error": str(e), "trace": traceback.format_exc()}