Dun3Co committed on
Commit
f87d4de
·
verified ·
1 Parent(s): 2f872a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +216 -200
app.py CHANGED
@@ -1,200 +1,216 @@
1
- from fastapi import FastAPI
2
- from pydantic import BaseModel
3
- import os
4
- from typing import List, Literal, Optional
5
- import joblib
6
- import numpy as np
7
- import pandas as pd
8
- import requests
9
- import shap
10
- from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
11
-
12
- # =====================================================
13
- # CONFIG
14
- # =====================================================
15
-
16
- # Replace these with your NoCoDB API details
17
- NOCO_API_URL = "https://dun3co-sdc-nocodb.hf.space/api/v2/tables/m39a8axnn3980w9/records"
18
- NOCO_VIEW_ID = "vwjuv5jnaet9npuu"
19
- NOCO_API_TOKEN = os.getenv("NOCODB_TOKEN")
20
-
21
- HEADERS = {"xc-token": NOCO_API_TOKEN}
22
-
23
- # =====================================================
24
- # MODEL LOADING
25
- # =====================================================
26
-
27
- model = joblib.load("model_1mvp.pkl")
28
- app = FastAPI(title="Logistic Regression API 2")
29
-
30
- # =====================================================
31
- # DATA SCHEMAS
32
- # =====================================================
33
-
34
- class InputData(BaseModel):
35
- age: int
36
- balance: float
37
- day: int
38
- campaign: int
39
- job: str
40
- education: str
41
- default: Literal["yes", "no", "unknown"]
42
- housing: Literal["yes", "no", "unknown"]
43
- loan: Literal["yes", "no", "unknown"]
44
- months_since_previous_contact: str
45
- n_previous_contacts: str
46
- poutcome: str
47
- had_contact: bool
48
- is_single: bool
49
- uknown_contact: bool
50
-
51
- class BatchInputData(BaseModel):
52
- data: List[InputData]
53
-
54
- # =====================================================
55
- # HEALTH CHECK
56
- # =====================================================
57
-
58
- @app.get("/health")
59
- def health():
60
- return {"status": "ok"}
61
-
62
- # =====================================================
63
- # NOCODB DATA FETCHING
64
- # =====================================================
65
-
66
- def fetch_test_data(limit: int = 100):
67
- """Fetch test or sample data from NoCoDB view."""
68
- params = {"offset": 0, "limit": limit, "viewId": NOCO_VIEW_ID}
69
- res = requests.get(NOCO_API_URL, headers=HEADERS, params=params)
70
- res.raise_for_status()
71
- data = res.json()["list"]
72
- return pd.DataFrame(data)
73
-
74
- # =====================================================
75
- # PREDICTION ENDPOINT
76
- # =====================================================
77
-
78
- @app.post("/predict")
79
- def predict(batch: BatchInputData):
80
- try:
81
- X = pd.DataFrame([item.dict() for item in batch.data])
82
- preds = model.predict(X)
83
- probs = model.predict_proba(X)[:, 1]
84
- return {
85
- "predictions": preds.tolist(),
86
- "probabilities": probs.tolist()
87
- }
88
- except Exception as e:
89
- import traceback
90
- return {"error": str(e), "trace": traceback.format_exc()}
91
-
92
- # =====================================================
93
- # EXPLAINABILITY ENDPOINT
94
- # =====================================================
95
-
96
- @app.post("/explain")
97
- def explain(batch: Optional[BatchInputData] = None, limit: int = 100):
98
- """Generate SHAP values either from provided data or from NoCoDB test data."""
99
- try:
100
- if batch:
101
- X = pd.DataFrame([item.dict() for item in batch.data])
102
- source = "client batch"
103
- else:
104
- X = fetch_test_data(limit=limit)
105
- source = f"NoCoDB (limit={limit})"
106
-
107
- print(f"[DEBUG] SHAP explain called using {source} | shape={X.shape} | cols={list(X.columns)}")
108
-
109
- # Ensure model is a pipeline
110
- if hasattr(model, "named_steps"):
111
- explainer = shap.Explainer(model.named_steps["classifier"], model.named_steps["preprocessor"].transform(X))
112
- else:
113
- explainer = shap.Explainer(model, X)
114
-
115
- shap_values = explainer(X)
116
-
117
- shap_summary = pd.DataFrame({
118
- "feature": X.columns,
119
- "mean_abs_shap": np.abs(shap_values.values).mean(axis=0)
120
- }).sort_values("mean_abs_shap", ascending=False)
121
-
122
- print(f"[DEBUG] SHAP summary created successfully with {len(shap_summary)} features.")
123
-
124
- return {
125
- "n_samples": len(X),
126
- "shap_summary": shap_summary.to_dict(orient="records")
127
- }
128
-
129
- except Exception as e:
130
- import traceback
131
- print("[ERROR] SHAP explain failed:", e)
132
- print(traceback.format_exc())
133
- return {"error": str(e), "trace": traceback.format_exc()}
134
-
135
- # =====================================================
136
- # METRICS ENDPOINT
137
- # =====================================================
138
-
139
- @app.post("/metrics")
140
- def metrics(batch: Optional[BatchInputData] = None, y_true: Optional[List[int]] = None, limit: int = 100):
141
- """Compute ROC AUC and threshold analysis, using input or NoCoDB test data."""
142
- try:
143
- # Use provided data or fallback to test data from NoCoDB
144
- if batch:
145
- X = pd.DataFrame([item.dict() for item in batch.data])
146
- else:
147
- X = fetch_test_data(limit=limit)
148
-
149
- if y_true is None:
150
- # Look for 'y_true' column in NoCoDB data
151
- if "y_true" in X.columns:
152
- y_true = X["y_true"].astype(int).tolist()
153
- X = X.drop(columns=["y_true"])
154
- else:
155
- return {"error": "y_true values not provided or found in dataset"}
156
-
157
- y_prob = model.predict_proba(X)[:, 1]
158
- roc_auc = roc_auc_score(y_true, y_prob)
159
- precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
160
- pr_auc = auc(recall, precision)
161
-
162
- return {
163
- "roc_auc": roc_auc,
164
- "pr_auc": pr_auc,
165
- "thresholds": thresholds.tolist()[:20], # limit output size
166
- "precision": precision.tolist()[:20],
167
- "recall": recall.tolist()[:20]
168
- }
169
-
170
- except Exception as e:
171
- import traceback
172
- return {"error": str(e), "trace": traceback.format_exc()}
173
-
174
- @app.get("/coefficients")
175
- def coefficients():
176
- """
177
- Return logistic regression coefficients and feature names.
178
- Works if your model is a pipeline with 'preprocessor' and 'classifier' steps.
179
- """
180
- try:
181
- # Extract classifier and preprocessor
182
- classifier = model.named_steps["classifier"]
183
- preprocessor = model.named_steps["preprocessor"]
184
-
185
- # Get feature names after preprocessing
186
- feature_names = preprocessor.get_feature_names_out()
187
-
188
- # Get coefficients
189
- coefficients = classifier.coef_[0]
190
-
191
- df = pd.DataFrame({
192
- "feature": feature_names,
193
- "coefficient": coefficients.tolist()
194
- })
195
-
196
- return {"coefficients": df.to_dict(orient="records")}
197
-
198
- except Exception as e:
199
- import traceback
200
- return {"error": str(e), "trace": traceback.format_exc()}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ import os
4
+ from typing import List, Literal, Optional
5
+ import joblib
6
+ import numpy as np
7
+ import pandas as pd
8
+ import requests
9
+ import shap
10
+ from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
11
+
12
+ # =====================================================
13
+ # CONFIG
14
+ # =====================================================
15
+
16
+ # Replace these with your NoCoDB API details
17
+ NOCO_API_URL = "https://dun3co-sdc-nocodb.hf.space/api/v2/tables/m39a8axnn3980w9/records"
18
+ NOCO_VIEW_ID = "vwjuv5jnaet9npuu"
19
+ NOCO_API_TOKEN = os.getenv("NOCODB_TOKEN")
20
+
21
+ HEADERS = {"xc-token": NOCO_API_TOKEN}
22
+
23
+ # =====================================================
24
+ # MODEL LOADING
25
+ # =====================================================
26
+
27
+ model = joblib.load("model_1mvp.pkl")
28
+ app = FastAPI(title="Logistic Regression API 2")
29
+
30
+ # =====================================================
31
+ # DATA SCHEMAS
32
+ # =====================================================
33
+
34
+ class InputData(BaseModel):
35
+ age: int
36
+ balance: float
37
+ day: int
38
+ campaign: int
39
+ job: str
40
+ education: str
41
+ default: Literal["yes", "no", "unknown"]
42
+ housing: Literal["yes", "no", "unknown"]
43
+ loan: Literal["yes", "no", "unknown"]
44
+ months_since_previous_contact: str
45
+ n_previous_contacts: str
46
+ poutcome: str
47
+ had_contact: bool
48
+ is_single: bool
49
+ uknown_contact: bool
50
+
51
+ class BatchInputData(BaseModel):
52
+ data: List[InputData]
53
+
54
+ # =====================================================
55
+ # HEALTH CHECK
56
+ # =====================================================
57
+
58
+ @app.get("/health")
59
+ def health():
60
+ return {"status": "ok"}
61
+
62
+ # =====================================================
63
+ # NOCODB DATA FETCHING
64
+ # =====================================================
65
+
66
+ def fetch_test_data(limit: int = 100):
67
+ """Fetch test or sample data from NoCoDB view."""
68
+ params = {"offset": 0, "limit": limit, "viewId": NOCO_VIEW_ID}
69
+ res = requests.get(NOCO_API_URL, headers=HEADERS, params=params)
70
+ res.raise_for_status()
71
+ data = res.json()["list"]
72
+ return pd.DataFrame(data)
73
+
74
+ # =====================================================
75
+ # PREDICTION ENDPOINT
76
+ # =====================================================
77
+
78
+ @app.post("/predict")
79
+ def predict(batch: BatchInputData):
80
+ try:
81
+ X = pd.DataFrame([item.dict() for item in batch.data])
82
+ preds = model.predict(X)
83
+ probs = model.predict_proba(X)[:, 1]
84
+ return {
85
+ "predictions": preds.tolist(),
86
+ "probabilities": probs.tolist()
87
+ }
88
+ except Exception as e:
89
+ import traceback
90
+ return {"error": str(e), "trace": traceback.format_exc()}
91
+
92
+ # =====================================================
93
+ # EXPLAINABILITY ENDPOINT
94
+ # =====================================================
95
+
96
+ @app.post("/explain")
97
+ def explain(batch: Optional[BatchInputData] = None, limit: int = 100):
98
+ """Generate SHAP values either from provided data or from NoCoDB test data."""
99
+ try:
100
+ if batch:
101
+ X = pd.DataFrame([item.dict() for item in batch.data])
102
+ source = "client batch"
103
+ else:
104
+ X = fetch_test_data(limit=limit)
105
+ source = f"NoCoDB (limit={limit})"
106
+
107
+ print(f"[DEBUG] SHAP explain called using {source} | shape={X.shape} | cols={list(X.columns)}")
108
+
109
+ # Remove ID and target columns if they exist
110
+ drop_cols = [c for c in ["Id", "y", "target"] if c in X.columns]
111
+ if drop_cols:
112
+ print(f"[DEBUG] Dropping columns not used for prediction: {drop_cols}")
113
+ X = X.drop(columns=drop_cols)
114
+
115
+ # Handle pipelines correctly
116
+ if hasattr(model, "named_steps"):
117
+ preprocessor = model.named_steps["preprocessor"]
118
+ classifier = model.named_steps["classifier"]
119
+
120
+ X_transformed = preprocessor.transform(X)
121
+ feature_names = preprocessor.get_feature_names_out()
122
+
123
+ print(f"[DEBUG] Transformed shape: {X_transformed.shape} | n_features={len(feature_names)}")
124
+
125
+ explainer = shap.Explainer(classifier, X_transformed)
126
+ shap_values = explainer(X_transformed)
127
+
128
+ shap_summary = pd.DataFrame({
129
+ "feature": feature_names,
130
+ "mean_abs_shap": np.abs(shap_values.values).mean(axis=0)
131
+ }).sort_values("mean_abs_shap", ascending=False)
132
+ else:
133
+ # If model is not a pipeline
134
+ explainer = shap.Explainer(model, X)
135
+ shap_values = explainer(X)
136
+ shap_summary = pd.DataFrame({
137
+ "feature": X.columns,
138
+ "mean_abs_shap": np.abs(shap_values.values).mean(axis=0)
139
+ }).sort_values("mean_abs_shap", ascending=False)
140
+
141
+ print(f"[DEBUG] SHAP summary created successfully with {len(shap_summary)} features.")
142
+ return {"n_samples": len(X), "shap_summary": shap_summary.to_dict(orient="records")}
143
+
144
+ except Exception as e:
145
+ import traceback
146
+ print("[ERROR] SHAP explain failed:", e)
147
+ print(traceback.format_exc())
148
+ return {"error": str(e), "trace": traceback.format_exc()}
149
+
150
+
151
+ # =====================================================
152
+ # METRICS ENDPOINT
153
+ # =====================================================
154
+
155
+ @app.post("/metrics")
156
+ def metrics(batch: Optional[BatchInputData] = None, y: Optional[List[int]] = None, limit: int = 100):
157
+ """Compute ROC AUC and threshold analysis, using input or NoCoDB test data."""
158
+ try:
159
+ # Use provided data or fallback to test data from NoCoDB
160
+ if batch:
161
+ X = pd.DataFrame([item.dict() for item in batch.data])
162
+ else:
163
+ X = fetch_test_data(limit=limit)
164
+
165
+ if y is None:
166
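+ # Look for the 'y' label column in NoCoDB data. NOTE(review): the check below tests "y_" while the code then reads X["y"] — likely a typo; confirm.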
167
+ if "y_" in X.columns:
168
+ y = X["y"].astype(int).tolist()
169
+ X = X.drop(columns=["y"])
170
+ else:
171
+ return {"error": "ye values not provided or found in dataset"}
172
+
173
+ y_prob = model.predict_proba(X)[:, 1]
174
+ roc_auc = roc_auc_score(y, y_prob)
175
+ precision, recall, thresholds = precision_recall_curve(y, y_prob)
176
+ pr_auc = auc(recall, precision)
177
+
178
+ return {
179
+ "roc_auc": roc_auc,
180
+ "pr_auc": pr_auc,
181
+ "thresholds": thresholds.tolist()[:20], # limit output size
182
+ "precision": precision.tolist()[:20],
183
+ "recall": recall.tolist()[:20]
184
+ }
185
+
186
+ except Exception as e:
187
+ import traceback
188
+ return {"error": str(e), "trace": traceback.format_exc()}
189
+
190
+ @app.get("/coefficients")
191
+ def coefficients():
192
+ """
193
+ Return logistic regression coefficients and feature names.
194
+ Works if your model is a pipeline with 'preprocessor' and 'classifier' steps.
195
+ """
196
+ try:
197
+ # Extract classifier and preprocessor
198
+ classifier = model.named_steps["classifier"]
199
+ preprocessor = model.named_steps["preprocessor"]
200
+
201
+ # Get feature names after preprocessing
202
+ feature_names = preprocessor.get_feature_names_out()
203
+
204
+ # Get coefficients
205
+ coefficients = classifier.coef_[0]
206
+
207
+ df = pd.DataFrame({
208
+ "feature": feature_names,
209
+ "coefficient": coefficients.tolist()
210
+ })
211
+
212
+ return {"coefficients": df.to_dict(orient="records")}
213
+
214
+ except Exception as e:
215
+ import traceback
216
+ return {"error": str(e), "trace": traceback.format_exc()}