Dun3Co committed on
Commit
d2ca5be
·
verified ·
1 Parent(s): 7befd91

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -199
app.py DELETED
@@ -1,199 +0,0 @@
import os
from typing import List, Literal, Optional

import joblib
import numpy as np
import pandas as pd
import requests
import shap
from fastapi import FastAPI
from pydantic import BaseModel
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc

# =====================================================
# CONFIG
# =====================================================

# NoCoDB REST endpoint and view used by fetch_test_data().
NOCO_API_URL = "https://dun3co-sdc-nocodb.hf.space/api/v2/tables/m39a8axnn3980w9/records"
NOCO_VIEW_ID = "vwjuv5jnaet9npuu"

# Read the API token from the environment when available; fall back to the
# original placeholder so existing deployments behave exactly as before.
# (The previous code hard-coded the placeholder and only suggested env loading.)
NOCO_API_TOKEN = os.environ.get("NOCO_API_TOKEN", "YOUR_NOCODB_TOKEN")

# Auth header expected by NoCoDB's v2 API.
HEADERS = {"xc-token": NOCO_API_TOKEN}
# =====================================================
# MODEL LOADING
# =====================================================

# Deserialize the trained pipeline once at startup so every request reuses
# the same in-memory model (no per-request disk I/O).
model = joblib.load("model_1mvp.pkl")

# FastAPI application exposing prediction/explainability/metrics endpoints.
app = FastAPI(title="Logistic Regression API 2")
# =====================================================
# DATA SCHEMAS
# =====================================================

class InputData(BaseModel):
    """One scoring record; field names mirror the model's training features."""

    age: int
    balance: float
    day: int
    campaign: int
    job: str
    education: str
    default: Literal["yes", "no", "unknown"]
    housing: Literal["yes", "no", "unknown"]
    loan: Literal["yes", "no", "unknown"]
    months_since_previous_contact: str
    n_previous_contacts: str
    poutcome: str
    had_contact: bool
    is_single: bool
    # NOTE(review): "uknown_contact" is misspelled, but the name is part of
    # the public request schema (and presumably the trained feature set),
    # so it must stay as-is to avoid breaking clients and the model.
    uknown_contact: bool
class BatchInputData(BaseModel):
    """Wrapper for a batch of InputData records posted to the endpoints."""

    data: List[InputData]
# =====================================================
# HEALTH CHECK
# =====================================================

@app.get("/health")
def health():
    """Liveness probe: always reports the service as up."""
    return {"status": "ok"}
# =====================================================
# NOCODB DATA FETCHING
# =====================================================

def fetch_test_data(limit: int = 100, timeout: float = 30.0) -> pd.DataFrame:
    """Fetch up to *limit* test/sample rows from the configured NoCoDB view.

    Args:
        limit: Maximum number of records to request from the view.
        timeout: Seconds to wait for the HTTP response. Added because the
            original call had no timeout, so a stalled NoCoDB instance would
            hang the calling endpoint indefinitely. Default keeps callers
            backward-compatible.

    Returns:
        A pandas DataFrame with one row per NoCoDB record.

    Raises:
        requests.HTTPError: If NoCoDB responds with an error status.
    """
    params = {"offset": 0, "limit": limit, "viewId": NOCO_VIEW_ID}
    res = requests.get(NOCO_API_URL, headers=HEADERS, params=params, timeout=timeout)
    res.raise_for_status()
    # NoCoDB v2 wraps the records in a "list" key.
    return pd.DataFrame(res.json()["list"])
# =====================================================
# PREDICTION ENDPOINT
# =====================================================

@app.post("/predict")
def predict(batch: BatchInputData):
    """Score a batch of records; return class labels and positive-class probabilities."""
    try:
        frame = pd.DataFrame([record.dict() for record in batch.data])
        labels = model.predict(frame)
        # Column 1 of predict_proba is the positive-class probability.
        positive_probs = model.predict_proba(frame)[:, 1]
        return {
            "predictions": labels.tolist(),
            "probabilities": positive_probs.tolist()
        }
    except Exception as exc:
        import traceback
        # NOTE(review): returning the full traceback to the client leaks
        # implementation details; consider logging server-side instead.
        return {"error": str(exc), "trace": traceback.format_exc()}
# =====================================================
# EXPLAINABILITY ENDPOINT
# =====================================================

@app.post("/explain")
def explain(batch: Optional[BatchInputData] = None, limit: int = 100):
    """Generate SHAP values either from provided data or from NoCoDB test data.

    Returns a per-feature mean(|SHAP|) importance summary sorted descending,
    or an error payload with traceback if explanation fails.
    """
    try:
        if batch:
            X = pd.DataFrame([item.dict() for item in batch.data])
            source = "client batch"
        else:
            X = fetch_test_data(limit=limit)
            source = f"NoCoDB (limit={limit})"

        print(f"[DEBUG] SHAP explain called using {source} | shape={X.shape} | cols={list(X.columns)}")

        if hasattr(model, "named_steps"):
            # Pipeline case: SHAP must see the *transformed* matrix both as
            # background data and as the data being explained.  The original
            # code built the explainer on transformed data but then explained
            # the raw frame, and paired raw column names with SHAP values
            # computed over the (differently sized) transformed feature space.
            preprocessor = model.named_steps["preprocessor"]
            X_trans = preprocessor.transform(X)
            feature_names = list(preprocessor.get_feature_names_out())
            explainer = shap.Explainer(model.named_steps["classifier"], X_trans)
            shap_values = explainer(X_trans)
        else:
            feature_names = list(X.columns)
            explainer = shap.Explainer(model, X)
            shap_values = explainer(X)

        shap_summary = pd.DataFrame({
            "feature": feature_names,
            "mean_abs_shap": np.abs(shap_values.values).mean(axis=0)
        }).sort_values("mean_abs_shap", ascending=False)

        print(f"[DEBUG] SHAP summary created successfully with {len(shap_summary)} features.")

        return {
            "n_samples": len(X),
            "shap_summary": shap_summary.to_dict(orient="records")
        }

    except Exception as e:
        import traceback
        print("[ERROR] SHAP explain failed:", e)
        print(traceback.format_exc())
        return {"error": str(e), "trace": traceback.format_exc()}
# =====================================================
# METRICS ENDPOINT
# =====================================================

@app.post("/metrics")
def metrics(batch: Optional[BatchInputData] = None, y_true: Optional[List[int]] = None, limit: int = 100):
    """Compute ROC AUC and threshold analysis, using input or NoCoDB test data."""
    try:
        # Prefer caller-supplied records; otherwise pull a sample from NoCoDB.
        if batch:
            X = pd.DataFrame([item.dict() for item in batch.data])
        else:
            X = fetch_test_data(limit=limit)

        if y_true is None:
            # Labels may ride along in the fetched data as a 'y_true' column;
            # split them out before scoring.
            if "y_true" not in X.columns:
                return {"error": "y_true values not provided or found in dataset"}
            y_true = X["y_true"].astype(int).tolist()
            X = X.drop(columns=["y_true"])

        y_prob = model.predict_proba(X)[:, 1]
        roc_auc = roc_auc_score(y_true, y_prob)
        precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
        pr_auc = auc(recall, precision)

        # Truncate the curves so the JSON payload stays small.
        head = 20
        return {
            "roc_auc": roc_auc,
            "pr_auc": pr_auc,
            "thresholds": thresholds.tolist()[:head],
            "precision": precision.tolist()[:head],
            "recall": recall.tolist()[:head]
        }

    except Exception as e:
        import traceback
        return {"error": str(e), "trace": traceback.format_exc()}
@app.get("/coefficients")
def coefficients():
    """
    Return logistic regression coefficients and feature names.
    Works if your model is a pipeline with 'preprocessor' and 'classifier' steps.
    """
    try:
        # Pull the two pipeline stages out by name.
        preprocessor = model.named_steps["preprocessor"]
        classifier = model.named_steps["classifier"]

        # Post-transform feature names, aligned with the coef_ ordering.
        names = preprocessor.get_feature_names_out()
        # Binary logistic regression stores a single row of coefficients.
        weights = classifier.coef_[0]

        table = pd.DataFrame({
            "feature": names,
            "coefficient": weights.tolist()
        })
        return {"coefficients": table.to_dict(orient="records")}

    except Exception as e:
        import traceback
        return {"error": str(e), "trace": traceback.format_exc()}