AshenH committed on
Commit
74bc39f
·
verified ·
1 Parent(s): c02152a

Create ts_preprocess.py

Browse files
Files changed (1) hide show
  1. tools/ts_preprocess.py +104 -0
tools/ts_preprocess.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # space/tools/ts_preprocess.py
2
+ import pandas as pd
3
+ import numpy as np
4
+ from typing import List
5
+
6
+ MONTH = "MS" # month-start frequency alias; passed as `freq` to pd.date_range when schedule timestamps are built
7
+
8
def _emi(principal: float, annual_rate: float, n_months: int) -> float:
    """
    Return the equated monthly instalment (EMI) with monthly compounding.

    With r_m = annual_rate / 12 the instalment is
        EMI = P * r_m * (1 + r_m)^n / ((1 + r_m)^n - 1)
            = P * r_m / (1 - (1 + r_m)^-n)

    Args:
        principal: Amount borrowed. Non-positive values yield 0.0.
        annual_rate: Annual interest rate as a fraction (e.g. 0.12).
        n_months: Number of monthly instalments. Non-positive values yield 0.0.

    Returns:
        The instalment as a float. When the monthly rate is zero or negative
        the principal is simply spread evenly over ``n_months``.
    """
    if n_months <= 0 or principal <= 0:
        return 0.0
    monthly_rate = annual_rate / 12.0
    if monthly_rate <= 0:
        # No (or negative) interest: straight-line repayment.
        return principal / n_months
    # Evaluate 1 - (1 + r)^-n as -expm1(-n * log1p(r)).  Computing
    # (1 + r)^n directly divides by zero when a tiny rate makes the power
    # round to exactly 1.0, and overflows for very long tenors / high rates.
    annuity_factor = -np.expm1(-n_months * np.log1p(monthly_rate))
    return float(principal * monthly_rate / annuity_factor)
21
+
22
def _project_deposit(principal: float, annual_rate: float, months: int) -> pd.DataFrame:
    """
    Project the monthly value path of a deposit that compounds monthly.

    Args:
        principal: Starting balance (the value at step 0).
        annual_rate: Annual interest rate as a fraction; one twelfth of it is
            applied per step.
        months: Number of monthly steps to project. The result has
            ``months + 1`` rows (steps 0..months); a negative value gives an
            empty schedule.

    Returns:
        DataFrame with columns ``step`` (int64) and ``portfolio_value``
        (float64). The columns are present even when the schedule is empty so
        callers always see the same schema.
    """
    monthly_growth = 1.0 + annual_rate / 12.0
    values = []
    balance = float(principal)
    for _ in range(months + 1):
        values.append(balance)
        balance *= monthly_growth
    # Explicit dtypes keep the schema stable for an empty schedule as well.
    return pd.DataFrame({
        "step": np.arange(len(values), dtype="int64"),
        "portfolio_value": np.asarray(values, dtype="float64"),
    })
33
+
34
def _project_asset(principal: float, annual_rate: float, tenor_months: int) -> pd.DataFrame:
    """
    Build a monthly amortization schedule for an asset/loan repaid by EMI.

    Args:
        principal: Opening balance of the loan.
        annual_rate: Annual interest rate as a fraction; one twelfth of it is
            charged on the outstanding balance each step.
        tenor_months: Repayment tenor in months. The schedule has
            ``tenor_months + 1`` rows (steps 0..tenor_months); a negative
            value gives an empty schedule.

    Returns:
        DataFrame with columns ``step``, ``portfolio_value`` (balance at the
        start of the step), ``emi`` (the scheduled instalment from ``_emi``),
        ``interest_component``, ``principal_component`` and
        ``remaining_balance`` (balance after the step's repayment).
    """
    columns = [
        "step",
        "portfolio_value",
        "emi",
        "interest_component",
        "principal_component",
        "remaining_balance",
    ]
    emi = _emi(principal, annual_rate, tenor_months)
    monthly_rate = annual_rate / 12.0
    rows = []
    balance = principal
    for step in range(tenor_months + 1):
        interest = balance * monthly_rate
        # Never repay more principal than is still outstanding; without the
        # cap the closing row (balance already ~0) would report a full EMI of
        # principal and the components would sum to more than the loan.
        principal_paid = min(max(0.0, emi - interest), max(0.0, balance))
        remaining = max(0.0, balance - principal_paid)
        rows.append({
            "step": step,
            "portfolio_value": balance,
            "emi": emi,
            "interest_component": interest,
            "principal_component": principal_paid,
            "remaining_balance": remaining,
        })
        balance = remaining
    # Passing the column list keeps the schema intact for an empty schedule.
    return pd.DataFrame(rows, columns=columns)
56
+
57
def _or_default(value, default):
    """
    Return ``default`` when ``value`` is missing (None / NaN / NA) or falsy, else ``value``.
    """
    # The missing-value check must come first: NaN is truthy, so a plain
    # ``value or default`` would let it through.
    if value is None or pd.isna(value) or not value:
        return default
    return value


def build_timeseries(df: pd.DataFrame) -> pd.DataFrame:
    """
    Expand each instrument row into a monthly projected time series.

    Input df columns (example):
      - portfolio_date (datetime or str); used to derive ``timestamp`` when
        that column is not already present
      - instrument_type: 'Deposit' or 'Asset' (case-insensitive; any other
        value is projected flat at its balance)
      - balance: float
      - interest_rate: annual rate (e.g., 0.12)
      - time_to_maturity: months (int)
      - tenor_months: months (for Assets; if missing, fallback to time_to_maturity)
    Missing (None/NaN) or zero numeric fields fall back to 0, or to
    ``time_to_maturity`` in the case of ``tenor_months``.

    Output:
      Long time-series with monthly timestamps, projected 'portfolio_value'
      (and EMI breakdown for Assets), plus the carried ``instrument_type`` /
      ``interest_rate`` columns when present in the input and the
      ``origin_portfolio_date`` / ``origin_balance`` of each source row.
      An empty input yields an empty frame with those columns.
    """
    df = df.copy()
    if "timestamp" not in df.columns:
        df["timestamp"] = pd.to_datetime(df["portfolio_date"])

    # Identifier columns copied onto every schedule row, when available.
    carried = [col for col in ("instrument_type", "interest_rate") if col in df.columns]

    out_frames: List[pd.DataFrame] = []
    for _, row in df.iterrows():
        itype = str(row.get("instrument_type", "")).strip().lower()
        start = pd.to_datetime(row["timestamp"])
        months = int(_or_default(row.get("time_to_maturity"), 0))
        principal = float(_or_default(row.get("balance"), 0.0))
        annual_rate = float(_or_default(row.get("interest_rate"), 0.0))

        if itype == "deposit":
            sched = _project_deposit(principal, annual_rate, months)
        elif itype == "asset":
            tenor = int(_or_default(row.get("tenor_months"), months))
            sched = _project_asset(principal, annual_rate, tenor)
        else:
            # unknown types: keep flat
            sched = pd.DataFrame({"step": range(months + 1), "portfolio_value": principal})

        # Build timestamps: month-start frequency.
        # NOTE(review): with a month-start frequency a mid-month origin date
        # appears to be rolled forward to the next month start - confirm this
        # is the intended stamp for step 0.
        sched["timestamp"] = pd.date_range(start=start, periods=len(sched), freq=MONTH)
        # Carry identifiers
        for col in carried:
            sched[col] = row.get(col)
        sched["origin_portfolio_date"] = start
        sched["origin_balance"] = principal

        out_frames.append(sched)

    if not out_frames:
        # pd.concat raises on an empty list; return an empty, well-formed frame.
        return pd.DataFrame(
            columns=["step", "portfolio_value", "timestamp", *carried,
                     "origin_portfolio_date", "origin_balance"]
        )

    ts = pd.concat(out_frames, ignore_index=True)
    # Sort only by keys that exist: instrument_type is optional in the input.
    sort_keys = [key for key in ("timestamp", "instrument_type") if key in ts.columns]
    ts = ts.sort_values(sort_keys).reset_index(drop=True)
    return ts