waliaMuskaan011 committed on
Commit
05215f3
·
verified ·
1 Parent(s): cb17f50

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +142 -0
app.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import torch
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM
5
+ from peft import PeftModel
6
+
7
# The model and tokenizer are created lazily on the first request
# (more reliable on Spaces) and cached in these module-level globals.
MODEL = None
TOKENIZER = None


def get_model():
    """Return the cached ``(model, tokenizer)`` pair, loading it on first use.

    On the first call the SmolLM-360M base model and its tokenizer are
    loaded, the LoRA adapters are attached with ``PeftModel``, the model is
    put in eval mode, and both objects are stored in ``MODEL`` and
    ``TOKENIZER``. Later calls return the cached objects.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    global MODEL, TOKENIZER

    # Fast path: both objects are already cached.
    if MODEL is not None and TOKENIZER is not None:
        return MODEL, TOKENIZER

    print("Loading model...")
    base_id = "HuggingFaceTB/SmolLM-360M"
    adapter_id = "waliaMuskaan011/calendar-event-extractor-smollm"

    # bfloat16 when CUDA is available, float32 otherwise.
    if torch.cuda.is_available():
        dtype = torch.bfloat16
    else:
        dtype = torch.float32

    backbone = AutoModelForCausalLM.from_pretrained(
        base_id,
        torch_dtype=dtype,
        device_map="auto",
    )

    tok = AutoTokenizer.from_pretrained(base_id)
    if tok.pad_token is None:
        # Reuse the end-of-sequence token when no pad token is defined.
        tok.pad_token = tok.eos_token

    # Load LoRA adapters on top of the base model.
    adapted = PeftModel.from_pretrained(backbone, adapter_id)
    adapted.eval()

    MODEL = adapted
    TOKENIZER = tok
    print("Model loaded successfully!")
    return MODEL, TOKENIZER
30
+
31
def _pretty_json_from(text):
    """Pretty-print the first JSON value found in *text*, or return ``None``.

    The whole (stripped) text is tried first; if that fails, each ``{`` is
    tried in turn as the start of a JSON object, so trailing or leading
    chatter around the object is tolerated.
    """
    text = text.strip()
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        decoder = json.JSONDecoder()
        start = text.find("{")
        while start != -1:
            try:
                # raw_decode stops at the end of the first complete value,
                # ignoring anything the model kept generating afterwards.
                parsed, _ = decoder.raw_decode(text, start)
                break
            except json.JSONDecodeError:
                start = text.find("{", start + 1)
        else:
            return None
    return json.dumps(parsed, indent=2, ensure_ascii=False)


def extract_calendar_event(event_text):
    """Extract calendar information from natural language text.

    Args:
        event_text: Free-form description of a calendar event. ``None``,
            empty and whitespace-only input are all treated as "no input".

    Returns:
        A string for display: pretty-printed JSON when the model output
        contains parseable JSON, otherwise the raw generation prefixed with
        a cleanup notice, a prompt to enter text when the input is blank,
        or an error message if tokenization/generation raises.
    """
    if event_text is None or not event_text.strip():
        return "Please enter some text describing a calendar event."

    model, tokenizer = get_model()

    # Build prompt
    prompt = f"""Extract calendar fields from: "{event_text}".
Return ONLY valid JSON with keys [action,date,time,attendees,location,duration,recurrence,notes].
Use null for unknown.
"""

    try:
        # Tokenize and move every tensor to the model's device.
        inputs = tokenizer(prompt, return_tensors="pt")
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        prompt_len = inputs["input_ids"].shape[1]

        with torch.no_grad():
            # Greedy decoding; no temperature is passed because it has no
            # effect when do_sample=False.
            outputs = model.generate(
                **inputs,
                max_new_tokens=160,
                do_sample=False,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens so the echoed prompt never
        # has to be located and cut off by string searching.
        generated = tokenizer.decode(
            outputs[0][prompt_len:], skip_special_tokens=True
        ).strip()

        pretty = _pretty_json_from(generated)
        if pretty is not None:
            return pretty
        return f"Generated (may need manual cleanup):\n{generated}"

    except Exception as e:
        # UI boundary: report the failure in the output box.
        return f"Error processing request: {str(e)}"
83
+
84
# Create Gradio interface: an input column (text box + button), an output
# column (JSON text box), clickable examples, and a footer with links.
with gr.Blocks(title="Calendar Event Extractor", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 📅 Calendar Event Extractor

    This AI model extracts structured calendar information from natural language text.
    Powered by fine-tuned SmolLM-360M with LoRA adapters.

    **Try it out**: Enter any calendar-related text and get structured JSON output!
    """)

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="📝 Event Description",
                placeholder="e.g., 'Meeting with John tomorrow at 2pm for 1 hour'",
                lines=3
            )
            extract_btn = gr.Button("🔍 Extract Event Info", variant="primary")

        with gr.Column():
            output_json = gr.Textbox(
                label="📋 Extracted Information (JSON)",
                lines=10,
                max_lines=15
            )

    # Examples (not cached, so each one runs through the model when used)
    gr.Markdown("### 🔍 Try these examples:")
    examples = gr.Examples(
        examples=[
            ["Quick meeting at the coworking space on 10th May 2025 starting at 11:00 am for 45 minutes"],
            ["Coffee chat with Sarah tomorrow at 3pm"],
            ["Weekly standup every Monday at 9am on Zoom"],
            ["Doctor appointment next Friday at 2:30 PM for 30 minutes"],
            ["Team lunch at the new restaurant on 15th December"],
            ["Call with client on 25/12/2024 at 10:00 AM, needs to discuss project timeline"],
        ],
        inputs=[input_text],
        outputs=[output_json],
        fn=extract_calendar_event,
        cache_examples=False
    )

    # Run the extractor on the text box contents when the button is clicked.
    extract_btn.click(
        fn=extract_calendar_event,
        inputs=[input_text],
        outputs=[output_json]
    )

    gr.Markdown("""
    ---
    **Model Details**: Fine-tuned SmolLM-360M using LoRA • **Dataset**: ~2500 calendar events • **Training**: Custom augmentation pipeline

    [🔗 Model Card](https://huggingface.co/waliaMuskaan011/calendar-event-extractor-smollm) • [💻 Training Code](https://github.com/muskaanwalia098/Calendar-Event-Entity-Extraction)
    """)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()