s2049 committed on
Commit
b90f766
·
verified ·
1 Parent(s): cbab1da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -90
app.py CHANGED
@@ -10,27 +10,30 @@ import torch
10
  import gradio as gr
11
  import time
12
 
 
 
 
13
  # --- Configuration ---
14
  load_dotenv()
15
  TMDB_API_KEY = os.environ.get("TMDB_API_KEY", "442a13f1865d8936f95aa20737e6f6f5")
16
- HF_TOKEN = os.environ.get("HF_TOKEN")
17
 
18
- MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
 
19
 
20
  BASE_TMDB_URL = "https://api.themoviedb.org/3"
21
  POSTER_BASE_URL = "https://image.tmdb.org/t/p/w500"
22
- NUM_RECOMMENDATIONS_TO_GENERATE = 20
23
  NUM_RECOMMENDATIONS_TO_DISPLAY = 5
24
  MIN_RATING_FOR_SEED = 3.5
25
  MIN_VOTE_COUNT_TMDB = 100
26
 
27
- # --- Global Variables for Data (Load once) ---
28
  df_profile_global = None
29
  df_watchlist_global = None
30
  df_reviews_global = None
31
  df_diary_global = None
32
  df_ratings_global = None
33
- df_watched_global = None # Consolidated
34
 
35
  uri_to_movie_map_global = {}
36
  all_watched_titles_global = set()
@@ -43,8 +46,7 @@ llm_tokenizer = None
43
 
44
  # --- Helper Functions ---
45
  def clean_html(raw_html):
46
- if pd.isna(raw_html) or raw_html is None:
47
- return ""
48
  text = str(raw_html)
49
  text = re.sub(r'<br\s*/?>', '\n', text)
50
  soup = BeautifulSoup(text, "html.parser")
@@ -54,7 +56,6 @@ def get_movie_uri_map(dfs_dict):
54
  uri_map = {}
55
  df_priority = ['reviews.csv', 'diary.csv', 'ratings.csv', 'watched.csv', 'watchlist.csv']
56
  processed_uris = set()
57
-
58
  for df_name in df_priority:
59
  df = dfs_dict.get(df_name)
60
  if df is not None and 'Letterboxd URI' in df.columns and 'Name' in df.columns and 'Year' in df.columns:
@@ -66,8 +67,7 @@ def get_movie_uri_map(dfs_dict):
66
  year = int(row['Year'])
67
  uri_map[uri] = (str(row['Name']), year)
68
  processed_uris.add(uri)
69
- except ValueError:
70
- pass
71
  return uri_map
72
 
73
  def load_all_data():
@@ -77,14 +77,13 @@ def load_all_data():
77
 
78
  try:
79
  df_profile_global = pd.read_csv("profile.csv")
80
- df_comments_global = pd.read_csv("comments.csv") # Loaded but not explicitly used in this version for recs
81
  df_watchlist_global = pd.read_csv("watchlist.csv")
82
  df_reviews_global = pd.read_csv("reviews.csv")
83
  df_diary_global = pd.read_csv("diary.csv")
84
  df_ratings_global = pd.read_csv("ratings.csv")
85
  _df_watched_log = pd.read_csv("watched.csv")
86
  except FileNotFoundError as e:
87
- print(f"ERROR: CSV file not found: {e}. Please ensure all CSV files are uploaded to the HF Space.")
88
  return False
89
 
90
  dfs_for_uri_map = {
@@ -115,9 +114,13 @@ def load_all_data():
115
  consolidated.drop(columns=['Rating_simple'], inplace=True)
116
 
117
  watched_log_subset = _df_watched_log[['Letterboxd URI', 'Name', 'Year']].copy()
118
- watched_log_subset['from_watched_log'] = True
119
  consolidated = pd.merge(consolidated, watched_log_subset, on=['Letterboxd URI', 'Name', 'Year'], how='outer')
120
- consolidated['from_watched_log'] = consolidated['from_watched_log'].fillna(False)
 
 
 
 
121
 
122
  consolidated['Review Text'] = consolidated['Review Text'].fillna('').apply(clean_html)
123
  consolidated['Year'] = pd.to_numeric(consolidated['Year'], errors='coerce').astype('Int64')
@@ -128,8 +131,7 @@ def load_all_data():
128
  all_watched_titles_global = set(zip(df_watched_global['Name'].astype(str), df_watched_global['Year'].astype(int)))
129
  for _, row in _df_watched_log.iterrows():
130
  if pd.notna(row['Name']) and pd.notna(row['Year']):
131
- try:
132
- all_watched_titles_global.add((str(row['Name']), int(row['Year'])))
133
  except ValueError: pass
134
 
135
  if df_watchlist_global is not None:
@@ -162,8 +164,12 @@ def load_all_data():
162
  'review_text': row['Review Text'], 'uri': row['Letterboxd URI']
163
  })
164
  temp_df = pd.DataFrame(seed_movies_global)
165
- temp_df.drop_duplicates(subset=['name', 'year'], keep='first', inplace=True)
166
- seed_movies_global = temp_df.to_dict('records')
 
 
 
 
167
  random.shuffle(seed_movies_global)
168
  return True
169
 
@@ -171,23 +177,38 @@ def initialize_llm():
171
  global llm_pipeline, llm_tokenizer
172
  if llm_pipeline is None:
173
  print(f"Initializing LLM: {MODEL_NAME}")
 
 
 
 
 
 
174
  try:
175
- llm_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
176
  model = AutoModelForCausalLM.from_pretrained(
177
- MODEL_NAME, torch_dtype=torch.float16, device_map="auto",
178
- trust_remote_code=True, token=HF_TOKEN if HF_TOKEN else None
 
 
 
 
179
  )
 
 
 
 
180
  llm_pipeline = pipeline(
181
- "text-generation", model=model, tokenizer=llm_tokenizer,
182
- torch_dtype=torch.float16, device_map="auto"
183
  )
184
- print("LLM Initialized Successfully.")
185
  except Exception as e:
186
- print(f"Error initializing LLM: {e}")
187
  llm_pipeline = None
188
 
 
 
189
  def search_tmdb_movie_details(title, year):
190
- if not TMDB_API_KEY or TMDB_API_KEY == "YOUR_TMDB_API_KEY_FALLBACK": # Fallback check
191
  print("TMDB API Key not properly configured.")
192
  return None
193
  try:
@@ -211,15 +232,13 @@ def search_tmdb_movie_details(title, year):
211
  'vote_average': movie.get('vote_average'), 'vote_count': movie.get('vote_count'),
212
  'popularity': movie.get('popularity')
213
  }
214
- time.sleep(0.25) # Adjusted delay
215
- except requests.RequestException as e:
216
- print(f"Error searching TMDB for {title} ({year}): {e}")
217
- except Exception as ex:
218
- print(f"Unexpected error in search_tmdb_movie_details for {title} ({year}): {ex}")
219
  return None
220
 
221
  def get_tmdb_recommendations(movie_id, page=1):
222
- if not TMDB_API_KEY or TMDB_API_KEY == "YOUR_TMDB_API_KEY_FALLBACK": # Fallback check
223
  print("TMDB API Key not properly configured.")
224
  return []
225
  recommendations = []
@@ -239,13 +258,12 @@ def get_tmdb_recommendations(movie_id, page=1):
239
  'vote_average': movie.get('vote_average'), 'vote_count': movie.get('vote_count'),
240
  'popularity': movie.get('popularity')
241
  })
242
- time.sleep(0.25) # Adjusted delay
243
- except requests.RequestException as e:
244
- print(f"Error getting TMDB recommendations for movie ID {movie_id}: {e}")
245
- except Exception as ex:
246
- print(f"Unexpected error in get_tmdb_recommendations for movie ID {movie_id}: {ex}")
247
  return recommendations
248
 
 
249
  def generate_saudi_explanation(recommended_movie_title, seed_movie_title, seed_movie_context=""):
250
  global llm_pipeline, llm_tokenizer
251
  if llm_pipeline is None or llm_tokenizer is None:
@@ -254,55 +272,59 @@ def generate_saudi_explanation(recommended_movie_title, seed_movie_title, seed_m
254
  max_context_len = 150
255
  seed_movie_context_short = (seed_movie_context[:max_context_len] + "...") if len(seed_movie_context) > max_context_len else seed_movie_context
256
 
257
- prompt_template = f"""<s>[INST] أنت ناقد أفلام سعودي خبير ودمك خفيف. المستخدم أعجب بالفيلم "{seed_movie_title}".
258
- سبب إعجابه بالفيلم الأول (إذا متوفر): "{seed_movie_context_short}"
259
- بناءً على ذلك، نُرشح له فيلم "{recommended_movie_title}".
260
- اكتب جملة أو جملتين باللهجة السعودية العامية، تشرح ليش ممكن يعجبه الفيلم الجديد "{recommended_movie_title}"، مع ربطها بالفيلم اللي عجبه "{seed_movie_title}". خلي كلامك وناسة ويشد الواحد وما يكون طويل. لا تذكر أبداً أنك نموذج لغوي أو ذكاء اصطناعي.
261
-
262
- مثال للأسلوب المطلوب (لو الفيلم اللي عجبه "Mad Max: Fury Road" والفيلم المرشح "Dune"):
263
- "يا طويل العمر، شفت كيف 'Mad Max: Fury Road' عجّبك بجوّه الصحراوي والأكشن اللي ما يوقّف؟ أجل اسمع، 'Dune' بيوديك لصحراء ثانية بس أعظم وأفخم، وقصة تحبس الأنفاس! شد حيلك وشوفه."
264
-
265
- الآن، الفيلم الذي أعجب المستخدم هو: "{seed_movie_title}"
266
- سبب إعجابه بالفيلم الأول (إذا متوفر): "{seed_movie_context_short}"
267
- الفيلم المرشح: "{recommended_movie_title}"
268
- اشرح باللهجة السعودية: [/INST]"""
 
 
 
269
 
270
  try:
271
  sequences = llm_pipeline(
272
- prompt_template, do_sample=True, top_k=10, num_return_sequences=1,
273
- eos_token_id=llm_tokenizer.eos_token_id, max_new_tokens=120 # Increased slightly
 
 
274
  )
275
  explanation = sequences[0]['generated_text'].split("[/INST]")[-1].strip()
276
- explanation = re.sub(r"^اشرح باللهجة السعودية:\s*", "", explanation, flags=re.IGNORECASE)
277
  explanation = explanation.replace("<s>", "").replace("</s>", "").strip()
278
- if not explanation or explanation.lower().startswith("أنت ناقد أفلام"):
 
 
 
279
  return f"شكلك بتنبسط على فيلم '{recommended_movie_title}' لأنه يشبه جو فيلم '{seed_movie_title}' اللي حبيته! عطيه تجربة."
280
  return explanation
281
  except Exception as e:
282
- print(f"Error during LLM generation: {e}")
283
  return f"يا كابتن، شكلك بتحب '{recommended_movie_title}'، خاصة إنك استمتعت بـ'{seed_movie_title}'. جربه وعطنا رأيك!"
284
 
 
285
  def get_recommendations(progress=gr.Progress()):
286
- if not TMDB_API_KEY or TMDB_API_KEY == "442a13f1865d8936f95aa20737e6f6f5" and not os.environ.get("TMDB_API_KEY"):
287
- # This condition means the fallback key is used AND no env var was set.
288
- # It's a bit redundant with the earlier check but emphasizes if the hardcoded one is active without an override.
289
- print("Warning: Using fallback TMDB API Key. For production, set as environment variable/secret.")
290
-
291
- if not TMDB_API_KEY: # Final check if it's truly None
292
- return "<p style='color:red; text-align:right;'>خطأ: مفتاح TMDB API مو موجود. الرجاء إضافته كـ Secret في Hugging Face Space.</p>"
293
-
294
-
295
  if not all([df_profile_global is not None, df_watched_global is not None, seed_movies_global]):
296
- return "<p style='color:red; text-align:right;'>خطأ: فشل في تحميل بيانات المستخدم. تأكد من رفع ملفات CSV بشكل صحيح.</p>"
297
 
 
298
  if llm_pipeline is None:
299
- initialize_llm()
300
- if llm_pipeline is None:
301
- return "<p style='color:red; text-align:right;'>خطأ: فشل في تهيئة نموذج الذكاء الاصطناعي. حاول تحديث الصفحة.</p>"
302
 
303
- progress(0.1, desc="نجمع أفلامك المفضلة واللي قيمتها عالي...")
304
  potential_recs = {}
305
- seeds_to_process = seed_movies_global[:30]
306
 
307
  for i, seed_movie in enumerate(seeds_to_process):
308
  progress(0.1 + (i / len(seeds_to_process)) * 0.4, desc=f"نبحث عن توصيات بناءً على: {seed_movie['name']}")
@@ -310,7 +332,7 @@ def get_recommendations(progress=gr.Progress()):
310
  if seed_tmdb_details and seed_tmdb_details.get('id'):
311
  tmdb_recs = get_tmdb_recommendations(seed_tmdb_details['id'])
312
  for rec in tmdb_recs:
313
- try: # Add try-except for year conversion
314
  rec_tuple = (str(rec['title']), int(rec['year']))
315
  if rec.get('id') and rec_tuple not in all_watched_titles_global and rec_tuple not in watchlist_titles_global:
316
  if rec['id'] not in potential_recs:
@@ -318,13 +340,9 @@ def get_recommendations(progress=gr.Progress()):
318
  'movie_info': rec, 'seed_movie_title': seed_movie['name'],
319
  'seed_movie_context': seed_movie.get('review_text', '') or seed_movie.get('comment_text', '')
320
  }
321
- except ValueError:
322
- # print(f"Warning: Could not parse year for recommended movie {rec.get('title')}. Skipping.")
323
- continue # Skip if year is not a valid integer
324
-
325
-
326
  if not potential_recs:
327
- return "<p style='text-align:right;'>ما لقينا توصيات جديدة لك حالياً. يمكن شفت كل شيء رهيب! 😉</p>"
328
 
329
  sorted_recs_list = sorted(potential_recs.values(), key=lambda x: x['movie_info'].get('popularity', 0), reverse=True)
330
  final_recommendations_data = []
@@ -334,13 +352,11 @@ def get_recommendations(progress=gr.Progress()):
334
  if rec_data['movie_info']['id'] not in displayed_ids:
335
  final_recommendations_data.append(rec_data)
336
  displayed_ids.add(rec_data['movie_info']['id'])
337
-
338
  if not final_recommendations_data:
339
- return "<p style='text-align:right;'>ما لقينا توصيات جديدة لك حالياً بعد الفلترة. يمكن شفت كل شيء رهيب! 😉</p>"
340
 
341
  output_html = "<div>"
342
  progress(0.6, desc="نجهز لك الشرح باللغة العامية...")
343
-
344
  for i, rec_data in enumerate(final_recommendations_data):
345
  progress(0.6 + (i / len(final_recommendations_data)) * 0.4, desc=f"نكتب شرح لفيلم: {rec_data['movie_info']['title']}")
346
  explanation = generate_saudi_explanation(
@@ -349,7 +365,6 @@ def get_recommendations(progress=gr.Progress()):
349
  poster_url = rec_data['movie_info']['poster_path']
350
  if not poster_url or "placeholder.com" in poster_url:
351
  poster_url = f"https://via.placeholder.com/300x450.png?text={rec_data['movie_info']['title'].replace(' ', '+')}"
352
-
353
  output_html += f"""
354
  <div style="display: flex; flex-direction: row-reverse; align-items: flex-start; margin-bottom: 25px; border-bottom: 1px solid #ddd; padding-bottom:15px; background-color: #f9f9f9; border-radius: 8px; padding: 15px;">
355
  <img src="{poster_url}" alt="{rec_data['movie_info']['title']}" style="width: 150px; max-width:30%; height: auto; margin-left: 20px; border-radius: 5px; box-shadow: 2px 2px 5px rgba(0,0,0,0.1);">
@@ -369,15 +384,13 @@ body { font-family: 'Tajawal', sans-serif; }
369
  footer { display: none !important; }
370
  .gr-button { background-color: #c70039 !important; color: white !important; font-size: 1.2em !important; padding: 10px 20px !important; border-radius: 8px !important; }
371
  .gr-button:hover { background-color: #a3002f !important; }
372
- .gr-input { text-align: right !important; }
373
- .gr-output { text-align: right !important; }
374
  h1, h3 { color: #900c3f !important; }
375
- """
376
 
377
  data_loaded_successfully = load_all_data()
378
  if data_loaded_successfully:
379
  print("All user data loaded and preprocessed successfully.")
380
- initialize_llm() # Initialize LLM after data loading
381
  else:
382
  print("Failed to load user data. The app might not function correctly.")
383
 
@@ -392,10 +405,13 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="red", secondary_hue="pink"), cs
392
  recommend_button = gr.Button("عطني توصيات أفلام!")
393
  with gr.Column():
394
  output_recommendations = gr.HTML(label="توصياتك النارية 🔥")
 
 
 
 
 
395
 
396
- recommend_button.click(
397
- fn=get_recommendations, inputs=[], outputs=[output_recommendations]
398
- )
399
  gr.Markdown(
400
  """
401
  <div style="text-align: center; margin-top: 30px; font-size: 0.9em; color: #777;">
@@ -404,7 +420,6 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="red", secondary_hue="pink"), cs
404
  )
405
 
406
  if __name__ == "__main__":
407
- if not TMDB_API_KEY or TMDB_API_KEY == "442a13f1865d8936f95aa20737e6f6f5" and not os.environ.get("TMDB_API_KEY"):
408
  print("\nWARNING: TMDB_API_KEY is using the hardcoded fallback or is missing.")
409
- print("For deployment or regular use, please set it as an environment variable or a Hugging Face Secret.\n")
410
- iface.launch(debug=True)
 
10
  import gradio as gr
11
  import time
12
 
13
+ # Opt-in to future pandas behavior to potentially silence the downcasting warning
14
+ # pd.set_option('future.no_silent_downcasting', True) # You can uncomment this if you wish
15
+
16
  # --- Configuration ---
17
  load_dotenv()
18
  TMDB_API_KEY = os.environ.get("TMDB_API_KEY", "442a13f1865d8936f95aa20737e6f6f5")
19
+ HF_TOKEN = os.environ.get("HF_TOKEN") # CRUCIAL for gated models
20
 
21
+ # Model name corrected (previously "mistralai/Mistral-7B-Instruct-v0.1"); ALLaM is a gated model, so HF_TOKEN must be set
22
+ MODEL_NAME = "ALLaM-AI/ALLaM-7B-Instruct-preview"
23
 
24
  BASE_TMDB_URL = "https://api.themoviedb.org/3"
25
  POSTER_BASE_URL = "https://image.tmdb.org/t/p/w500"
 
26
  NUM_RECOMMENDATIONS_TO_DISPLAY = 5
27
  MIN_RATING_FOR_SEED = 3.5
28
  MIN_VOTE_COUNT_TMDB = 100
29
 
30
+ # --- Global Variables ---
31
  df_profile_global = None
32
  df_watchlist_global = None
33
  df_reviews_global = None
34
  df_diary_global = None
35
  df_ratings_global = None
36
+ df_watched_global = None
37
 
38
  uri_to_movie_map_global = {}
39
  all_watched_titles_global = set()
 
46
 
47
  # --- Helper Functions ---
48
  def clean_html(raw_html):
49
+ if pd.isna(raw_html) or raw_html is None: return ""
 
50
  text = str(raw_html)
51
  text = re.sub(r'<br\s*/?>', '\n', text)
52
  soup = BeautifulSoup(text, "html.parser")
 
56
  uri_map = {}
57
  df_priority = ['reviews.csv', 'diary.csv', 'ratings.csv', 'watched.csv', 'watchlist.csv']
58
  processed_uris = set()
 
59
  for df_name in df_priority:
60
  df = dfs_dict.get(df_name)
61
  if df is not None and 'Letterboxd URI' in df.columns and 'Name' in df.columns and 'Year' in df.columns:
 
67
  year = int(row['Year'])
68
  uri_map[uri] = (str(row['Name']), year)
69
  processed_uris.add(uri)
70
+ except ValueError: pass
 
71
  return uri_map
72
 
73
  def load_all_data():
 
77
 
78
  try:
79
  df_profile_global = pd.read_csv("profile.csv")
 
80
  df_watchlist_global = pd.read_csv("watchlist.csv")
81
  df_reviews_global = pd.read_csv("reviews.csv")
82
  df_diary_global = pd.read_csv("diary.csv")
83
  df_ratings_global = pd.read_csv("ratings.csv")
84
  _df_watched_log = pd.read_csv("watched.csv")
85
  except FileNotFoundError as e:
86
+ print(f"ERROR: CSV file not found: {e}.")
87
  return False
88
 
89
  dfs_for_uri_map = {
 
114
  consolidated.drop(columns=['Rating_simple'], inplace=True)
115
 
116
  watched_log_subset = _df_watched_log[['Letterboxd URI', 'Name', 'Year']].copy()
117
+ watched_log_subset['from_watched_log'] = True # This column is an object/boolean dtype
118
  consolidated = pd.merge(consolidated, watched_log_subset, on=['Letterboxd URI', 'Name', 'Year'], how='outer')
119
+
120
+ # Address the FutureWarning directly or use pd.set_option
121
+ # This ensures 'from_watched_log' becomes boolean after fillna
122
+ consolidated['from_watched_log'] = consolidated['from_watched_log'].fillna(False).astype(bool)
123
+
124
 
125
  consolidated['Review Text'] = consolidated['Review Text'].fillna('').apply(clean_html)
126
  consolidated['Year'] = pd.to_numeric(consolidated['Year'], errors='coerce').astype('Int64')
 
131
  all_watched_titles_global = set(zip(df_watched_global['Name'].astype(str), df_watched_global['Year'].astype(int)))
132
  for _, row in _df_watched_log.iterrows():
133
  if pd.notna(row['Name']) and pd.notna(row['Year']):
134
+ try: all_watched_titles_global.add((str(row['Name']), int(row['Year'])))
 
135
  except ValueError: pass
136
 
137
  if df_watchlist_global is not None:
 
164
  'review_text': row['Review Text'], 'uri': row['Letterboxd URI']
165
  })
166
  temp_df = pd.DataFrame(seed_movies_global)
167
+ if not temp_df.empty: # Check if DataFrame is not empty before dropping duplicates
168
+ temp_df.drop_duplicates(subset=['name', 'year'], keep='first', inplace=True)
169
+ seed_movies_global = temp_df.to_dict('records')
170
+ else:
171
+ seed_movies_global = [] # Ensure it's an empty list if temp_df was empty
172
+
173
  random.shuffle(seed_movies_global)
174
  return True
175
 
 
177
  global llm_pipeline, llm_tokenizer
178
  if llm_pipeline is None:
179
  print(f"Initializing LLM: {MODEL_NAME}")
180
+ if not HF_TOKEN:
181
+ print("WARNING: HF_TOKEN not found. Access to gated models like ALLaM will fail.")
182
+ # Optionally, you could prevent the attempt to load if no token,
183
+ # or let it try and fail, as it currently does.
184
+ # return # uncomment to stop here if no token
185
+
186
  try:
187
+ llm_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True, token=HF_TOKEN)
188
  model = AutoModelForCausalLM.from_pretrained(
189
+ MODEL_NAME,
190
+ torch_dtype=torch.float16,
191
+ device_map="auto",
192
+ load_in_8bit=True,
193
+ trust_remote_code=True,
194
+ token=HF_TOKEN
195
  )
196
+ if llm_tokenizer.pad_token is None:
197
+ llm_tokenizer.pad_token = llm_tokenizer.eos_token
198
+ model.config.pad_token_id = model.config.eos_token_id
199
+
200
  llm_pipeline = pipeline(
201
+ "text-generation", model=model, tokenizer=llm_tokenizer
 
202
  )
203
+ print(f"LLM ({MODEL_NAME}) Initialized Successfully.")
204
  except Exception as e:
205
+ print(f"Error initializing LLM ({MODEL_NAME}): {e}")
206
  llm_pipeline = None
207
 
208
+
209
+ # --- TMDB API Functions ---
210
  def search_tmdb_movie_details(title, year):
211
+ if not TMDB_API_KEY or TMDB_API_KEY == "YOUR_TMDB_API_KEY_FALLBACK":
212
  print("TMDB API Key not properly configured.")
213
  return None
214
  try:
 
232
  'vote_average': movie.get('vote_average'), 'vote_count': movie.get('vote_count'),
233
  'popularity': movie.get('popularity')
234
  }
235
+ time.sleep(0.25)
236
+ except requests.RequestException as e: print(f"Error searching TMDB for {title} ({year}): {e}")
237
+ except Exception as ex: print(f"Unexpected error in search_tmdb_movie_details for {title} ({year}): {ex}")
 
 
238
  return None
239
 
240
  def get_tmdb_recommendations(movie_id, page=1):
241
+ if not TMDB_API_KEY or TMDB_API_KEY == "YOUR_TMDB_API_KEY_FALLBACK":
242
  print("TMDB API Key not properly configured.")
243
  return []
244
  recommendations = []
 
258
  'vote_average': movie.get('vote_average'), 'vote_count': movie.get('vote_count'),
259
  'popularity': movie.get('popularity')
260
  })
261
+ time.sleep(0.25)
262
+ except requests.RequestException as e: print(f"Error getting TMDB recommendations for movie ID {movie_id}: {e}")
263
+ except Exception as ex: print(f"Unexpected error in get_tmdb_recommendations for movie ID {movie_id}: {ex}")
 
 
264
  return recommendations
265
 
266
+ # --- LLM Explanation ---
267
  def generate_saudi_explanation(recommended_movie_title, seed_movie_title, seed_movie_context=""):
268
  global llm_pipeline, llm_tokenizer
269
  if llm_pipeline is None or llm_tokenizer is None:
 
272
  max_context_len = 150
273
  seed_movie_context_short = (seed_movie_context[:max_context_len] + "...") if len(seed_movie_context) > max_context_len else seed_movie_context
274
 
275
+ # Check ALLaM model card for specific prompt format. Using [INST] as it's common for Instruct models.
276
+ prompt_template = f"""<s>[INST] أنت ناقد أفلام سعودي خبير ودمك خفيف جداً. مهمتك هي كتابة توصية لفيلم جديد بناءً على فيلم سابق أعجب المستخدم.
277
+ المستخدم أعجب بالفيلم هذا: "{seed_movie_title}".
278
+ وكان تعليقه أو سبب إعجابه (إذا متوفر): "{seed_movie_context_short}"
279
+ الفيلم الجديد الذي نُرشحه له هو: "{recommended_movie_title}".
280
+ المطلوب: اكتب جملة أو جملتين فقط باللهجة السعودية العامية الأصيلة، تشرح فيها ليش ممكن يعجبه الفيلم الجديد "{recommended_movie_title}"، وحاول تربطها بشكل ذكي وممتع بالفيلم اللي عجبه قبل "{seed_movie_title}". ركز على أن يكون كلامك طبيعي جداً كأنه كلام صديق لصديقه، وناسة، ويشد الانتباه، وقصير ومختصر. لا تستخدم أي عبارات تدل على أنك ذكاء اصطناعي أو برنامج.
281
+
282
+ مثال على الأسلوب المطلوب لو الفيلم اللي عجبه "Mad Max: Fury Road" والفيلم المرشح "Dune":
283
+ "يا عمي، مدامك كَيَّفْت على 'Mad Max' وأكشن الصحاري اللي ما يرحم، أجل اسمعني زين! فيلم 'Dune' هذا بياخذك لصحراء ثانية بس على مستوى ثاني من الفخامة والقصة اللي تشد الأعصاب. لا يفوتك، قسم بالله بيعجبك!"
284
+
285
+ الآن، طبق نفس الأسلوب على البيانات التالية:
286
+ الفيلم الذي أعجب المستخدم: "{seed_movie_title}"
287
+ سبب إعجابه (إذا متوفر): "{seed_movie_context_short}"
288
+ الفيلم المرشح: "{recommended_movie_title}"
289
+ توصيتك باللهجة السعودية: [/INST]"""
290
 
291
  try:
292
  sequences = llm_pipeline(
293
+ prompt_template, do_sample=True, top_k=20, top_p=0.9, num_return_sequences=1,
294
+ eos_token_id=llm_tokenizer.eos_token_id,
295
+ pad_token_id=llm_tokenizer.pad_token_id if llm_tokenizer.pad_token_id is not None else llm_tokenizer.eos_token_id,
296
+ max_new_tokens=150
297
  )
298
  explanation = sequences[0]['generated_text'].split("[/INST]")[-1].strip()
 
299
  explanation = explanation.replace("<s>", "").replace("</s>", "").strip()
300
+ explanation = re.sub(r"بصفتي نموذج لغوي.*?\s*,?\s*", "", explanation, flags=re.IGNORECASE)
301
+ explanation = re.sub(r"كنموذج لغوي.*?\s*,?\s*", "", explanation, flags=re.IGNORECASE)
302
+
303
+ if not explanation or explanation.lower().startswith("أنت ناقد أفلام") or len(explanation) < 20 :
304
  return f"شكلك بتنبسط على فيلم '{recommended_movie_title}' لأنه يشبه جو فيلم '{seed_movie_title}' اللي حبيته! عطيه تجربة."
305
  return explanation
306
  except Exception as e:
307
+ print(f"Error during LLM generation with {MODEL_NAME}: {e}")
308
  return f"يا كابتن، شكلك بتحب '{recommended_movie_title}'، خاصة إنك استمتعت بـ'{seed_movie_title}'. جربه وعطنا رأيك!"
309
 
310
+ # --- Recommendation Logic ---
311
  def get_recommendations(progress=gr.Progress()):
312
+ if not TMDB_API_KEY or (TMDB_API_KEY == "442a13f1865d8936f95aa20737e6f6f5" and not os.environ.get("TMDB_API_KEY")):
313
+ print("Warning: Using fallback TMDB API Key.")
314
+ if not TMDB_API_KEY:
315
+ return "<p style='color:red; text-align:right;'>خطأ: مفتاح TMDB API مو موجود.</p>"
 
 
 
 
 
316
  if not all([df_profile_global is not None, df_watched_global is not None, seed_movies_global]):
317
+ return "<p style='color:red; text-align:right;'>خطأ: فشل في تحميل بيانات المستخدم.</p>"
318
 
319
+ # Ensure LLM is initialized before trying to use it
320
  if llm_pipeline is None:
321
+ initialize_llm() # Attempt to initialize if not already done
322
+ if llm_pipeline is None: # Check again if initialization failed
323
+ return "<p style='color:red; text-align:right;'>خطأ: فشل في تهيئة نموذج الذكاء الاصطناعي. تأكد من وجود HF_TOKEN وأن لديك صلاحية الوصول للنموذج.</p>"
324
 
325
+ progress(0.1, desc="نجمع أفلامك المفضلة...")
326
  potential_recs = {}
327
+ seeds_to_process = seed_movies_global[:25]
328
 
329
  for i, seed_movie in enumerate(seeds_to_process):
330
  progress(0.1 + (i / len(seeds_to_process)) * 0.4, desc=f"نبحث عن توصيات بناءً على: {seed_movie['name']}")
 
332
  if seed_tmdb_details and seed_tmdb_details.get('id'):
333
  tmdb_recs = get_tmdb_recommendations(seed_tmdb_details['id'])
334
  for rec in tmdb_recs:
335
+ try:
336
  rec_tuple = (str(rec['title']), int(rec['year']))
337
  if rec.get('id') and rec_tuple not in all_watched_titles_global and rec_tuple not in watchlist_titles_global:
338
  if rec['id'] not in potential_recs:
 
340
  'movie_info': rec, 'seed_movie_title': seed_movie['name'],
341
  'seed_movie_context': seed_movie.get('review_text', '') or seed_movie.get('comment_text', '')
342
  }
343
+ except (ValueError, TypeError): continue # Catch TypeError if year is None
 
 
 
 
344
  if not potential_recs:
345
+ return "<p style='text-align:right;'>ما لقينا توصيات جديدة لك حالياً. 😉</p>"
346
 
347
  sorted_recs_list = sorted(potential_recs.values(), key=lambda x: x['movie_info'].get('popularity', 0), reverse=True)
348
  final_recommendations_data = []
 
352
  if rec_data['movie_info']['id'] not in displayed_ids:
353
  final_recommendations_data.append(rec_data)
354
  displayed_ids.add(rec_data['movie_info']['id'])
 
355
  if not final_recommendations_data:
356
+ return "<p style='text-align:right;'>ما لقينا توصيات جديدة لك حالياً بعد الفلترة. 😉</p>"
357
 
358
  output_html = "<div>"
359
  progress(0.6, desc="نجهز لك الشرح باللغة العامية...")
 
360
  for i, rec_data in enumerate(final_recommendations_data):
361
  progress(0.6 + (i / len(final_recommendations_data)) * 0.4, desc=f"نكتب شرح لفيلم: {rec_data['movie_info']['title']}")
362
  explanation = generate_saudi_explanation(
 
365
  poster_url = rec_data['movie_info']['poster_path']
366
  if not poster_url or "placeholder.com" in poster_url:
367
  poster_url = f"https://via.placeholder.com/300x450.png?text={rec_data['movie_info']['title'].replace(' ', '+')}"
 
368
  output_html += f"""
369
  <div style="display: flex; flex-direction: row-reverse; align-items: flex-start; margin-bottom: 25px; border-bottom: 1px solid #ddd; padding-bottom:15px; background-color: #f9f9f9; border-radius: 8px; padding: 15px;">
370
  <img src="{poster_url}" alt="{rec_data['movie_info']['title']}" style="width: 150px; max-width:30%; height: auto; margin-left: 20px; border-radius: 5px; box-shadow: 2px 2px 5px rgba(0,0,0,0.1);">
 
384
  footer { display: none !important; }
385
  .gr-button { background-color: #c70039 !important; color: white !important; font-size: 1.2em !important; padding: 10px 20px !important; border-radius: 8px !important; }
386
  .gr-button:hover { background-color: #a3002f !important; }
 
 
387
  h1, h3 { color: #900c3f !important; }
388
+ """ # Removed .gr-input and .gr-output as they aren't used directly for styling here
389
 
390
  data_loaded_successfully = load_all_data()
391
  if data_loaded_successfully:
392
  print("All user data loaded and preprocessed successfully.")
393
+ # LLM initialization is attempted at startup while the interface is being defined; get_recommendations retries on click if llm_pipeline is still None
394
  else:
395
  print("Failed to load user data. The app might not function correctly.")
396
 
 
405
  recommend_button = gr.Button("عطني توصيات أفلام!")
406
  with gr.Column():
407
  output_recommendations = gr.HTML(label="توصياتك النارية 🔥")
408
+
409
+ # Call initialize_llm once when the interface is defined if data loaded successfully
410
+ # This way, it tries to load the LLM when the app starts, not just on the first click.
411
+ if data_loaded_successfully:
412
+ initialize_llm() # Moved initialization here
413
 
414
+ recommend_button.click(fn=get_recommendations, inputs=[], outputs=[output_recommendations])
 
 
415
  gr.Markdown(
416
  """
417
  <div style="text-align: center; margin-top: 30px; font-size: 0.9em; color: #777;">
 
420
  )
421
 
422
  if __name__ == "__main__":
423
+ if not TMDB_API_KEY or (TMDB_API_KEY == "442a13f1865d8936f95aa20737e6f6f5" and not os.environ.get("TMDB_API_KEY")):
424
  print("\nWARNING: TMDB_API_KEY is using the hardcoded fallback or is missing.")
425
+ iface.launch(debug=True) # Set debug=False for production or normal HF Space operation