Spaces:

s2049
/

LetterboxdRecommender

Running

App Files Files Community

s2049 committed on May 9

Commit

b90f766

verified ·

1 Parent(s): cbab1da

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -90

app.py CHANGED Viewed

@@ -10,27 +10,30 @@ import torch
 import gradio as gr
 import time
 # --- Configuration ---
 load_dotenv()
 TMDB_API_KEY = os.environ.get("TMDB_API_KEY", "442a13f1865d8936f95aa20737e6f6f5")
-HF_TOKEN = os.environ.get("HF_TOKEN")
-MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
 BASE_TMDB_URL = "https://api.themoviedb.org/3"
 POSTER_BASE_URL = "https://image.tmdb.org/t/p/w500"
-NUM_RECOMMENDATIONS_TO_GENERATE = 20
 NUM_RECOMMENDATIONS_TO_DISPLAY = 5
 MIN_RATING_FOR_SEED = 3.5
 MIN_VOTE_COUNT_TMDB = 100
-# --- Global Variables for Data (Load once) ---
 df_profile_global = None
 df_watchlist_global = None
 df_reviews_global = None
 df_diary_global = None
 df_ratings_global = None
-df_watched_global = None # Consolidated
 uri_to_movie_map_global = {}
 all_watched_titles_global = set()
@@ -43,8 +46,7 @@ llm_tokenizer = None
 # --- Helper Functions ---
 def clean_html(raw_html):
-    if pd.isna(raw_html) or raw_html is None:
-        return ""
     text = str(raw_html)
     text = re.sub(r'<br\s*/?>', '\n', text)
     soup = BeautifulSoup(text, "html.parser")
@@ -54,7 +56,6 @@ def get_movie_uri_map(dfs_dict):
     uri_map = {}
     df_priority = ['reviews.csv', 'diary.csv', 'ratings.csv', 'watched.csv', 'watchlist.csv']
     processed_uris = set()
     for df_name in df_priority:
         df = dfs_dict.get(df_name)
         if df is not None and 'Letterboxd URI' in df.columns and 'Name' in df.columns and 'Year' in df.columns:
@@ -66,8 +67,7 @@ def get_movie_uri_map(dfs_dict):
                             year = int(row['Year'])
                             uri_map[uri] = (str(row['Name']), year)
                             processed_uris.add(uri)
-                        except ValueError:
-                            pass
     return uri_map
 def load_all_data():
@@ -77,14 +77,13 @@ def load_all_data():
     try:
         df_profile_global = pd.read_csv("profile.csv")
-        df_comments_global = pd.read_csv("comments.csv") # Loaded but not explicitly used in this version for recs
         df_watchlist_global = pd.read_csv("watchlist.csv")
         df_reviews_global = pd.read_csv("reviews.csv")
         df_diary_global = pd.read_csv("diary.csv")
         df_ratings_global = pd.read_csv("ratings.csv")
         _df_watched_log = pd.read_csv("watched.csv")
     except FileNotFoundError as e:
-        print(f"ERROR: CSV file not found: {e}. Please ensure all CSV files are uploaded to the HF Space.")
         return False
     dfs_for_uri_map = {
@@ -115,9 +114,13 @@ def load_all_data():
     consolidated.drop(columns=['Rating_simple'], inplace=True)
     watched_log_subset = _df_watched_log[['Letterboxd URI', 'Name', 'Year']].copy()
-    watched_log_subset['from_watched_log'] = True
     consolidated = pd.merge(consolidated, watched_log_subset, on=['Letterboxd URI', 'Name', 'Year'], how='outer')
-    consolidated['from_watched_log'] = consolidated['from_watched_log'].fillna(False)
     consolidated['Review Text'] = consolidated['Review Text'].fillna('').apply(clean_html)
     consolidated['Year'] = pd.to_numeric(consolidated['Year'], errors='coerce').astype('Int64')
@@ -128,8 +131,7 @@ def load_all_data():
     all_watched_titles_global = set(zip(df_watched_global['Name'].astype(str), df_watched_global['Year'].astype(int)))
     for _, row in _df_watched_log.iterrows():
         if pd.notna(row['Name']) and pd.notna(row['Year']):
-            try:
-                all_watched_titles_global.add((str(row['Name']), int(row['Year'])))
             except ValueError: pass
     if df_watchlist_global is not None:
@@ -162,8 +164,12 @@ def load_all_data():
                 'review_text': row['Review Text'], 'uri': row['Letterboxd URI']
             })
     temp_df = pd.DataFrame(seed_movies_global)
-    temp_df.drop_duplicates(subset=['name', 'year'], keep='first', inplace=True)
-    seed_movies_global = temp_df.to_dict('records')
     random.shuffle(seed_movies_global)
     return True
@@ -171,23 +177,38 @@ def initialize_llm():
     global llm_pipeline, llm_tokenizer
     if llm_pipeline is None:
         print(f"Initializing LLM: {MODEL_NAME}")
         try:
-            llm_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
             model = AutoModelForCausalLM.from_pretrained(
-                MODEL_NAME, torch_dtype=torch.float16, device_map="auto",
-                trust_remote_code=True, token=HF_TOKEN if HF_TOKEN else None
             )
             llm_pipeline = pipeline(
-                "text-generation", model=model, tokenizer=llm_tokenizer,
-                torch_dtype=torch.float16, device_map="auto"
             )
-            print("LLM Initialized Successfully.")
         except Exception as e:
-            print(f"Error initializing LLM: {e}")
             llm_pipeline = None
 def search_tmdb_movie_details(title, year):
-    if not TMDB_API_KEY or TMDB_API_KEY == "YOUR_TMDB_API_KEY_FALLBACK": # Fallback check
         print("TMDB API Key not properly configured.")
         return None
     try:
@@ -211,15 +232,13 @@ def search_tmdb_movie_details(title, year):
                 'vote_average': movie.get('vote_average'), 'vote_count': movie.get('vote_count'),
                 'popularity': movie.get('popularity')
             }
-        time.sleep(0.25) # Adjusted delay
-    except requests.RequestException as e:
-        print(f"Error searching TMDB for {title} ({year}): {e}")
-    except Exception as ex:
-        print(f"Unexpected error in search_tmdb_movie_details for {title} ({year}): {ex}")
     return None
 def get_tmdb_recommendations(movie_id, page=1):
-    if not TMDB_API_KEY or TMDB_API_KEY == "YOUR_TMDB_API_KEY_FALLBACK": # Fallback check
         print("TMDB API Key not properly configured.")
         return []
     recommendations = []
@@ -239,13 +258,12 @@ def get_tmdb_recommendations(movie_id, page=1):
                     'vote_average': movie.get('vote_average'), 'vote_count': movie.get('vote_count'),
                     'popularity': movie.get('popularity')
                 })
-        time.sleep(0.25) # Adjusted delay
-    except requests.RequestException as e:
-        print(f"Error getting TMDB recommendations for movie ID {movie_id}: {e}")
-    except Exception as ex:
-        print(f"Unexpected error in get_tmdb_recommendations for movie ID {movie_id}: {ex}")
     return recommendations
 def generate_saudi_explanation(recommended_movie_title, seed_movie_title, seed_movie_context=""):
     global llm_pipeline, llm_tokenizer
     if llm_pipeline is None or llm_tokenizer is None:
@@ -254,55 +272,59 @@ def generate_saudi_explanation(recommended_movie_title, seed_movie_title, seed_m
     max_context_len = 150
     seed_movie_context_short = (seed_movie_context[:max_context_len] + "...") if len(seed_movie_context) > max_context_len else seed_movie_context
-    prompt_template = f"""<s>[INST] أنت ناقد أفلام سعودي خبير ودمك خفيف. المستخدم أعجب بالفيلم "{seed_movie_title}".
-سبب إعجابه بالفيلم الأول (إذا متوفر): "{seed_movie_context_short}"
-بناءً على ذلك، نُرشح له فيلم "{recommended_movie_title}".
-اكتب جملة أو جملتين باللهجة السعودية العامية، تشرح ليش ممكن يعجبه الفيلم الجديد "{recommended_movie_title}"، مع ربطها بالفيلم اللي عجبه "{seed_movie_title}". خلي كلامك وناسة ويشد الواحد وما يكون طويل. لا تذكر أبداً أنك نموذج لغوي أو ذكاء اصطناعي.
-مثال للأسلوب المطلوب (لو الفيلم اللي عجبه "Mad Max: Fury Road" والفيلم المرشح "Dune"):
-"يا طويل العمر، شفت كيف 'Mad Max: Fury Road' عجّبك بجوّه الصحراوي والأكشن اللي ما يوقّف؟ أجل اسمع، 'Dune' بيوديك لصحراء ثانية بس أعظم وأفخم، وقصة تحبس الأنفاس! شد حيلك وشوفه."
-الآن، الفيلم الذي أعجب المستخدم هو: "{seed_movie_title}"
-سبب إعجابه بالفيلم الأول (إذا متوفر): "{seed_movie_context_short}"
-الفيلم المرشح: "{recommended_movie_title}"
-اشرح باللهجة السعودية: [/INST]"""
     try:
         sequences = llm_pipeline(
-            prompt_template, do_sample=True, top_k=10, num_return_sequences=1,
-            eos_token_id=llm_tokenizer.eos_token_id, max_new_tokens=120 # Increased slightly
         )
         explanation = sequences[0]['generated_text'].split("[/INST]")[-1].strip()
-        explanation = re.sub(r"^اشرح باللهجة السعودية:\s*", "", explanation, flags=re.IGNORECASE)
         explanation = explanation.replace("<s>", "").replace("</s>", "").strip()
-        if not explanation or explanation.lower().startswith("أنت ناقد أفلام"):
             return f"شكلك بتنبسط على فيلم '{recommended_movie_title}' لأنه يشبه جو فيلم '{seed_movie_title}' اللي حبيته! عطيه تجربة."
         return explanation
     except Exception as e:
-        print(f"Error during LLM generation: {e}")
         return f"يا كابتن، شكلك بتحب '{recommended_movie_title}'، خاصة إنك استمتعت بـ'{seed_movie_title}'. جربه وعطنا رأيك!"
 def get_recommendations(progress=gr.Progress()):
-    if not TMDB_API_KEY or TMDB_API_KEY == "442a13f1865d8936f95aa20737e6f6f5" and not os.environ.get("TMDB_API_KEY"):
-         # This condition means the fallback key is used AND no env var was set.
-         # It's a bit redundant with the earlier check but emphasizes if the hardcoded one is active without an override.
-        print("Warning: Using fallback TMDB API Key. For production, set as environment variable/secret.")
-    if not TMDB_API_KEY: # Final check if it's truly None
-        return "<p style='color:red; text-align:right;'>خطأ: مفتاح TMDB API مو موجود. الرجاء إضافته كـ Secret في Hugging Face Space.</p>"
     if not all([df_profile_global is not None, df_watched_global is not None, seed_movies_global]):
-        return "<p style='color:red; text-align:right;'>خطأ: فشل في تحميل بيانات المستخدم. تأكد من رفع ملفات CSV بشكل صحيح.</p>"
     if llm_pipeline is None:
-        initialize_llm()
-        if llm_pipeline is None:
-             return "<p style='color:red; text-align:right;'>خطأ: فشل في تهيئة نموذج الذكاء الاصطناعي. حاول تحديث الصفحة.</p>"
-    progress(0.1, desc="نجمع أفلامك المفضلة واللي قيمتها عالي...")
     potential_recs = {}
-    seeds_to_process = seed_movies_global[:30]
     for i, seed_movie in enumerate(seeds_to_process):
         progress(0.1 + (i / len(seeds_to_process)) * 0.4, desc=f"نبحث عن توصيات بناءً على: {seed_movie['name']}")
@@ -310,7 +332,7 @@ def get_recommendations(progress=gr.Progress()):
         if seed_tmdb_details and seed_tmdb_details.get('id'):
             tmdb_recs = get_tmdb_recommendations(seed_tmdb_details['id'])
             for rec in tmdb_recs:
-                try: # Add try-except for year conversion
                     rec_tuple = (str(rec['title']), int(rec['year']))
                     if rec.get('id') and rec_tuple not in all_watched_titles_global and rec_tuple not in watchlist_titles_global:
                         if rec['id'] not in potential_recs:
@@ -318,13 +340,9 @@ def get_recommendations(progress=gr.Progress()):
                                 'movie_info': rec, 'seed_movie_title': seed_movie['name'],
                                 'seed_movie_context': seed_movie.get('review_text', '') or seed_movie.get('comment_text', '')
                             }
-                except ValueError:
-                    # print(f"Warning: Could not parse year for recommended movie {rec.get('title')}. Skipping.")
-                    continue # Skip if year is not a valid integer
     if not potential_recs:
-        return "<p style='text-align:right;'>ما لقينا توصيات جديدة لك حالياً. يمكن شفت كل شيء رهيب! 😉</p>"
     sorted_recs_list = sorted(potential_recs.values(), key=lambda x: x['movie_info'].get('popularity', 0), reverse=True)
     final_recommendations_data = []
@@ -334,13 +352,11 @@ def get_recommendations(progress=gr.Progress()):
         if rec_data['movie_info']['id'] not in displayed_ids:
             final_recommendations_data.append(rec_data)
             displayed_ids.add(rec_data['movie_info']['id'])
     if not final_recommendations_data:
-         return "<p style='text-align:right;'>ما لقينا توصيات جديدة لك حالياً بعد الفلترة. يمكن شفت كل شيء رهيب! 😉</p>"
     output_html = "<div>"
     progress(0.6, desc="نجهز لك الشرح باللغة العامية...")
     for i, rec_data in enumerate(final_recommendations_data):
         progress(0.6 + (i / len(final_recommendations_data)) * 0.4, desc=f"نكتب شرح لفيلم: {rec_data['movie_info']['title']}")
         explanation = generate_saudi_explanation(
@@ -349,7 +365,6 @@ def get_recommendations(progress=gr.Progress()):
         poster_url = rec_data['movie_info']['poster_path']
         if not poster_url or "placeholder.com" in poster_url:
             poster_url = f"https://via.placeholder.com/300x450.png?text={rec_data['movie_info']['title'].replace(' ', '+')}"
         output_html += f"""
         <div style="display: flex; flex-direction: row-reverse; align-items: flex-start; margin-bottom: 25px; border-bottom: 1px solid #ddd; padding-bottom:15px; background-color: #f9f9f9; border-radius: 8px; padding: 15px;">
             <img src="{poster_url}" alt="{rec_data['movie_info']['title']}" style="width: 150px; max-width:30%; height: auto; margin-left: 20px; border-radius: 5px; box-shadow: 2px 2px 5px rgba(0,0,0,0.1);">
@@ -369,15 +384,13 @@ body { font-family: 'Tajawal', sans-serif; }
 footer { display: none !important; }
 .gr-button { background-color: #c70039 !important; color: white !important; font-size: 1.2em !important; padding: 10px 20px !important; border-radius: 8px !important; }
 .gr-button:hover { background-color: #a3002f !important; }
-.gr-input { text-align: right !important; }
-.gr-output { text-align: right !important; }
 h1, h3 { color: #900c3f !important; }
-"""
 data_loaded_successfully = load_all_data()
 if data_loaded_successfully:
     print("All user data loaded and preprocessed successfully.")
-    initialize_llm() # Initialize LLM after data loading
 else:
     print("Failed to load user data. The app might not function correctly.")
@@ -392,10 +405,13 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="red", secondary_hue="pink"), cs
     recommend_button = gr.Button("عطني توصيات أفلام!")
     with gr.Column():
         output_recommendations = gr.HTML(label="توصياتك النارية 🔥")
-    recommend_button.click(
-        fn=get_recommendations, inputs=[], outputs=[output_recommendations]
-    )
     gr.Markdown(
         """
         <div style="text-align: center; margin-top: 30px; font-size: 0.9em; color: #777;">
@@ -404,7 +420,6 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="red", secondary_hue="pink"), cs
     )
 if __name__ == "__main__":
-    if not TMDB_API_KEY or TMDB_API_KEY == "442a13f1865d8936f95aa20737e6f6f5" and not os.environ.get("TMDB_API_KEY"):
         print("\nWARNING: TMDB_API_KEY is using the hardcoded fallback or is missing.")
-        print("For deployment or regular use, please set it as an environment variable or a Hugging Face Secret.\n")
-    iface.launch(debug=True)

 import gradio as gr
 import time
+# Opt-in to future pandas behavior to potentially silence the downcasting warning
+# pd.set_option('future.no_silent_downcasting', True) # You can uncomment this if you wish
 # --- Configuration ---
 load_dotenv()
 TMDB_API_KEY = os.environ.get("TMDB_API_KEY", "442a13f1865d8936f95aa20737e6f6f5")
+HF_TOKEN = os.environ.get("HF_TOKEN") # CRUCIAL for gated models
+# CORRECTED MODEL NAME
+MODEL_NAME = "ALLaM-AI/ALLaM-7B-Instruct-preview"
 BASE_TMDB_URL = "https://api.themoviedb.org/3"
 POSTER_BASE_URL = "https://image.tmdb.org/t/p/w500"
 NUM_RECOMMENDATIONS_TO_DISPLAY = 5
 MIN_RATING_FOR_SEED = 3.5
 MIN_VOTE_COUNT_TMDB = 100
+# --- Global Variables ---
 df_profile_global = None
 df_watchlist_global = None
 df_reviews_global = None
 df_diary_global = None
 df_ratings_global = None
+df_watched_global = None
 uri_to_movie_map_global = {}
 all_watched_titles_global = set()
 # --- Helper Functions ---
 def clean_html(raw_html):
+    if pd.isna(raw_html) or raw_html is None: return ""
     text = str(raw_html)
     text = re.sub(r'<br\s*/?>', '\n', text)
     soup = BeautifulSoup(text, "html.parser")
     uri_map = {}
     df_priority = ['reviews.csv', 'diary.csv', 'ratings.csv', 'watched.csv', 'watchlist.csv']
     processed_uris = set()
     for df_name in df_priority:
         df = dfs_dict.get(df_name)
         if df is not None and 'Letterboxd URI' in df.columns and 'Name' in df.columns and 'Year' in df.columns:
                             year = int(row['Year'])
                             uri_map[uri] = (str(row['Name']), year)
                             processed_uris.add(uri)
+                        except ValueError: pass
     return uri_map
 def load_all_data():
     try:
         df_profile_global = pd.read_csv("profile.csv")
         df_watchlist_global = pd.read_csv("watchlist.csv")
         df_reviews_global = pd.read_csv("reviews.csv")
         df_diary_global = pd.read_csv("diary.csv")
         df_ratings_global = pd.read_csv("ratings.csv")
         _df_watched_log = pd.read_csv("watched.csv")
     except FileNotFoundError as e:
+        print(f"ERROR: CSV file not found: {e}.")
         return False
     dfs_for_uri_map = {
     consolidated.drop(columns=['Rating_simple'], inplace=True)
     watched_log_subset = _df_watched_log[['Letterboxd URI', 'Name', 'Year']].copy()
+    watched_log_subset['from_watched_log'] = True # This column is an object/boolean dtype
     consolidated = pd.merge(consolidated, watched_log_subset, on=['Letterboxd URI', 'Name', 'Year'], how='outer')
+    # Address the FutureWarning directly or use pd.set_option
+    # This ensures 'from_watched_log' becomes boolean after fillna
+    consolidated['from_watched_log'] = consolidated['from_watched_log'].fillna(False).astype(bool)
     consolidated['Review Text'] = consolidated['Review Text'].fillna('').apply(clean_html)
     consolidated['Year'] = pd.to_numeric(consolidated['Year'], errors='coerce').astype('Int64')
     all_watched_titles_global = set(zip(df_watched_global['Name'].astype(str), df_watched_global['Year'].astype(int)))
     for _, row in _df_watched_log.iterrows():
         if pd.notna(row['Name']) and pd.notna(row['Year']):
+            try: all_watched_titles_global.add((str(row['Name']), int(row['Year'])))
             except ValueError: pass
     if df_watchlist_global is not None:
                 'review_text': row['Review Text'], 'uri': row['Letterboxd URI']
             })
     temp_df = pd.DataFrame(seed_movies_global)
+    if not temp_df.empty: # Check if DataFrame is not empty before dropping duplicates
+        temp_df.drop_duplicates(subset=['name', 'year'], keep='first', inplace=True)
+        seed_movies_global = temp_df.to_dict('records')
+    else:
+        seed_movies_global = [] # Ensure it's an empty list if temp_df was empty
     random.shuffle(seed_movies_global)
     return True
     global llm_pipeline, llm_tokenizer
     if llm_pipeline is None:
         print(f"Initializing LLM: {MODEL_NAME}")
+        if not HF_TOKEN:
+            print("WARNING: HF_TOKEN not found. Access to gated models like ALLaM will fail.")
+            # Optionally, you could prevent the attempt to load if no token,
+            # or let it try and fail, as it currently does.
+            # return # uncomment to stop here if no token
         try:
+            llm_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True, token=HF_TOKEN)
             model = AutoModelForCausalLM.from_pretrained(
+                MODEL_NAME,
+                torch_dtype=torch.float16,
+                device_map="auto",
+                load_in_8bit=True,
+                trust_remote_code=True,
+                token=HF_TOKEN
             )
+            if llm_tokenizer.pad_token is None:
+                llm_tokenizer.pad_token = llm_tokenizer.eos_token
+                model.config.pad_token_id = model.config.eos_token_id
             llm_pipeline = pipeline(
+                "text-generation", model=model, tokenizer=llm_tokenizer
             )
+            print(f"LLM ({MODEL_NAME}) Initialized Successfully.")
         except Exception as e:
+            print(f"Error initializing LLM ({MODEL_NAME}): {e}")
             llm_pipeline = None
+# --- TMDB API Functions ---
 def search_tmdb_movie_details(title, year):
+    if not TMDB_API_KEY or TMDB_API_KEY == "YOUR_TMDB_API_KEY_FALLBACK":
         print("TMDB API Key not properly configured.")
         return None
     try:
                 'vote_average': movie.get('vote_average'), 'vote_count': movie.get('vote_count'),
                 'popularity': movie.get('popularity')
             }
+        time.sleep(0.25)
+    except requests.RequestException as e: print(f"Error searching TMDB for {title} ({year}): {e}")
+    except Exception as ex: print(f"Unexpected error in search_tmdb_movie_details for {title} ({year}): {ex}")
     return None
 def get_tmdb_recommendations(movie_id, page=1):
+    if not TMDB_API_KEY or TMDB_API_KEY == "YOUR_TMDB_API_KEY_FALLBACK":
         print("TMDB API Key not properly configured.")
         return []
     recommendations = []
                     'vote_average': movie.get('vote_average'), 'vote_count': movie.get('vote_count'),
                     'popularity': movie.get('popularity')
                 })
+        time.sleep(0.25)
+    except requests.RequestException as e: print(f"Error getting TMDB recommendations for movie ID {movie_id}: {e}")
+    except Exception as ex: print(f"Unexpected error in get_tmdb_recommendations for movie ID {movie_id}: {ex}")
     return recommendations
+# --- LLM Explanation ---
 def generate_saudi_explanation(recommended_movie_title, seed_movie_title, seed_movie_context=""):
     global llm_pipeline, llm_tokenizer
     if llm_pipeline is None or llm_tokenizer is None:
     max_context_len = 150
     seed_movie_context_short = (seed_movie_context[:max_context_len] + "...") if len(seed_movie_context) > max_context_len else seed_movie_context
+    # Check ALLaM model card for specific prompt format. Using [INST] as it's common for Instruct models.
+    prompt_template = f"""<s>[INST] أنت ناقد أفلام سعودي خبير ودمك خفيف جداً. مهمتك هي كتابة توصية لفيلم جديد بناءً على فيلم سابق أعجب المستخدم.
+    المستخدم أعجب بالفيلم هذا: "{seed_movie_title}".
+    وكان تعليقه أو سبب إعجابه (إذا متوفر): "{seed_movie_context_short}"
+    الفيلم الجديد الذي نُرشحه له هو: "{recommended_movie_title}".
+    المطلوب: اكتب جملة أو جملتين فقط باللهجة السعودية العامية الأصيلة، تشرح فيها ليش ممكن يعجبه الفيلم الجديد "{recommended_movie_title}"، وحاول تربطها بشكل ذكي وممتع بالفيلم اللي عجبه قبل "{seed_movie_title}". ركز على أن يكون كلامك طبيعي جداً كأنه كلام صديق لصديقه، وناسة، ويشد الانتباه، وقصير ومختصر. لا تستخدم أي عبارات تدل على أنك ذكاء اصطناعي أو برنامج.
+    مثال على الأسلوب المطلوب لو الفيلم اللي عجبه "Mad Max: Fury Road" والفيلم المرشح "Dune":
+    "يا عمي، مدامك كَيَّفْت على 'Mad Max' وأكشن الصحاري اللي ما يرحم، أجل اسمعني زين! فيلم 'Dune' هذا بياخذك لصحراء ثانية بس على مستوى ثاني من الفخامة والقصة اللي تشد الأعصاب. لا يفوتك، قسم بالله بيعجبك!"
+    الآن، طبق نفس الأسلوب على البيانات التالية:
+    الفيلم الذي أعجب المستخدم: "{seed_movie_title}"
+    سبب إعجابه (إذا متوفر): "{seed_movie_context_short}"
+    الفيلم المرشح: "{recommended_movie_title}"
+    توصيتك باللهجة السعودية: [/INST]"""
     try:
         sequences = llm_pipeline(
+            prompt_template, do_sample=True, top_k=20, top_p=0.9, num_return_sequences=1,
+            eos_token_id=llm_tokenizer.eos_token_id,
+            pad_token_id=llm_tokenizer.pad_token_id if llm_tokenizer.pad_token_id is not None else llm_tokenizer.eos_token_id,
+            max_new_tokens=150
         )
         explanation = sequences[0]['generated_text'].split("[/INST]")[-1].strip()
         explanation = explanation.replace("<s>", "").replace("</s>", "").strip()
+        explanation = re.sub(r"بصفتي نموذج لغوي.*?\s*,?\s*", "", explanation, flags=re.IGNORECASE)
+        explanation = re.sub(r"كنموذج لغوي.*?\s*,?\s*", "", explanation, flags=re.IGNORECASE)
+        if not explanation or explanation.lower().startswith("أنت ناقد أفلام") or len(explanation) < 20 :
             return f"شكلك بتنبسط على فيلم '{recommended_movie_title}' لأنه يشبه جو فيلم '{seed_movie_title}' اللي حبيته! عطيه تجربة."
         return explanation
     except Exception as e:
+        print(f"Error during LLM generation with {MODEL_NAME}: {e}")
         return f"يا كابتن، شكلك بتحب '{recommended_movie_title}'، خاصة إنك استمتعت بـ'{seed_movie_title}'. جربه وعطنا رأيك!"
+# --- Recommendation Logic ---
 def get_recommendations(progress=gr.Progress()):
+    if not TMDB_API_KEY or (TMDB_API_KEY == "442a13f1865d8936f95aa20737e6f6f5" and not os.environ.get("TMDB_API_KEY")):
+        print("Warning: Using fallback TMDB API Key.")
+    if not TMDB_API_KEY:
+        return "<p style='color:red; text-align:right;'>خطأ: مفتاح TMDB API مو موجود.</p>"
     if not all([df_profile_global is not None, df_watched_global is not None, seed_movies_global]):
+        return "<p style='color:red; text-align:right;'>خطأ: فشل في تحميل بيانات المستخدم.</p>"
+    # Ensure LLM is initialized before trying to use it
     if llm_pipeline is None:
+        initialize_llm() # Attempt to initialize if not already done
+        if llm_pipeline is None: # Check again if initialization failed
+             return "<p style='color:red; text-align:right;'>خطأ: فشل في تهيئة نموذج الذكاء الاصطناعي. تأكد من وجود HF_TOKEN وأن لديك صلاحية الوصول للنموذج.</p>"
+    progress(0.1, desc="نجمع أفلامك المفضلة...")
     potential_recs = {}
+    seeds_to_process = seed_movies_global[:25]
     for i, seed_movie in enumerate(seeds_to_process):
         progress(0.1 + (i / len(seeds_to_process)) * 0.4, desc=f"نبحث عن توصيات بناءً على: {seed_movie['name']}")
         if seed_tmdb_details and seed_tmdb_details.get('id'):
             tmdb_recs = get_tmdb_recommendations(seed_tmdb_details['id'])
             for rec in tmdb_recs:
+                try:
                     rec_tuple = (str(rec['title']), int(rec['year']))
                     if rec.get('id') and rec_tuple not in all_watched_titles_global and rec_tuple not in watchlist_titles_global:
                         if rec['id'] not in potential_recs:
                                 'movie_info': rec, 'seed_movie_title': seed_movie['name'],
                                 'seed_movie_context': seed_movie.get('review_text', '') or seed_movie.get('comment_text', '')
                             }
+                except (ValueError, TypeError): continue # Catch TypeError if year is None
     if not potential_recs:
+        return "<p style='text-align:right;'>ما لقينا توصيات جديدة لك حالياً. 😉</p>"
     sorted_recs_list = sorted(potential_recs.values(), key=lambda x: x['movie_info'].get('popularity', 0), reverse=True)
     final_recommendations_data = []
         if rec_data['movie_info']['id'] not in displayed_ids:
             final_recommendations_data.append(rec_data)
             displayed_ids.add(rec_data['movie_info']['id'])
     if not final_recommendations_data:
+         return "<p style='text-align:right;'>ما لقينا توصيات جديدة لك حالياً بعد الفلترة. 😉</p>"
     output_html = "<div>"
     progress(0.6, desc="نجهز لك الشرح باللغة العامية...")
     for i, rec_data in enumerate(final_recommendations_data):
         progress(0.6 + (i / len(final_recommendations_data)) * 0.4, desc=f"نكتب شرح لفيلم: {rec_data['movie_info']['title']}")
         explanation = generate_saudi_explanation(
         poster_url = rec_data['movie_info']['poster_path']
         if not poster_url or "placeholder.com" in poster_url:
             poster_url = f"https://via.placeholder.com/300x450.png?text={rec_data['movie_info']['title'].replace(' ', '+')}"
         output_html += f"""
         <div style="display: flex; flex-direction: row-reverse; align-items: flex-start; margin-bottom: 25px; border-bottom: 1px solid #ddd; padding-bottom:15px; background-color: #f9f9f9; border-radius: 8px; padding: 15px;">
             <img src="{poster_url}" alt="{rec_data['movie_info']['title']}" style="width: 150px; max-width:30%; height: auto; margin-left: 20px; border-radius: 5px; box-shadow: 2px 2px 5px rgba(0,0,0,0.1);">
 footer { display: none !important; }
 .gr-button { background-color: #c70039 !important; color: white !important; font-size: 1.2em !important; padding: 10px 20px !important; border-radius: 8px !important; }
 .gr-button:hover { background-color: #a3002f !important; }
 h1, h3 { color: #900c3f !important; }
+""" # Removed .gr-input and .gr-output as they aren't used directly for styling here
 data_loaded_successfully = load_all_data()
 if data_loaded_successfully:
     print("All user data loaded and preprocessed successfully.")
+    # initialize_llm() is called further down while the UI is built; get_recommendations retries on click if that attempt failed
 else:
     print("Failed to load user data. The app might not function correctly.")
     recommend_button = gr.Button("عطني توصيات أفلام!")
     with gr.Column():
         output_recommendations = gr.HTML(label="توصياتك النارية 🔥")
+    # Call initialize_llm once when the interface is defined if data loaded successfully
+    # This way, it tries to load the LLM when the app starts, not just on the first click.
+    if data_loaded_successfully:
+        initialize_llm() # Moved initialization here
+    recommend_button.click(fn=get_recommendations, inputs=[], outputs=[output_recommendations])
     gr.Markdown(
         """
         <div style="text-align: center; margin-top: 30px; font-size: 0.9em; color: #777;">
     )
 if __name__ == "__main__":
+    if not TMDB_API_KEY or (TMDB_API_KEY == "442a13f1865d8936f95aa20737e6f6f5" and not os.environ.get("TMDB_API_KEY")):
         print("\nWARNING: TMDB_API_KEY is using the hardcoded fallback or is missing.")
+    iface.launch(debug=True) # Set debug=False for production or normal HF Space operation