import hashlib
import json
import re
from pathlib import Path
from typing import Dict, List
class KnowledgeBase:
    """In-memory catalogue of study programs and courses with query helpers.

    ``programs.json`` and ``courses.json`` are read once, at construction
    time, from ``data_dir``. A missing file is reported on stdout and leaves
    the corresponding attribute at its empty default.

    Attributes:
        programs: Parsed content of ``programs.json``. It is looked up by
            program id, so it is presumably a JSON object keyed by id
            (verify against the data file).
        courses: Parsed content of ``courses.json``; iterated as a list of
            course dicts.
        itmo_keywords: Lower-case keywords used by ``is_itmo_query``.
    """

    # Class-level default so ``_load_data`` still works for subclasses that
    # override ``__init__`` without calling it.
    _data_dir = 'data/processed'

    # Keywords of at most this many characters must match as whole words:
    # as raw substrings 'ии' and 'ai' occur inside unrelated words such as
    # 'России' or 'email'.
    _WHOLE_WORD_MAX_LEN = 3

    # Level assumed when the profile omits a skill level or gives an
    # unusable value.
    _DEFAULT_LEVEL = 2

    # Human-readable (Russian) names for interest tags; tags missing here
    # are shown verbatim.
    _TAG_NAMES = {
        'ml': 'машинное обучение',
        'dl': 'глубокое обучение',
        'nlp': 'обработка естественного языка',
        'cv': 'компьютерное зрение',
        'product': 'продуктовая разработка',
        'business': 'бизнес-аналитика',
        'research': 'исследования',
        'data': 'анализ данных',
        'systems': 'системная архитектура',
    }

    def __init__(self, data_dir: str = 'data/processed'):
        """Load programs and courses from ``data_dir``.

        Args:
            data_dir: Directory containing ``programs.json`` and
                ``courses.json``. The default matches the previously
                hard-coded location (relative to the working directory).
        """
        self._data_dir = data_dir
        self.programs = {}
        self.courses = []
        self._load_data()
        self.itmo_keywords = [
            'итмо', 'магистратура', 'учебный план', 'дисциплина', 'курс',
            'ии', 'ai', 'ai product', 'институт ии', 'программа',
            'машинное обучение', 'глубокое обучение', 'nlp', 'компьютерное зрение',
            'нейронные сети', 'анализ данных', 'продуктовая аналитика',
        ]

    def _load_data(self):
        """Populate ``programs`` and ``courses`` from their JSON files."""
        self.programs = self._read_json('programs.json', self.programs)
        self.courses = self._read_json('courses.json', self.courses)

    def _read_json(self, filename: str, fallback):
        """Return the parsed JSON file, or ``fallback`` if it does not exist.

        Only a missing file is tolerated; malformed JSON still raises.
        """
        path = Path(self._data_dir) / filename
        try:
            with open(path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            print(f'Файл {filename} не найден')
            return fallback

    def is_itmo_query(self, message: str) -> bool:
        """Return True if ``message`` mentions any of ``itmo_keywords``.

        Matching is case-insensitive. Short keywords (up to
        ``_WHOLE_WORD_MAX_LEN`` characters) must appear as whole words;
        longer ones match as substrings so inflected forms such as 'курсы'
        still hit 'курс'. An empty or ``None`` message is never a match.
        """
        if not message:
            return False
        text = message.lower()
        for keyword in self.itmo_keywords:
            if len(keyword) <= self._WHOLE_WORD_MAX_LEN:
                # ``\b`` is Unicode-aware for str patterns, so it also
                # delimits Cyrillic words.
                if re.search(rf'\b{re.escape(keyword)}\b', text):
                    return True
            elif keyword in text:
                return True
        return False

    def recommend(self, profile: dict, limit: int = 7) -> List[Dict]:
        """Recommend the best-scoring courses of the profile's semester.

        Args:
            profile: User profile. ``semester`` selects the courses; the
                remaining keys are consumed by the scoring and reason
                helpers.
            limit: Maximum number of recommendations to return.

        Returns:
            Up to ``limit`` dicts with ``semester``, ``name``, ``credits``
            and ``why`` keys, best score first. Empty when the semester is
            missing, not convertible to ``int``, or has no courses.
        """
        raw_semester = profile.get('semester')
        if not raw_semester:
            return []
        try:
            semester = int(raw_semester)
        except (TypeError, ValueError):
            return []

        candidates = [
            course for course in self.courses
            if course.get('semester') == semester
        ]
        # The sort is stable, so equally scored courses keep file order.
        ranked = sorted(
            candidates,
            key=lambda course: self._calculate_recommendation_score(course, profile),
            reverse=True,
        )
        return [
            {
                'semester': course['semester'],
                'name': course.get('name', ''),
                'credits': course.get('credits'),
                'why': self._generate_recommendation_reason(course, profile),
            }
            for course in ranked[:max(limit, 0)]
        ]

    @classmethod
    def _to_level(cls, value) -> float:
        """Coerce a profile skill level to a number.

        Missing or non-numeric values fall back to ``_DEFAULT_LEVEL``.
        """
        try:
            return float(value)
        except (TypeError, ValueError):
            return cls._DEFAULT_LEVEL

    def _calculate_recommendation_score(self, course: dict, profile: dict) -> float:
        """Score how well ``course`` fits ``profile``.

        The score is ``0.6 * similarity + 0.3 * rules + 0.1 * generic``:
        similarity is the share of the user's interests found among the
        course tags, rules add 0.3 for each matching skill or interest rule,
        and generic is a constant 0.1.
        """
        # ``or []`` also covers keys that are present but null.
        interests = profile.get('interests') or []
        course_tags = course.get('tags') or []
        # Levels may arrive as strings or None (e.g. from form input).
        programming_exp = self._to_level(profile.get('programming_experience'))
        math_level = self._to_level(profile.get('math_level'))

        def has_any(*tags):
            return any(tag in course_tags for tag in tags)

        similarity_score = 0.0
        if interests:
            hits = sum(1 for interest in interests if interest in course_tags)
            similarity_score = hits / len(interests)

        rule_score = 0.0
        if programming_exp >= 3 and has_any('ml', 'dl', 'systems'):
            rule_score += 0.3
        if ('product' in interests or 'business' in interests) and has_any(
                'product', 'business', 'pm'):
            rule_score += 0.3
        if math_level >= 3 and has_any('math', 'stats', 'dl'):
            rule_score += 0.3

        generic_score = 0.1
        return 0.6 * similarity_score + 0.3 * rule_score + 0.1 * generic_score

    def _generate_recommendation_reason(self, course: dict, profile: dict) -> str:
        """Return a user-facing (Russian) reason for recommending ``course``.

        Lists the user's interests that appear among the course tags, or
        returns a generic sentence when none match.
        """
        interests = profile.get('interests') or []
        course_tags = course.get('tags') or []
        matching_tags = [tag for tag in interests if tag in course_tags]
        if not matching_tags:
            return 'Курс из учебного плана программы'
        tag_descriptions = [self._TAG_NAMES.get(tag, tag) for tag in matching_tags]
        return f'Соответствует вашим интересам: {", ".join(tag_descriptions)}'

    def get_course_by_id(self, course_id: str) -> dict:
        """Return the first course whose ``id`` equals ``course_id``, or ``{}``."""
        return next(
            (course for course in self.courses if course.get('id') == course_id),
            {},
        )

    def get_program_by_id(self, program_id: str) -> dict:
        """Return the program stored under ``program_id``, or ``{}``."""
        return self.programs.get(program_id, {})

    def search_courses(self, query: str, limit: int = 10) -> List[Dict]:
        """Return up to ``limit`` courses whose name or description contains ``query``.

        Matching is a case-insensitive substring test over the course
        ``name`` and ``short_desc``. A non-positive ``limit`` yields an
        empty list.
        """
        if limit <= 0:
            return []
        needle = query.lower()
        results = []
        for course in self.courses:
            haystack = f"{course.get('name', '')} {course.get('short_desc', '')}".lower()
            if needle in haystack:
                results.append(course)
                if len(results) >= limit:
                    break
        return results