barunsaha committed on
Commit
622c44e
·
1 Parent(s): 7d35509

Update docstring & imports for icons embeddings

Browse files
src/slidedeckai/helpers/icons_embeddings.py CHANGED
@@ -2,10 +2,7 @@
2
  Generate and save the embeddings of a pre-defined list of icons.
3
  Compare them with keywords embeddings to find most relevant icons.
4
  """
5
- import os
6
- import pathlib
7
- import sys
8
- from typing import List, Tuple
9
 
10
  import numpy as np
11
  from sklearn.metrics.pairwise import cosine_similarity
@@ -18,32 +15,32 @@ tokenizer = BertTokenizer.from_pretrained(GlobalConfig.TINY_BERT_MODEL)
18
  model = BertModel.from_pretrained(GlobalConfig.TINY_BERT_MODEL)
19
 
20
 
21
- def get_icons_list() -> List[str]:
22
  """
23
  Get a list of available icons.
24
 
25
- :return: The icons file names.
 
26
  """
27
-
28
  items = GlobalConfig.ICONS_DIR.glob('*.png')
29
- items = [
30
- item.stem for item in items
31
- ]
32
 
33
  return items
34
 
35
 
36
- def get_embeddings(texts) -> np.ndarray:
37
  """
38
  Generate embeddings for a list of texts using a pre-trained language model.
39
 
40
- :param texts: A string or a list of strings to be converted into embeddings.
41
- :type texts: Union[str, List[str]]
42
- :return: A NumPy array containing the embeddings for the input texts.
43
- :rtype: numpy.ndarray
 
44
 
45
- :raises ValueError: If the input is not a string or a list of strings, or if any element
46
- in the list is not a string.
 
47
 
48
  Example usage:
49
  >>> keyword = 'neural network'
@@ -51,7 +48,6 @@ def get_embeddings(texts) -> np.ndarray:
51
  >>> keyword_embeddings = get_embeddings(keyword)
52
  >>> file_name_embeddings = get_embeddings(file_names)
53
  """
54
-
55
  inputs = tokenizer(texts, return_tensors='pt', padding=True, max_length=128, truncation=True)
56
  outputs = model(**inputs)
57
 
@@ -62,7 +58,6 @@ def save_icons_embeddings():
62
  """
63
  Generate and save the embeddings for the icon file names.
64
  """
65
-
66
  file_names = get_icons_list()
67
  print(f'{len(file_names)} icon files available...')
68
  file_name_embeddings = get_embeddings(file_names)
@@ -73,27 +68,29 @@ def save_icons_embeddings():
73
  np.save(GlobalConfig.ICONS_FILE_NAME, file_names) # Save file names for reference
74
 
75
 
76
- def load_saved_embeddings() -> Tuple[np.ndarray, np.ndarray]:
77
  """
78
  Load precomputed embeddings and icons file names.
79
 
80
- :return: The embeddings and the icon file names.
 
81
  """
82
-
83
  file_name_embeddings = np.load(GlobalConfig.EMBEDDINGS_FILE_NAME)
84
  file_names = np.load(GlobalConfig.ICONS_FILE_NAME)
85
 
86
  return file_name_embeddings, file_names
87
 
88
 
89
- def find_icons(keywords: List[str]) -> List[str]:
90
  """
91
  Find relevant icon file names for a list of keywords.
92
 
93
- :param keywords: The list of one or more keywords.
94
- :return: A list of the file names relevant for each keyword.
95
- """
96
 
 
 
 
97
  keyword_embeddings = get_embeddings(keywords)
98
  file_name_embeddings, file_names = load_saved_embeddings()
99
 
@@ -108,7 +105,6 @@ def main():
108
  """
109
  Example usage.
110
  """
111
-
112
  # Run this again if icons are to be added/removed
113
  save_icons_embeddings()
114
 
 
2
  Generate and save the embeddings of a pre-defined list of icons.
3
  Compare them with keywords embeddings to find most relevant icons.
4
  """
5
+ from typing import Union
 
 
 
6
 
7
  import numpy as np
8
  from sklearn.metrics.pairwise import cosine_similarity
 
15
  model = BertModel.from_pretrained(GlobalConfig.TINY_BERT_MODEL)
16
 
17
 
18
+ def get_icons_list() -> list[str]:
19
  """
20
  Get a list of available icons.
21
 
22
+ Returns:
23
+ The icons file names.
24
  """
 
25
  items = GlobalConfig.ICONS_DIR.glob('*.png')
26
+ items = [item.stem for item in items]
 
 
27
 
28
  return items
29
 
30
 
31
+ def get_embeddings(texts: Union[str, list[str]]) -> np.ndarray:
32
  """
33
  Generate embeddings for a list of texts using a pre-trained language model.
34
 
35
+ Args:
36
+ texts: A string or a list of strings to be converted into embeddings.
37
+
38
+ Returns:
39
+ A NumPy array containing the embeddings for the input texts.
40
 
41
+ Raises:
42
+ ValueError: If the input is not a string or a list of strings, or if any element
43
+ in the list is not a string.
44
 
45
  Example usage:
46
  >>> keyword = 'neural network'
 
48
  >>> keyword_embeddings = get_embeddings(keyword)
49
  >>> file_name_embeddings = get_embeddings(file_names)
50
  """
 
51
  inputs = tokenizer(texts, return_tensors='pt', padding=True, max_length=128, truncation=True)
52
  outputs = model(**inputs)
53
 
 
58
  """
59
  Generate and save the embeddings for the icon file names.
60
  """
 
61
  file_names = get_icons_list()
62
  print(f'{len(file_names)} icon files available...')
63
  file_name_embeddings = get_embeddings(file_names)
 
68
  np.save(GlobalConfig.ICONS_FILE_NAME, file_names) # Save file names for reference
69
 
70
 
71
+ def load_saved_embeddings() -> tuple[np.ndarray, np.ndarray]:
72
  """
73
  Load precomputed embeddings and icons file names.
74
 
75
+ Returns:
76
+ The embeddings and the icon file names.
77
  """
 
78
  file_name_embeddings = np.load(GlobalConfig.EMBEDDINGS_FILE_NAME)
79
  file_names = np.load(GlobalConfig.ICONS_FILE_NAME)
80
 
81
  return file_name_embeddings, file_names
82
 
83
 
84
+ def find_icons(keywords: list[str]) -> list[str]:
85
  """
86
  Find relevant icon file names for a list of keywords.
87
 
88
+ Args:
89
+ keywords: The list of one or more keywords.
 
90
 
91
+ Returns:
92
+ A list of the file names relevant for each keyword.
93
+ """
94
  keyword_embeddings = get_embeddings(keywords)
95
  file_name_embeddings, file_names = load_saved_embeddings()
96
 
 
105
  """
106
  Example usage.
107
  """
 
108
  # Run this again if icons are to be added/removed
109
  save_icons_embeddings()
110