adding main files

Browse files

Files changed (5) hide show

.gitignore +3 -0
Dockerfile +7 -0
eval.py +147 -0
mteb_meta.py +118 -0
requirements.txt +5 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@


1	+
2	+ .DS_Store
3	+ *.json

Dockerfile ADDED Viewed

	@@ -0,0 +1,7 @@

+FROM huggingface/transformers-pytorch-cpu:latest
+# Install the Python dependencies listed in requirements.txt.
+# --no-cache-dir keeps pip's download cache out of the image layer.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt

eval.py ADDED Viewed

	@@ -0,0 +1,147 @@

+from mteb import MTEB
+import torch
+import clip
+import numpy as np
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+MODEL, PREPROCESS = clip.load("RN50", device=DEVICE)
+TASK_LIST_CLASSIFICATION = [  # classification task names; concatenated into TASK_LIST
+    "AmazonCounterfactualClassification",
+    "AmazonPolarityClassification",
+    "AmazonReviewsClassification",
+    "Banking77Classification",
+    "EmotionClassification",
+    "ImdbClassification",
+    "MassiveIntentClassification",
+    "MassiveScenarioClassification",
+    "MTOPDomainClassification",
+    "MTOPIntentClassification",
+    "ToxicConversationsClassification",
+    "TweetSentimentExtractionClassification",
+]
+TASK_LIST_CLUSTERING = [  # clustering task names; concatenated into TASK_LIST
+    "ArxivClusteringP2P",
+    "ArxivClusteringS2S",
+    "BiorxivClusteringP2P",
+    "BiorxivClusteringS2S",
+    "MedrxivClusteringP2P",
+    "MedrxivClusteringS2S",
+    "RedditClustering",
+    "RedditClusteringP2P",
+    "StackExchangeClustering",
+    "StackExchangeClusteringP2P",
+    "TwentyNewsgroupsClustering",
+]
+TASK_LIST_PAIR_CLASSIFICATION = [  # pair-classification task names; concatenated into TASK_LIST
+    "SprintDuplicateQuestions",
+    "TwitterSemEval2015",
+    "TwitterURLCorpus",
+]
+TASK_LIST_RERANKING = [  # reranking task names; concatenated into TASK_LIST
+    "AskUbuntuDupQuestions",
+    "MindSmallReranking",
+    "SciDocsRR",
+    "StackOverflowDupQuestions",
+]
+TASK_LIST_RETRIEVAL = [  # retrieval task names; concatenated into TASK_LIST
+    "ArguAna",
+    "ClimateFEVER",
+    "CQADupstackAndroidRetrieval",
+    "CQADupstackEnglishRetrieval",
+    "CQADupstackGamingRetrieval",
+    "CQADupstackGisRetrieval",
+    "CQADupstackMathematicaRetrieval",
+    "CQADupstackPhysicsRetrieval",
+    "CQADupstackProgrammersRetrieval",
+    "CQADupstackStatsRetrieval",
+    "CQADupstackTexRetrieval",
+    "CQADupstackUnixRetrieval",
+    "CQADupstackWebmastersRetrieval",
+    "CQADupstackWordpressRetrieval",
+    "DBPedia",
+    "FEVER",
+    "FiQA2018",
+    "HotpotQA",
+    "MSMARCO",
+    "NFCorpus",
+    "NQ",
+    "QuoraRetrieval",
+    "SCIDOCS",
+    "SciFact",
+    "Touche2020",
+    "TRECCOVID",
+]
+TASK_LIST_STS = [  # STS task names (the list also holds "SummEval"); concatenated into TASK_LIST
+    "BIOSSES",
+    "SICK-R",
+    "STS12",
+    "STS13",
+    "STS14",
+    "STS15",
+    "STS16",
+    "STS17",
+    "STS22",
+    "STSBenchmark",
+    "SummEval",
+]
+TASK_LIST = TASK_LIST_CLASSIFICATION
+    + TASK_LIST_CLUSTERING
+    + TASK_LIST_PAIR_CLASSIFICATION
+    + TASK_LIST_RERANKING
+    + TASK_LIST_RETRIEVAL
+    + TASK_LIST_STS
+class ClipModel:
+    """
+    This is an wrapper class for the clip embedding model.
+    """
+    def encode(self, sentences, batch_size=1, **kwargs):
+        """Returns a list of embeddings for the given sentences.
+        Args:
+            sentences (`List[str]`): List of sentences to encode
+            batch_size (`int`): Batch size for the encoding
+        Returns:
+            `List[np.ndarray]` or `List[tensor]`: List of embeddings for the given sentences
+        """
+        embeddings = []
+        for i in range(0, len(sentences)):
+            batch = sentences[i]
+            try:
+                text = clip.tokenize(batch).to(DEVICE)[
+                    :, :77
+                ]  # clip.tokenize(batch).to(DEVICE)
+                with torch.no_grad():
+                    text_features = MODEL.encode_text(text)
+            except:
+                print("too long token")
+                text = clip.tokenize(batch[: (77 * 2)]).to(DEVICE)[
+                    :, :77
+                ]  # clip.tokenize(batch).to(DEVICE)
+                with torch.no_grad():
+                    text_features = MODEL.encode_text(text)
+            embeddings.append(text_features.cpu().numpy().squeeze())
+        return embeddings
+model = ClipModel()
+evaluation = MTEB(tasks=TASK_LIST, output_folder=f"results/clip/", task_langs=["en"])
+evaluation.run(model)

mteb_meta.py ADDED Viewed

	@@ -0,0 +1,118 @@

+"""
+Usage: python mteb_meta.py path_to_results_folder
+Creates evaluation results metadata for the model card.
+E.g.
+---
+tags:
+- mteb
+model-index:
+- name: SGPT-5.8B-weightedmean-msmarco-specb-bitfit
+  results:
+  - task:
+      type: classification
+    dataset:
+      type: mteb/banking77
+      name: MTEB Banking77
+      config: default
+      split: test
+      revision: 44fa15921b4c889113cc5df03dd4901b49161ab7
+    metrics:
+    - type: accuracy
+      value: 84.49350649350649
+---
+"""
+import json
+import logging
+import os
+import sys
+from mteb import MTEB
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+results_folder = sys.argv[1].strip("/")
+model_name = results_folder.split("/")[-1]
+all_results = {}
+for file_name in os.listdir(results_folder):
+    if not file_name.endswith(".json"):
+        logger.info(f"Skipping non-json {file_name}")
+        continue
+    with open(os.path.join(results_folder, file_name), "r", encoding="utf-8") as f:
+        results = json.load(f)
+        all_results = {**all_results, **{file_name.replace(".json", ""): results}}
+MARKER = "---"
+TAGS = "tags:"
+MTEB_TAG = "- mteb"
+HEADER = "model-index:"
+MODEL = f"- name: {model_name}"
+RES = "  results:"
+META_STRING = "\n".join([MARKER, TAGS, MTEB_TAG, HEADER, MODEL, RES])
+ONE_TASK = "  - task:\n      type: {}\n    dataset:\n      type: {}\n      name: {}\n      config: {}\n      split: {}\n      revision: {}\n    metrics:"
+ONE_METRIC = "    - type: {}\n      value: {}"
+SKIP_KEYS = ["std", "evaluation_time", "main_score", "threshold"]
+for ds_name, res_dict in sorted(all_results.items()):
+    mteb_desc = (
+        MTEB(tasks=[ds_name.replace("CQADupstackRetrieval", "CQADupstackAndroidRetrieval")])
+        .tasks[0]
+        .description
+    )
+    hf_hub_name = mteb_desc.get("hf_hub_name", mteb_desc.get("beir_name"))
+    if "CQADupstack" in ds_name:
+        hf_hub_name = "BeIR/cqadupstack"
+    mteb_type = mteb_desc["type"]
+    revision = res_dict.get("dataset_revision") # Okay if it's None
+    split = "test"
+    if ds_name == "MSMARCO":
+        split = "dev" if "dev" in res_dict else "validation"
+    if split not in res_dict:
+        logger.info(f"Skipping {ds_name} as split {split} not present.")
+        continue
+    res_dict = res_dict.get(split)
+    for lang in mteb_desc["eval_langs"]:
+        mteb_name = f"MTEB {ds_name}"
+        mteb_name += f" ({lang})" if len(mteb_desc["eval_langs"]) > 1 else ""
+        # For English there is no language key if it's the only language
+        test_result_lang = res_dict.get(lang) if len(mteb_desc["eval_langs"]) > 1 else res_dict
+        # Skip if the language was not found but it has other languages
+        if test_result_lang is None:
+            continue
+        META_STRING += "\n" + ONE_TASK.format(
+            mteb_type,
+            hf_hub_name,
+            mteb_name,
+            lang if len(mteb_desc["eval_langs"]) > 1 else "default",
+            split,
+            revision
+        )
+        for (metric, score) in test_result_lang.items():
+            if not isinstance(score, dict):
+                score = {metric: score}
+            for sub_metric, sub_score in score.items():
+                if any([x in sub_metric for x in SKIP_KEYS]):
+                    continue
+                META_STRING += "\n" + ONE_METRIC.format(
+                    f"{metric}_{sub_metric}" if metric != sub_metric else metric,
+                    # All MTEB scores are 0-1, multiply them by 100 for 3 reasons:
+                    # 1) It's easier to visually digest (You need two chars less: "0.1" -> "1")
+                    # 2) Others may multiply them by 100, when building on MTEB making it confusing what the range is
+                    # This happend with Text and Code Embeddings paper (OpenAI) vs original BEIR paper
+                    # 3) It's accepted practice (SuperGLUE, GLUE are 0-100)
+                    sub_score * 100,
+                )
+META_STRING += "\n" + MARKER
+if os.path.exists("./mteb_metadata.md"):
+    logger.warning("Overwriting mteb_metadata.md")
+with open(f"./mteb_metadata.md", "w") as f:
+    f.write(META_STRING)

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+mteb
+ftfy
+regex
+tqdm
+git+https://github.com/openai/CLIP.git