# File size: 1,600 Bytes
# 901117e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import boto3
import logging

import os
from dotenv import load_dotenv

# Load environment variables (e.g. BUCKET_NAME, read below via os.getenv)
# from a .env file, if one is present, before anything queries them.
load_dotenv()

# Configure root logging at INFO so the download progress messages emitted
# by this module are visible, and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def download_model_from_s3(
    local_dir: str = "./model",
    s3_prefix: str = "ml-models/tinybert-sentiment-analysis",
) -> str:
    """Download the fine-tuned model files from an S3 bucket.

    The bucket name is read from the ``BUCKET_NAME`` environment variable.
    A fixed list of model/tokenizer files is fetched from
    ``<bucket>/<s3_prefix>/<file>`` into ``local_dir``. Files that already
    exist locally are skipped (only existence is checked, not content).

    Args:
        local_dir: Directory to store the files in; created if missing.
        s3_prefix: Key prefix ("folder") inside the bucket. Trailing slashes
            are ignored; an empty prefix means the files live at the bucket
            root.

    Returns:
        ``local_dir``, unchanged, once every file is present.

    Raises:
        ValueError: If ``BUCKET_NAME`` is unset or empty.
        Exception: Any error raised by the S3 download is logged and
            re-raised unchanged.
    """
    bucket_name = os.getenv("BUCKET_NAME")
    if not bucket_name:
        raise ValueError("BUCKET_NAME not found in .env file")

    # Normalise the prefix so "models/" does not yield "models//config.json",
    # which S3 would treat as a different (normally nonexistent) key.
    key_prefix = str(s3_prefix).rstrip("/") if s3_prefix else ""

    os.makedirs(local_dir, exist_ok=True)

    s3_client = boto3.client("s3")

    model_files = (
        "config.json",
        "model.safetensors",
        "special_tokens_map.json",
        "tokenizer_config.json",
        "tokenizer.json",
        "vocab.txt",
    )

    logger.info(
        "Downloading model from S3 bucket: %s/%s", bucket_name, key_prefix
    )

    for file_name in model_files:
        local_file_path = os.path.join(local_dir, file_name)

        if os.path.exists(local_file_path):
            logger.info("File %s already exists, skipping...", file_name)
            continue

        s3_key = f"{key_prefix}/{file_name}" if key_prefix else file_name

        logger.info("Downloading %s...", s3_key)
        # Keep the try body limited to the network call so unrelated errors
        # are not misreported as download failures.
        try:
            s3_client.download_file(bucket_name, s3_key, local_file_path)
        except Exception as e:
            logger.error("Error downloading %s: %s", file_name, e)
            raise
        logger.info("Successfully downloaded %s", file_name)

    logger.info("Model download completed successfully")
    return local_dir