File size: 1,572 Bytes
c509185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from __future__ import annotations

import io
import json
from typing import Any, Dict, List

import pandas as pd

from .base import BaseConnector, ConnectorConfig
from ..storage import Document


def _text_from_csv_bytes(b: bytes, max_rows: int = 500) -> str:
    df = pd.read_csv(io.BytesIO(b))
    if len(df) > max_rows:
        df = df.head(max_rows)
    return df.to_csv(index=False)


def _text_from_json_bytes(b: bytes) -> str:
    try:
        data = json.loads(b.decode("utf-8"))
        return json.dumps(data, indent=2)
    except Exception:
        return b.decode("utf-8", errors="ignore")


class UploadConnector(BaseConnector):
    """Handles CSV/JSON/TXT uploads provided as bytes in params.

    params expected:
      - filename: str
      - mime: str
      - content: bytes
    """

    def fetch(self) -> List[Document]:
        """Convert the uploaded payload into a single text ``Document``.

        The format is chosen from the MIME type first, then the filename
        extension: CSV payloads go through ``_text_from_csv_bytes``, JSON
        payloads through ``_text_from_json_bytes``, and anything else is
        decoded as UTF-8 with undecodable bytes dropped.

        Returns:
            A one-element list whose document has ``upload://<filename>`` as
            its source and carries the filename and MIME type, as supplied,
            in its metadata.
        """
        p: Dict[str, Any] = self.config.params
        filename = p.get("filename", "upload")
        mime = p.get("mime", "text/plain")
        # A present-but-None content is treated the same as a missing one.
        content: bytes = p.get("content") or b""

        # Media types are case-insensitive and may carry parameters such as
        # "text/csv; charset=utf-8"; compare only the bare, lowercased type.
        mime_type = str(mime or "").split(";", 1)[0].strip().lower()
        # Match extensions case-insensitively so "DATA.CSV" is still a CSV.
        name_lower = str(filename).lower()

        if mime_type in ("text/csv", "application/csv") or name_lower.endswith(".csv"):
            text = _text_from_csv_bytes(content)
        elif mime_type == "application/json" or name_lower.endswith(".json"):
            text = _text_from_json_bytes(content)
        else:
            text = content.decode("utf-8", errors="ignore")
        return [
            Document(
                text=text,
                source=f"upload://{filename}",
                # Metadata keeps the values exactly as supplied by the caller.
                metadata={"filename": filename, "mime": mime},
            )
        ]