| | import numpy as np |
| | import torch |
| | import torchvision.transforms as T |
| | from decord import VideoReader, cpu |
| | from PIL import Image |
| | from torchvision.transforms.functional import InterpolationMode |
| | from transformers import AutoModel, AutoTokenizer |
| |
|
| | IMAGENET_MEAN = (0.485, 0.456, 0.406) |
| | IMAGENET_STD = (0.229, 0.224, 0.225) |
| |
|
def build_transform(input_size):
    """Build the preprocessing pipeline for a square model input.

    The returned pipeline, in order: converts any non-RGB image to RGB,
    resizes it to ``input_size`` x ``input_size`` using bicubic
    interpolation, applies ``T.ToTensor`` and normalizes with the
    module-level ``IMAGENET_MEAN`` / ``IMAGENET_STD``.

    Args:
        input_size: Side length used for both dimensions of the resize.

    Returns:
        A ``T.Compose`` instance applying the steps above.
    """

    def _ensure_rgb(img):
        # Images that are already RGB are returned untouched.
        return img if img.mode == 'RGB' else img.convert('RGB')

    steps = [
        T.Lambda(_ensure_rgb),
        T.Resize(
            (input_size, input_size),
            interpolation=InterpolationMode.BICUBIC,
        ),
        T.ToTensor(),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
    return T.Compose(steps)
| |
|
| |
|
def preprocess_for_model(image):
    """Prepare an image for the OpenGVLab model.

    Applies ``T.ToTensor`` followed by ``T.Normalize`` with the module-level
    ImageNet statistics, then adds a leading dimension of size 1.

    Args:
        image: Input accepted by ``T.ToTensor`` (a PIL image or ndarray).
            No resizing or colour-mode conversion happens here.
            NOTE(review): the normalization uses three per-channel values,
            so a 3-channel image is presumably expected -- confirm with
            callers.

    Returns:
        The normalized tensor after ``unsqueeze(0)``.
    """
    # Reuse the shared constants rather than repeating the literal values,
    # so this pipeline cannot drift from the one in build_transform().
    transform = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ])
    return transform(image).unsqueeze(0)
| |
|
| |
|
def load_image(image_file):
    """Load an image and return its preprocessed pixel values.

    Args:
        image_file: Either a filesystem path (``str`` or ``os.PathLike``)
            to an image file, or an already-loaded image object that
            ``preprocess_for_model`` accepts directly.

    Returns:
        The value returned by ``preprocess_for_model`` for the image.
    """
    # Function-scope import: the module does not import ``os`` at the top.
    import os

    if isinstance(image_file, (str, os.PathLike)):
        # ToTensor rejects paths, so decode the file first. convert()
        # returns a new, fully loaded image, which lets the file handle be
        # closed as soon as the ``with`` block exits.
        with Image.open(image_file) as opened:
            image = opened.convert('RGB')
    else:
        # Already-loaded images are forwarded untouched.
        image = image_file

    # NOTE(review): an 800px build_transform() pipeline used to be created
    # here but was never applied, so no resizing is performed. Confirm
    # whether a resize was intended before adding one.
    return preprocess_for_model(image)
| |
|
| |
|
# Prompt sent with every request. The example JSON in "Output Format" must
# itself be valid (every entry comma-separated) and its ```json fence must be
# closed, otherwise the model is shown a malformed template to imitate.
_EXTRACTION_PROMPT = """<image>\n**Instruction:**
Analyze the image to extract values for the specified keys. Use the detailed descriptions below to determine the correct value for each key. Handle missing or ambiguous data as instructed.

---

### Keys and Descriptions

1. **`surat_tanda_nomor_kendaraan_bermotor`**
- **Extract**: The value of the field labeled as "Surat Tanda Nomor Kendaraan Bermotor" and this is titel.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

2. **`tempat_tanggal`**
- **Extract**: The location and date from the top right corner of the document.
- **Note**: This field does not have a title such as "Tempat - Tanggal."
- **Format**: `"CITY, DD MMM YYYY"` (e.g., `"JAKARTA, 07 DES 2018"`).
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

3. **`no`**
- **Extract**: The value in the "NO" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

4. **`nomor_registrasi`**
- **Extract**: The "NOMOR REGISTRASI" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

5. **`nama_pemilik`**
- **Extract**: The "NAMA PEMILIK" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

6. **`alamat`**
- **Extract**: The "ALAMAT" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

7. **`merk`**
- **Extract**: The "MERK" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

8. **`type`**
- **Extract**: The "TYPE" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

9. **`jenis`**
- **Extract**: The "JENIS" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

10. **`model`**
- **Extract**: The "MODEL" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

11. **`tahun_pembuatan`**
- **Extract**: The "TAHUN PEMBUATAN" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

12. **`isi_silinder_daya_listrik`**
- **Extract**: The "ISI SILINDER / DAYA LISTRIK" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

13. **`nomor_rangka`**
- **Extract**: The "NOMOR RANGKA" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

14. **`nomor_mesin`**
- **Extract**: The "NOMOR MESIN" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

15. **`nik_tdp_nie_kitas_kitap`**
- **Extract**: The "NIK/TDP/NIE/KITAS/KITAP" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

16. **`warna`**
- **Extract**: The "WARNA" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

17. **`bahan_bakar`**
- **Extract**: The "BAHAN BAKAR" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

18. **`warna_tnkb`**
- **Extract**: The "WARNA TNKB" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

19. **`tahun_registrasi`**
- **Extract**: The "TAHUN REGISTRASI" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

20. **`nomor_bpkb`**
- **Extract**: The "NOMOR BPKB" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

21. **`kode_lokasi`**
- **Extract**: The "KODE LOKASI" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

22. **`no_urut_pendaftaran`**
- **Extract**: The "NO URUT PENDAFTARAN" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`

23. **`berlaku_sampai`**
- **Extract**: The "BERLAKU SAMPAI" field.
- **If the Field is Absent**: `"null"`
- **If the Field is Present but No Value is Provided**: `"empty"`
24. **`qr_code`**
- **Extract**: The value encoded in the QR code, if present.
- **If No QR Code is Found**: `"null"`
- **If a QR Code is Present but Contains No Data**: `"empty"`

---

### Output Format

```json
{
"surat_tanda_nomor_kendaraan_bermotor": "<value> OR empty OR null",
"tempat_tanggal": "<value> OR empty OR null",
"no": "<value> OR empty OR null",
"nomor_registrasi": "<value> OR empty OR null",
"nama_pemilik": "<value> OR empty OR null",
"alamat": "<value> OR empty OR null",
"merk": "<value> OR empty OR null",
"type": "<value> OR empty OR null",
"jenis": "<value> OR empty OR null",
"model": "<value> OR empty OR null",
"tahun_pembuatan": "<value> OR empty OR null",
"isi_silinder_daya_listrik": "<value> OR empty OR null",
"nomor_rangka": "<value> OR empty OR null",
"nomor_mesin": "<value> OR empty OR null",
"nik_tdp_nie_kitas_kitap": "<value> OR empty OR null",
"warna": "<value> OR empty OR null",
"bahan_bakar": "<value> OR empty OR null",
"warna_tnkb": "<value> OR empty OR null",
"tahun_registrasi": "<value> OR empty OR null",
"nomor_bpkb": "<value> OR empty OR null",
"kode_lokasi": "<value> OR empty OR null",
"no_urut_pendaftaran": "<value> OR empty OR null",
"berlaku_sampai": "<value> OR empty OR null",
"qr_code" : "<value> OR empty OR null"
}
```
### Reasoning Process
For each key, explain your reasoning:
Indicate whether the field was present.
Justify the extracted value or the use of "null" or "empty" as per the conditions.
Return Output:
Generate a JSON object:
{
"reasoning": "reasoning for each key",
"output JSON": "key-value pairs"
}
---
"""


def main(image_path, model, tokenizer):
    """Ask the model to extract the document fields listed in the prompt.

    Args:
        image_path: Forwarded to ``load_image`` to obtain the pixel values.
        model: Object exposing
            ``chat(tokenizer, pixel_values, question, generation_config)``.
        tokenizer: Passed through unchanged to ``model.chat``.

    Returns:
        A single string holding the prompt and the model response, laid out
        as a ``User: ...`` line followed by an ``Assistant: ...`` line.
    """
    # Inference is done on CPU in float32.
    pixel_values = load_image(image_path).to(torch.float32).to("cpu")
    # Sampling is enabled, so repeated calls may return different text.
    generation_config = dict(max_new_tokens=1024, do_sample=True)
    question = _EXTRACTION_PROMPT

    print("Before requesting model................................................................................")
    response = model.chat(tokenizer, pixel_values, question, generation_config)
    print("After requesting model................................................................................", response)
    return f'User: {question}\nAssistant: {response}'
| |
|