axiilay committed on
Commit
73f54ee
·
1 Parent(s): 94fd0fd

display the output properly

Browse files
Files changed (1) hide show
  1. app.py +59 -16
app.py CHANGED
@@ -20,14 +20,22 @@ model = model.eval()
20
  @spaces.GPU
21
  def process_image(image, model_size, task_type):
22
  """
23
- Process image with DeepSeek-OCR
24
 
25
  Args:
26
  image: PIL Image or file path
27
  model_size: Model size configuration
28
  task_type: OCR task type
 
 
 
 
 
 
29
  """
30
- # GPU 函数内部移动模型到 GPU
 
 
31
  model_gpu = model.cuda().to(torch.bfloat16)
32
 
33
  # Create temporary directory for output
@@ -60,7 +68,7 @@ def process_image(image, model_size, task_type):
60
  config = size_configs.get(model_size, size_configs["Gundam (Recommended)"])
61
 
62
  # Run inference
63
- result = model_gpu.infer(
64
  tokenizer,
65
  prompt=prompt,
66
  image_file=temp_image_path,
@@ -68,17 +76,34 @@ def process_image(image, model_size, task_type):
68
  base_size=config["base_size"],
69
  image_size=config["image_size"],
70
  crop_mode=config["crop_mode"],
71
- save_results=True,
72
  test_compress=True,
73
- eval_mode=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  )
75
 
76
- print(f"====\nresult: {result}\n====\n")
77
- return result
78
 
79
 
80
  # Create Gradio interface
81
- with gr.Blocks(title="DeepSeek-OCR") as demo:
82
  gr.Markdown(
83
  """
84
  # DeepSeek-OCR Document Recognition
@@ -96,7 +121,7 @@ with gr.Blocks(title="DeepSeek-OCR") as demo:
96
  )
97
 
98
  with gr.Row():
99
- with gr.Column():
100
  image_input = gr.Image(
101
  type="pil", label="Upload Image", sources=["upload", "clipboard"]
102
  )
@@ -113,12 +138,29 @@ with gr.Blocks(title="DeepSeek-OCR") as demo:
113
  label="Task Type",
114
  )
115
 
 
 
 
 
 
 
116
  submit_btn = gr.Button("Process Image", variant="primary")
117
 
118
- with gr.Column():
119
- output_text = gr.Textbox(
120
- label="OCR Result", lines=20, show_copy_button=True
121
- )
 
 
 
 
 
 
 
 
 
 
 
122
 
123
  # Examples
124
  gr.Examples(
@@ -127,18 +169,19 @@ with gr.Blocks(title="DeepSeek-OCR") as demo:
127
  ["examples/receipt.jpg", "Base", "Free OCR"],
128
  ],
129
  inputs=[image_input, model_size, task_type],
130
- outputs=output_text,
131
  fn=process_image,
132
  cache_examples=False,
133
  )
134
 
135
  submit_btn.click(
136
  fn=process_image,
137
- inputs=[image_input, model_size, task_type],
138
- outputs=output_text,
139
  )
140
 
141
  # Launch the app
142
  if __name__ == "__main__":
143
  demo.queue(max_size=20)
144
  demo.launch()
 
 
20
  @spaces.GPU
21
  def process_image(image, model_size, task_type):
22
  """
23
+ Process image with DeepSeek-OCR and return multiple output formats.
24
 
25
  Args:
26
  image: PIL Image or file path
27
  model_size: Model size configuration
28
  task_type: OCR task type
29
+
30
+ Returns:
31
+ A tuple containing:
32
+ - Path to the image with bounding boxes.
33
+ - The content of the markdown result file.
34
+ - The plain text OCR result.
35
  """
36
+ if image is None:
37
+ return None, "Please upload an image first.", "Please upload an image first."
38
+
39
  model_gpu = model.cuda().to(torch.bfloat16)
40
 
41
  # Create temporary directory for output
 
68
  config = size_configs.get(model_size, size_configs["Gundam (Recommended)"])
69
 
70
  # Run inference
71
+ plain_text_result = model_gpu.infer(
72
  tokenizer,
73
  prompt=prompt,
74
  image_file=temp_image_path,
 
76
  base_size=config["base_size"],
77
  image_size=config["image_size"],
78
  crop_mode=config["crop_mode"],
79
+ save_results=True, # Ensure results are saved to disk
80
  test_compress=True,
81
+ # eval_mode=True,
82
+ )
83
+
84
+ # Define paths for the generated files
85
+ image_result_path = os.path.join(output_path, "result_with_boxes.jpg")
86
+ markdown_result_path = os.path.join(output_path, "result.mmd")
87
+
88
+ # Read the markdown file content if it exists
89
+ markdown_content = ""
90
+ if os.path.exists(markdown_result_path):
91
+ with open(markdown_result_path, "r", encoding="utf-8") as f:
92
+ markdown_content = f.read()
93
+ else:
94
+ markdown_content = "Markdown result was not generated. This is expected for 'Free OCR' task."
95
+
96
+ # Check if the annotated image exists
97
+ final_image_path = (
98
+ image_result_path if os.path.exists(image_result_path) else None
99
  )
100
 
101
+ # Return all three results. Gradio will handle the temporary file path for the image.
102
+ return final_image_path, markdown_content, plain_text_result
103
 
104
 
105
  # Create Gradio interface
106
+ with gr.Blocks(title="DeepSeek-OCR", theme=gr.themes.Soft()) as demo:
107
  gr.Markdown(
108
  """
109
  # DeepSeek-OCR Document Recognition
 
121
  )
122
 
123
  with gr.Row():
124
+ with gr.Column(scale=1):
125
  image_input = gr.Image(
126
  type="pil", label="Upload Image", sources=["upload", "clipboard"]
127
  )
 
138
  label="Task Type",
139
  )
140
 
141
+ eval_mode_checkbox = gr.Checkbox(
142
+ value=False,
143
+ label="Enable Evaluation Mode",
144
+ info="Returns only plain text, but might be faster. Uncheck to get annotated image and markdown.",
145
+ )
146
+
147
  submit_btn = gr.Button("Process Image", variant="primary")
148
 
149
+ with gr.Column(scale=2):
150
+ with gr.Tabs():
151
+ with gr.TabItem("Annotated Image"):
152
+ output_image = gr.Image(
153
+ label="Result with Bounding Boxes", interactive=False
154
+ )
155
+ with gr.TabItem("Markdown Output"):
156
+ output_markdown = gr.Markdown(label="Markdown Formatted Result")
157
+ with gr.TabItem("Plain Text"):
158
+ output_text = gr.Textbox(
159
+ label="OCR Result (eval_mode == True)",
160
+ lines=20,
161
+ show_copy_button=True,
162
+ interactive=False,
163
+ )
164
 
165
  # Examples
166
  gr.Examples(
 
169
  ["examples/receipt.jpg", "Base", "Free OCR"],
170
  ],
171
  inputs=[image_input, model_size, task_type],
172
+ outputs=[output_image, output_markdown, output_text, eval_mode_checkbox],
173
  fn=process_image,
174
  cache_examples=False,
175
  )
176
 
177
  submit_btn.click(
178
  fn=process_image,
179
+ inputs=[image_input, model_size, task_type, eval_mode_checkbox],
180
+ outputs=[output_image, output_markdown, output_text],
181
  )
182
 
183
  # Launch the app
184
  if __name__ == "__main__":
185
  demo.queue(max_size=20)
186
  demo.launch()
187
+