# SVGThinker-7B / app.py
# Hugging Face Space file view: commit f82f988 ("Update app.py") by Caffin, verified, 21.6 kB
import gradio as gr
import torch
import os
from PIL import Image
import cairosvg
import io
import tempfile
import argparse
import gc
import yaml
import glob
from huggingface_hub import hf_hub_download
import spaces
# Torch device handle: the first CUDA GPU when one is available, otherwise the CPU.
# NOTE(review): nothing in the visible part of this file reads `device` -- confirm
# whether it is still needed.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
# Shared text-generation pipeline; stays None until load_models() assigns it.
GLOBAL_PIPE = None
# System prompt
# NOTE(review): SYSTEM_PROMPT is not referenced by any code visible in this file.
SYSTEM_PROMPT = "You are a multimodal SVG generation assistant capable of generating SVG code from both text descriptions and images."
def parse_args():
    """Build the command-line parser for the demo server and parse ``sys.argv``.

    Returns:
        argparse.Namespace with the attributes ``listen`` (str),
        ``port`` (int), ``share`` (bool) and ``debug`` (bool).
    """
    cli = argparse.ArgumentParser(description='SVG Generator Service')

    # One (flag, add_argument keyword arguments) pair per supported option.
    option_specs = (
        ('--listen', dict(type=str, default='0.0.0.0',
                          help='Listen address (default: 0.0.0.0)')),
        ('--port', dict(type=int, default=7860,
                        help='Port number (default: 7860)')),
        ('--share', dict(action='store_true',
                         help='Enable gradio share link')),
        ('--debug', dict(action='store_true',
                         help='Enable debug mode')),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)

    return cli.parse_args()
def load_models():
    """Create the shared text-generation pipeline the first time it is needed.

    Returns immediately when ``GLOBAL_PIPE`` is already set. Otherwise the
    tokenizer and causal language model for ``Caffin/SVGThinker-7B`` are
    loaded and a ``"text-generation"`` pipeline built from them is stored in
    the module-level ``GLOBAL_PIPE``.
    """
    global GLOBAL_PIPE
    if GLOBAL_PIPE is not None:
        return

    # Hub repository id of the checkpoint (a local checkpoint directory was
    # used here during development).
    repo_id = "Caffin/SVGThinker-7B"
    tokenizer = AutoTokenizer.from_pretrained(repo_id)

    model_kwargs = {
        # torch.float16 would also work; bfloat16 is the more stable choice.
        "torch_dtype": torch.bfloat16,
        "attn_implementation": "flash_attention_2",
    }
    causal_lm = AutoModelForCausalLM.from_pretrained(repo_id, **model_kwargs)

    GLOBAL_PIPE = pipeline("text-generation", model=causal_lm, tokenizer=tokenizer)
import re
def svg2png(svg_str, max_size=448):
    """Rasterise SVG markup into an RGB PIL image whose longer side is ``max_size``.

    Args:
        svg_str: SVG markup, handed to ``cairosvg.svg2png`` as ``bytestring``.
        max_size: Target length in pixels of the longer side of the result.

    Returns:
        A ``PIL.Image.Image`` in mode ``"RGB"``. Transparent pixels are
        composited onto white, and the aspect ratio of the rendered image is
        kept (each side is at least 1 pixel).

    Raises:
        Whatever ``cairosvg.svg2png`` or ``PIL.Image.open`` raise for input
        that cannot be parsed or rendered.
    """
    # Render at the requested size instead of a fixed 448 px so that a
    # non-default ``max_size`` is rasterised directly rather than rendered at
    # 448 px and then rescaled.
    png_bytes = cairosvg.svg2png(
        bytestring=svg_str,
        output_height=max_size,
        background_color="white",
    )
    rendered = Image.open(io.BytesIO(png_bytes))

    if rendered.mode == "RGBA":
        # Flatten onto a white canvas, using the alpha band as the paste mask.
        flattened = Image.new("RGB", rendered.size, (255, 255, 255))
        flattened.paste(rendered, mask=rendered.split()[3])
    else:
        flattened = rendered.convert("RGB")

    width, height = flattened.size
    # Scale so the longer side equals ``max_size``. The shorter side is
    # clamped to 1 because ``int()`` truncation can yield 0 for extreme
    # aspect ratios, which ``Image.resize`` rejects.
    if width > height:
        target_size = (max_size, max(1, int((max_size / width) * height)))
    else:
        target_size = (max(1, int((max_size / height) * width)), max_size)

    return flattened.resize(target_size)
def extract_svg(text):
    """Return the first ``<svg ... </svg>`` span found in ``text``.

    The search is case-sensitive and non-greedy, and ``.`` also matches
    newlines, so the result runs from the first ``<svg`` to the nearest
    following ``</svg>``.

    Args:
        text: String to search.

    Returns:
        The matched substring, or ``None`` when ``text`` contains no such span.
    """
    found = re.search(r'<svg.*?</svg>', text, flags=re.DOTALL)
    return found.group() if found else None
def generate_svg(text_description):
    """Generate SVG markup and a PNG preview for a text description.

    Args:
        text_description: Description of the image to draw; it is embedded
            verbatim into the generation prompt.

    Returns:
        A 2-tuple ``(svg_code, preview)``. On success ``svg_code`` is the
        extracted ``<svg>...</svg>`` markup and ``preview`` is the image
        returned by ``svg2png``. On any failure the first element is a
        message starting with ``"Error:"`` and the second is ``None``; the
        function does not raise.
    """
    if GLOBAL_PIPE is None:
        # Give a readable message instead of "'NoneType' object is not callable".
        return "Error: the model is not loaded; call load_models() first", None

    try:
        # Clean memory before generation
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

        gen_config = dict(
            do_sample=True,
            temperature=0.6,
            top_p=0.8,
            top_k=50,
            repetition_penalty=1.05,
            early_stopping=True,
            max_new_tokens=4096,
        )

        prompt = f"Review the given information below and generate a svg according to it.\n{text_description}"

        # return_full_text=False keeps the prompt out of the returned text, so
        # an "<svg" that happens to appear in the user's description can never
        # be picked up by extract_svg() in place of the model's own output.
        outputs = GLOBAL_PIPE(prompt, return_full_text=False, **gen_config)
        generated_text = outputs[0]['generated_text']

        svg_str = extract_svg(generated_text)
        if svg_str is None:
            # Without this check svg2png() would receive None and fail with an
            # error message that says nothing about the real cause.
            print("Generation error: no <svg>...</svg> element in the model output")
            return (
                "Error: the model output did not contain a complete "
                "<svg>...</svg> element, please try again",
                None,
            )

        png_image = svg2png(svg_str)
        return svg_str, png_image
    except Exception as e:
        # UI boundary: log the full traceback and report the failure to the
        # caller as text rather than letting it propagate.
        print(f"Generation error: {e}")
        import traceback
        traceback.print_exc()
        return f"Error: {e}", None
@spaces.GPU(duration=75)
def gradio_text_to_svg(text_description, progress=gr.Progress(track_tqdm=True)):
    """Gradio callback behind the "Generate SVG" button of the Text-to-SVG tab.

    Args:
        text_description: Contents of the description textbox.
        progress: Gradio progress tracker; not used directly in the body.

    Returns:
        ``(svg_code, png_image)`` as produced by ``generate_svg``, or
        ``("Please enter a description", None)`` when the description is
        empty or contains only whitespace.
    """
    has_text = bool(text_description) and text_description.strip() != ""
    if has_text:
        svg_code, png_image = generate_svg(text_description)
        return svg_code, png_image
    return "Please enter a description", None
# Example descriptions offered in the UI (clicking one fills the textbox).
# Entries that quote a word or letter are written as single-quoted literals:
# the earlier CSV-style doubled quotes (""X"") were parsed by Python as
# adjacent string literals, which silently dropped the quotation marks.
_EXAMPLE_TEXTS = [
    "The SVG is a black and white line drawing of a chair. The chair has a rectangular backrest with a pattern of small dots arranged in a grid near the top center. It features two armrests that are rounded at the ends. The seat is a horizontal rectangle, and the legs are straight and vertical, with two visible at the front and two at the back. The design is simple and stylized, focusing on basic geometric shapes and clear lines.",
    "The SVG is a black and white illustration featuring a circular shape with a scalloped edge. The circle is outlined with a series of small, evenly spaced, triangular indentations, giving it a textured, wavy appearance. The interior of the circle is plain and unadorned, contrasting with the detailed border. The background is completely white, emphasizing the simplicity and symmetry of the design. The overall orientation is centered, with no additional elements or colors present.",
    "The SVG is a stylized illustration of a satellite dish. The dish is depicted in a light blue color with black outlines, emphasizing its shape and structure. It features a parabolic reflector with a central feed horn, which is also outlined in black. The dish is mounted on a vertical support structure, which connects to a rectangular base, all outlined in black to provide contrast. The orientation of the dish is slightly angled, suggesting it is directed towards a specific point for signal reception. The overall design is simple and uses minimal colors, focusing on the essential elements of the satellite dish.",
    "The SVG is a stylized icon featuring a sailboat on water. The sailboat is depicted with a single triangular sail, colored in a vibrant purple. The boat itself is also purple, with a curved shape that suggests a simple hull design. Below the boat, there are three wavy horizontal lines, also in purple, representing water waves. The overall design is minimalist, using clean lines and a single color to convey the SVG of a sailboat on the water. The orientation is such that the sail is positioned at the top right, while the hull curves towards the bottom left, creating a dynamic sense of movement.",
    "The SVG is a simple, minimalistic SVG graphic. It features a white background with no visible shapes, lines, or colors. The composition is entirely blank, with no distinguishable elements or details present. The absence of any graphical content suggests it might be a placeholder or an empty template.",
    "The SVG is a simple geometric shape consisting of a blue arrow pointing upwards. The arrow is composed of two equilateral triangles joined at their bases, forming a symmetrical, elongated diamond shape. The color is a solid, bright blue, and the background is white. The orientation is vertical, with the point of the arrow directed upwards. There are no additional elements or details within the SVG.",
    'The SVG is a simple black and white graphic featuring a bold, black "X" shape. The "X" is composed of two thick, straight lines intersecting at their midpoints, forming right angles. The background is white, providing a stark contrast that highlights the black lines. The lines are uniform in width and extend to the edges of the SVG, creating a symmetrical and balanced design. There are no additional elements or colors present.',
    "The SVG is a grayscale icon featuring a gear overlapping a document. The document is depicted with a folded corner on the top right, suggesting a page or report. The gear, positioned in the lower right section of the document, has a circular center with eight evenly spaced teeth. The icon uses shades of gray to create contrast and depth, with the gear being darker than the document. The overall design is simple and clean, commonly used to represent settings or configuration options related to documents or files.",
    "The SVG is a simple, stylized representation of a piece of paper with a wavy edge, set against a black background. The paper is white and has a curved, almost serpentine shape, giving it a dynamic appearance. There are three small black dots aligned vertically on the left side of the paper. Additionally, there are two horizontal black lines, one near the top and one near the bottom of the paper. The overall design is minimalistic, using only black and white colors.",
    'The SVG is a logo featuring the text "SKAMBŪCIŲ CENTRAS" in bold, uppercase letters. The text is split into two lines, with "SKAMBŪCIŲ" on the top in orange and "CENTRAS" below it in teal. To the left of the text is a circular icon containing a stylized headset. The headset is depicted with a combination of orange and teal colors, matching the text. The background of the circle is dark blue. The overall design is clean and modern, with a professional appearance.',
    'The SVG is a logo consisting of a stylized lowercase letter "p" in white, set against a solid black circular background. The "p" has a smooth, curved design with a vertical line extending downward. The logo is symmetrical and balanced, with the white "p" prominently centered within the black circle. The color scheme is simple, using only black and white, which creates a strong contrast. The overall shape is circular, and the design is clean and modern.',
    "The SVG is a simple, stylized icon of a camera. It is depicted in a monochromatic gray color scheme. The camera icon has a rectangular shape with rounded corners. At the top section, there are two circular shapes representing the camera's viewfinder or flash. To the right of these circles is a rectangular shape, likely indicating the camera's screen or display. The lower section of the icon is a larger rectangle, representing the main body of the camera. The design is minimalistic, focusing on basic geometric shapes to convey the concept of a camera.",
    "The SVG is a black and white icon featuring two main elements: a speech bubble and a car. The speech bubble is on the left side and is outlined with a thick border, containing a blank interior. It has a tail pointing towards the right, indicating the direction of communication. The car is positioned to the right of the speech bubble, depicted in a simplified, stylized form with a rectangular body and two circular headlights. The car is oriented horizontally, facing to the right. The entire icon uses a monochromatic color scheme with solid black shapes on a white background.",
    "The SVG is a simple black cross on a white background. The cross consists of two thick, black rectangles intersecting at their centers. One rectangle is oriented horizontally, and the other is oriented vertically. The rectangles have rounded edges, giving the cross a smooth appearance. The black color is solid and uniform throughout the shape. There are no additional elements or details present in the SVG.",
    "The SVG is a simple black and white icon featuring a curved arrow. The arrow starts from the left side, curves downward in a smooth, clockwise arc, and then points to the right. The arrowhead is solid and triangular, indicating direction. The entire icon is monochromatic, with a black arrow on a white background. The design is minimalistic, with clean lines and no additional embellishments.",
    "The SVG is a simple black and white graphic featuring a stylized shield shape. The shield is oriented vertically with a symmetrical design. The top and bottom sections have inward-pointing chevron shapes, creating a tapered appearance. The central area is a solid white rectangle, contrasting with the black background. The overall design is minimalistic, using only black and white colors, with clean, sharp lines defining the shape.",
    "The SVG is a simple, stylized icon of a person. It consists of two main elements: a circle representing the head and a curved shape below it representing the body. The icon is black on a white background. The head is a perfect circle, and the body is formed by two connected arcs, creating a symmetrical, minimalist human figure. The lines are smooth and continuous, with no additional details or textures. The icon is oriented vertically and is centered within the SVG.",
    "The SVG is a black and white icon featuring a notebook and a padlock. The notebook is depicted on the left side, with a vertical line indicating its spine and three horizontal lines representing text on the cover. To the right of the notebook is a padlock, shown in an open position with a keyhole symbol in the center. The overall design is simple and uses bold lines to create a clear, recognizable representation of secure documentation or data.",
    "The SVG is a logo of Instagram, displayed in a square format with rounded corners. The central feature is a stylized camera icon, consisting of a circle with a smaller circle inside it, representing the lens. A dot is positioned at the top right of the camera, symbolizing the flash. The color scheme is a gradient of light gray tones on a white background, giving it a minimalist and modern appearance. The design is clean and easily recognizable, aligning with Instagram's branding.",
    "The SVG is a digital illustration of a computer monitor displaying a simplified webpage layout. The monitor has a sleek, modern design with a flat screen and a minimalistic stand. The screen shows three rectangular blocks at the top, two in blue and one in red, likely representing header sections or navigation elements. Below these blocks are horizontal gray lines, indicating text or content sections on the webpage. The overall color scheme is simple, with a focus on blue, red, gray, and white. The illustration is oriented horizontally and uses clean, flat design elements without gradients or shadows.",
    "The SVG is a representation of the national flag of Sweden. It features a blue field with a yellow Nordic cross that extends to the edges of the flag. The cross is positioned vertically and horizontally centered, dividing the flag into four equal quadrants. The blue background contrasts sharply with the bright yellow cross, creating a simple yet striking design. The flag's proportions are rectangular, adhering to the standard dimensions used for national flags.",
    "The SVG is a simple black and white icon of a snowflake. The snowflake has a symmetrical design with six main arms extending outward from the center. Each arm is divided into smaller segments, creating a branching pattern that resembles the natural structure of a snowflake. The lines are solid and evenly spaced, giving the icon a clean and minimalist appearance. The background is plain white, which contrasts sharply with the black lines of the snowflake, making the design stand out clearly.",
    "The SVG is a simple, stylized icon of a shopping cart. It features a minimalist design with smooth, continuous lines. The cart is depicted in a bright blue color. The body of the cart consists of three horizontal, wavy lines representing the contents or structure of the cart. The handle extends upward and curves slightly to the right. The cart has two round wheels at the bottom. The overall design is clean and modern, suitable for use in digital interfaces or as a graphic symbol.",
    "The SVG is a stylized icon featuring a human figure in a simplified, continuous line design. The figure is depicted in a blue gradient, transitioning from a lighter blue at the top to a darker blue at the bottom. The head of the figure is represented by an orange circle, contrasting with the blue body. The figure appears to be standing upright with one arm extended outward, suggesting a dynamic or active pose. The overall design is clean and modern, with smooth curves and a minimalist aesthetic.",
    "The SVG is a black and white icon depicting a cloud connected to a network. The cloud is stylized with smooth, rounded edges and consists of two overlapping shapes, creating a typical cloud appearance. Below the cloud, there is a straight horizontal line representing the ground or a base level. A vertical line connects the cloud to the base, ending in a circular node. This node is connected to the horizontal line, symbolizing a network connection. The design is simple and uses clean lines to convey the concept of cloud connectivity or cloud computing.",
    "The SVG is a simple, monochromatic icon featuring a bell shape centered within a circle. The bell is outlined in a thin, continuous line, creating a minimalist design. The bell has a small loop at the top, indicating where it would be hung. The circle surrounding the bell is also outlined, matching the bell's line thickness. The color scheme is grayscale, with the icon and outlines in a light gray against a white background. The design is clean and modern, suitable for use in digital interfaces or notifications.",
    "The SVG is a black and white icon featuring a stylized eye. The eye is depicted with a thick, curved outline forming the shape of the eye socket. Inside the outline, there is a smaller circle representing the pupil, positioned slightly off-center towards the right. A diagonal slash cuts across the eye from the top left to the bottom right, dividing the icon into two parts. The design is simple and uses only black shapes on a white background.",
    'The SVG is a simple black and white line drawing of the capital letter "U." The letter is depicted with two vertical lines connected at the bottom by a curved line, forming the characteristic shape of the letter "U." The lines are uniform in thickness, and the background is plain white. The SVG is oriented horizontally, with the letter centered within the frame. There are no additional colors, textures, or elements present.',
    'The SVG is a simple, bold graphic of the letter "F" in black against a white background. The letter is uppercase and has a sans-serif font style. The design is minimalistic, with clean lines and no additional embellishments. The "F" consists of a vertical line on the left and two horizontal lines extending to the right, one near the top and one in the middle. The black color is solid and uniform throughout the letter.',
]

# Help text rendered below the tabs.
_USAGE_MARKDOWN = """
## Usage Instructions
- **Text-to-SVG**: Enter a text description or click an example, then click "Generate SVG"
### Performance Tips:
- **Text-to-SVG**: Keep descriptions concise and specific. Focus on 2-3 main elements with clear spatial relationships. Avoid overly complex prompts with too many objects or detailed arrangements. Just retry more times to achieve the satisfying results.
### Supported Descriptions:
- **Objects**: circle, square, triangle, rectangle, star, polygon, heart, diamond, hexagon, oval
- **Colors**: red, blue, green, yellow, purple, orange, pink, black, white, gray, brown, cyan
- **Formations**: arranged in a row, arranged in a circle, grid pattern, scattered, clustered, stacked
- **Styles**: filled, outlined, gradient, striped, dotted, solid, transparent, shaded
"""


def create_interface():
    """Build the Gradio Blocks UI of the demo.

    The page has a single "Text-to-SVG" tab: the left column holds the
    description textbox, the clickable example descriptions and the
    "Generate SVG" button; the right column shows the generated SVG code and
    a PNG preview. The button runs ``gradio_text_to_svg``.

    Returns:
        The ``gr.Blocks`` instance; the caller is expected to ``launch()`` it.
    """
    with gr.Blocks(title="SVGThinker-7B Demo Page", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# SVGThinker-7B Demo Page")
        gr.Markdown("Generate SVG code from images or text descriptions")

        with gr.Tabs():
            # Text-to-SVG tab
            with gr.TabItem("Text-to-SVG"):
                with gr.Row():
                    with gr.Column():
                        text_input = gr.Textbox(
                            label="Description",
                            placeholder="Enter SVG description, e.g.: a red circle with a blue square inside",
                            lines=3,
                        )
                        # gr.Examples expects one row per example, and each
                        # row lists a value per input component.
                        gr.Examples(
                            examples=[[text] for text in _EXAMPLE_TEXTS],
                            inputs=[text_input],
                            label="Example Descriptions (click to use)",
                            examples_per_page=10,
                        )
                        text_generate_btn = gr.Button("Generate SVG", variant="primary")

                    with gr.Column():
                        text_svg_output = gr.Textbox(
                            label="Generated SVG Code",
                            lines=10,
                            max_lines=20,
                            show_copy_button=True,
                        )
                        text_png_preview = gr.Image(label="SVG Preview", type="pil")

                text_generate_btn.click(
                    fn=gradio_text_to_svg,
                    inputs=[text_input],
                    outputs=[text_svg_output, text_png_preview],
                    queue=True,
                )

        # Add usage instructions
        gr.Markdown(_USAGE_MARKDOWN)

    return demo
if __name__ == "__main__":
    # Turn tokenizer parallelism off so the tokenizers library does not emit
    # its parallelization warning.
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    args = parse_args()

    # Build the generation pipeline before the web server starts.
    print("Loading models...")
    load_models()
    print("Models loaded successfully!")

    # Assemble the UI and serve it with the options taken from the command line.
    demo = create_interface()
    launch_options = {
        "server_name": args.listen,
        "server_port": args.port,
        "share": args.share,
        "debug": args.debug,
    }
    demo.launch(**launch_options)