Add library_name and pipeline_tag metadata

#1
by nielsr HF Staff - opened
Files changed (1)
  1. README.md +46 -20
README.md CHANGED
@@ -1,5 +1,6 @@
  ---
- license: mit
+ base_model:
+ - CodeGoat24/UnifiedReward-7b
  datasets:
  - CodeGoat24/HPD
  - CodeGoat24/OIP
@@ -10,8 +11,9 @@ datasets:
  - CodeGoat24/Text-2-Video-Human-Preferences
  - CodeGoat24/OpenAI-4o_t2i_human_preference
  - CodeGoat24/ImageGen_Reward_Cold_Start
- base_model:
- - CodeGoat24/UnifiedReward-7b
+ license: mit
+ library_name: transformers
+ pipeline_tag: image-text-to-text
  ---

  ## Model Summary
@@ -65,27 +67,31 @@ Query = 'What does this image present?'
  R1 = 'The image is a black and white sketch of a line that appears to be in the shape of a cross. The line is a simple and straightforward representation of the cross shape, with two straight lines intersecting at a point.'
  R2 = 'This is a handwritten number seven.'

- question = ("<image>\nGiven a question and a reference image, please analyze in detail the two provided answers (Answer 1 and Answer 2). " \
- "Evaluate them based on the following three core dimensions:\n" \
- "1. Semantic accuracy: How well the answer reflects the visual content of the image\n" \
- "2. Correctness: Whether the answer is logically and factually correct\n" \
- "3. Clarity: Whether the answer is clearly and fluently expressed\n" \
- "You may also consider additional dimensions if you find them relevant (e.g., reasoning ability, attention to detail, multimodal grounding, etc.). " \
- "For each dimension, provide a score from 1 to 10 for both answers, and briefly explain your reasoning. " \
- "Then, compute the total score for each answer by explicitly adding the scores for all dimensions and showing the full calculation. " \
- "Enclose your full reasoning within <think> and </think> tags. " \
- "Then, in the <answer> tag, output exactly one of the following: 'Answer 1 is better' or 'Answer 2 is better'. No other text is allowed in the <answer> section.\n\n" \
- "Example format:\n" \
- "<think>\n" \
- "1. Semantic accuracy: Answer 1 (9/10) - ...; Answer 2 (7/10) - ...\n" \
- "2. Correctness: Answer 1 (8/10) - ...; Answer 2 (7/10) - ...\n" \
- "3. Clarity: Answer 1 (9/10) - ...; Answer 2 (8/10) - ...\n" \
- "[Additional dimensions if any]: Answer 1 (6/10) - ...; Answer 2 (7/10) - ...\n" \
- "Total score:\nAnswer 1: 9+8+9+6=32\nAnswer 2: 7+7+8+7=29\n" \
- "</think>\n" \
- "<answer>Answer 1 is better</answer>\n\n" \
- "**Note: In the example above, scores and the final answer are placeholders meant only to demonstrate the format. Your actual evaluation should be based on the quality of two given answers.**\n\n"
- f"Your task is provided as follows:\nQuestion: [{Query}]\nAnswer 1: [{R1}]\nAnswer 2: [{R2}]")
+ question = f"""<image>
+ Given a question and a reference image, please analyze in detail the two provided answers (Answer 1 and Answer 2). Evaluate them based on the following three core dimensions:
+ 1. Semantic accuracy: How well the answer reflects the visual content of the image
+ 2. Correctness: Whether the answer is logically and factually correct
+ 3. Clarity: Whether the answer is clearly and fluently expressed
+ You may also consider additional dimensions if you find them relevant (e.g., reasoning ability, attention to detail, multimodal grounding, etc.). For each dimension, provide a score from 1 to 10 for both answers, and briefly explain your reasoning. Then, compute the total score for each answer by explicitly adding the scores for all dimensions and showing the full calculation. Enclose your full reasoning within <think> and </think> tags. Then, in the <answer> tag, output exactly one of the following: 'Answer 1 is better' or 'Answer 2 is better'. No other text is allowed in the <answer> section.
+
+ Example format:
+ <think>
+ 1. Semantic accuracy: Answer 1 (9/10) - ...; Answer 2 (7/10) - ...
+ 2. Correctness: Answer 1 (8/10) - ...; Answer 2 (7/10) - ...
+ 3. Clarity: Answer 1 (9/10) - ...; Answer 2 (8/10) - ...
+ [Additional dimensions if any]: Answer 1 (6/10) - ...; Answer 2 (7/10) - ...
+ Total score:
+ Answer 1: 9+8+9+6=32
+ Answer 2: 7+7+8+7=29
+ </think>
+ <answer>Answer 1 is better</answer>
+
+ **Note: In the example above, scores and the final answer are placeholders meant only to demonstrate the format. Your actual evaluation should be based on the quality of two given answers.**
+
+ Your task is provided as follows:
+ Question: [{Query}]
+ Answer 1: [{R1}]
+ Answer 2: [{R2}]"""

  conv = copy.deepcopy(conv_templates[conv_template])
  conv.append_message(conv.roles[0], question)
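
For reference, the two added fields drive the Hub's indexing and auto-generated usage snippet: `library_name: transformers` tells the Hub which library's loading code to surface on the model page, and `pipeline_tag: image-text-to-text` files the model under that task. A minimal sketch of the usage this implies, assuming a recent transformers release that ships the `image-text-to-text` pipeline and that the checkpoint loads through the auto classes; the repo id below is the `base_model` from the card, used as a stand-in for this repository's own id:

```python
# Minimal sketch (assumption, not from this PR): loading a model tagged
# library_name: transformers + pipeline_tag: image-text-to-text.
from transformers import pipeline

pipe = pipeline(
    "image-text-to-text",                 # matches the added pipeline_tag
    model="CodeGoat24/UnifiedReward-7b",  # card's base_model, stand-in for this repo's id
)

messages = [{
    "role": "user",
    "content": [
        {"type": "image", "url": "https://example.com/sample.png"},  # placeholder image URL
        {"type": "text", "text": "What does this image present?"},
    ],
}]

out = pipe(text=messages, max_new_tokens=128)
print(out[0]["generated_text"])
```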
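Relatedly, since the prompt in the diff pins the response format to `<think>…</think>` followed by exactly one `<answer>…</answer>` verdict, post-processing the model's reply is a two-regex job. An illustrative sketch; the `parse_judgement` helper and the sample reply below are hypothetical, not part of the repo:

```python
# Illustrative helper (not from the repo): pull the reasoning and the
# final verdict out of a reply that follows the format the prompt enforces.
import re

def parse_judgement(response: str):
    """Return (reasoning, verdict) from '<think>...</think><answer>...</answer>'."""
    think = re.search(r"<think>(.*?)</think>", response, re.DOTALL)
    answer = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
    reasoning = think.group(1).strip() if think else ""
    verdict = answer.group(1).strip() if answer else None
    # The prompt allows exactly two verdict strings; anything else is a
    # format break, and the comparison should be re-run or discarded.
    if verdict not in ("Answer 1 is better", "Answer 2 is better"):
        verdict = None
    return reasoning, verdict

reply = "<think>\nTotal score:\nAnswer 1: 32\nAnswer 2: 29\n</think>\n<answer>Answer 1 is better</answer>"
print(parse_judgement(reply)[1])  # -> Answer 1 is better
```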