luulinh90s committed on
Commit
465850b
Β·
1 Parent(s): 67a3186
app.py CHANGED
@@ -7,6 +7,7 @@ import string
7
  import logging
8
  from datetime import datetime
9
  from huggingface_hub import login, HfApi, hf_hub_download
 
10
 
11
  # Set up logging
12
  logging.basicConfig(level=logging.INFO,
@@ -25,48 +26,47 @@ else:
25
  logger.error("HF_TOKEN not found in environment variables")
26
 
27
  app = Flask(__name__)
28
- app.config['SECRET_KEY'] = 'supersecretkey' # Change this to a random secret key
29
 
30
  # File-based session storage
31
  SESSION_DIR = '/tmp/sessions'
32
  os.makedirs(SESSION_DIR, exist_ok=True)
33
 
34
- # Update the VISUALIZATION_DIRS dictionary
35
  VISUALIZATION_DIRS = {
36
- "No-XAI": "htmls_NO_XAI_mod",
37
  "Dater": "htmls_DATER_mod2",
38
  "Chain-of-Table": "htmls_COT_mod",
39
- "Plan-of-SQLs": "htmls_POS_mod2",
40
- "Text2SQL": "htmls_Text2SQL"
41
  }
42
 
43
- # Update the get_method_dir function
 
44
  def get_method_dir(method):
45
- if method == 'No-XAI':
46
- return 'NO_XAI'
47
- elif method == 'Dater':
48
- return 'DATER'
49
- elif method == 'Chain-of-Table':
50
- return 'COT'
51
- elif method == 'Plan-of-SQLs':
52
- return 'POS'
53
- elif method == 'Text2SQL':
54
- return 'Text2SQL'
55
- else:
56
- return None
57
-
58
- # Update the METHODS list
59
- METHODS = ["No-XAI", "Dater", "Chain-of-Table", "Plan-of-SQLs", "Text2SQL"]
60
 
61
  def generate_session_id():
62
  return str(uuid.uuid4())
63
 
 
64
  def save_session_data(session_id, data):
65
  file_path = os.path.join(SESSION_DIR, f'{session_id}.json')
66
  with open(file_path, 'w') as f:
67
  json.dump(data, f)
68
  logger.info(f"Session data saved for session {session_id}")
69
 
 
70
  def load_session_data(session_id):
71
  file_path = os.path.join(SESSION_DIR, f'{session_id}.json')
72
  if os.path.exists(file_path):
@@ -74,6 +74,7 @@ def load_session_data(session_id):
74
  return json.load(f)
75
  return None
76
 
 
77
  def save_session_data_to_hf(session_id, data):
78
  try:
79
  username = data.get('username', 'unknown')
@@ -88,7 +89,7 @@ def save_session_data_to_hf(session_id, data):
88
  f.write(json_data)
89
 
90
  api = HfApi()
91
- repo_path = "session_data_foward_simulation"
92
 
93
  api.upload_file(
94
  path_or_fileobj=temp_file_path,
@@ -100,229 +101,156 @@ def save_session_data_to_hf(session_id, data):
100
  logger.info(f"Session data saved for session {session_id} in Hugging Face Data Space")
101
  except Exception as e:
102
  logger.exception(f"Error saving session data for session {session_id}: {e}")
103
- #
104
- # def load_samples():
105
- # common_samples = []
106
- # categories = ["TP", "TN", "FP", "FN"]
107
- #
108
- # for category in categories:
109
- # files = set(os.listdir(f'htmls_NO_XAI_mod/{category}'))
110
- # for method in ["Dater", "Chain-of-Table", "Plan-of-SQLs", "Text2SQL"]:
111
- # method_dir = VISUALIZATION_DIRS[method]
112
- # files &= set(os.listdir(f'{method_dir}/{category}'))
113
- #
114
- # for file in files:
115
- # common_samples.append({'category': category, 'file': file})
116
- #
117
- # logger.info(f"Found {len(common_samples)} common samples across all methods")
118
- # return common_samples
119
-
120
- def load_samples(method, metadata):
121
  common_samples = []
122
- categories = ["TP", "TN", "FP", "FN"]
123
-
124
- for category in categories:
125
- # files = set(os.listdir(f'htmls_NO_XAI_mod/{category}'))
126
- method_dir = VISUALIZATION_DIRS[method]
127
- files = set(os.listdir(f'{method_dir}/{category}'))
128
-
129
- for file in files:
130
- index = file.split('-')[1].split('.')[0]
131
- metadata_key = f"{get_method_dir(method)}_test-{index}.html"
132
- sample_metadata = metadata.get(metadata_key, {})
133
-
134
- common_samples.append({
135
- 'category': category,
136
- 'file': file,
137
- 'metadata': sample_metadata
138
- })
139
 
140
- logger.info(f"Found {len(common_samples)} samples for method {method}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  return common_samples
142
 
 
143
  def select_balanced_samples(samples):
144
  try:
145
- # Separate samples into two groups
146
- tp_fp_samples = [s for s in samples if s['category'] in ['TP', 'FP']]
147
- tn_fn_samples = [s for s in samples if s['category'] in ['TN', 'FN']]
148
 
149
- # Check if we have enough samples in each group
150
- if len(tp_fp_samples) < 5 or len(tn_fn_samples) < 5:
151
- logger.warning(f"Not enough samples in each category. TP+FP: {len(tp_fp_samples)}, TN+FN: {len(tn_fn_samples)}")
152
- return samples if len(samples) <= 10 else random.sample(samples, 10)
153
-
154
- # Select 5 samples from each group
155
- selected_tp_fp = random.sample(tp_fp_samples, 5)
156
- selected_tn_fn = random.sample(tn_fn_samples, 5)
157
-
158
- # Combine and shuffle the selected samples
159
- selected_samples = selected_tp_fp + selected_tn_fn
160
- random.shuffle(selected_samples)
 
 
161
 
162
- logger.info(f"Selected 10 balanced samples: 5 from TP+FP, 5 from TN+FN")
163
  return selected_samples
164
  except Exception as e:
165
  logger.exception("Error selecting balanced samples")
166
  return []
167
 
168
- # @app.route('/')
169
- # def introduction():
170
- # return render_template('introduction.html')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  @app.route('/attribution')
173
  def attribution():
174
  return render_template('attribution.html')
175
- #
176
- # @app.route('/index', methods=['GET', 'POST'])
177
- # def index():
178
- # if request.method == 'POST':
179
- # username = request.form.get('username')
180
- # seed = request.form.get('seed')
181
- # method = request.form.get('method')
182
- # if not username or not seed or not method:
183
- # return render_template('index.html', error="Please fill in all fields and select a method.")
184
- # if method not in ['Chain-of-Table', 'Plan-of-SQLs', 'Dater', 'Text2SQL']:
185
- # return render_template('index.html', error="Invalid method selected.")
186
- # try:
187
- # seed = int(seed)
188
- # random.seed(seed)
189
- # all_samples = load_samples()
190
- # selected_samples = select_balanced_samples(all_samples)
191
- # if len(selected_samples) == 0:
192
- # return render_template('index.html', error="No common samples were found")
193
- # start_time = datetime.now().isoformat()
194
- # session_id = generate_session_id()
195
- # session_data = {
196
- # 'username': username,
197
- # 'seed': str(seed),
198
- # 'method': method,
199
- # 'selected_samples': selected_samples,
200
- # 'current_index': 0,
201
- # 'responses': [],
202
- # 'start_time': start_time,
203
- # 'session_id': session_id
204
- # }
205
- # save_session_data(session_id, session_data)
206
- # logger.info(f"Session data stored for user {username}, method {method}, session_id {session_id}")
207
- #
208
- # # Redirect to explanation for all methods
209
- # return redirect(url_for('explanation', session_id=session_id))
210
- # except Exception as e:
211
- # logger.exception(f"Error in index route: {e}")
212
- # return render_template('index.html', error="An error occurred. Please try again.")
213
- # return render_template('index.html', show_no_xai=False)
214
 
215
  @app.route('/index', methods=['GET', 'POST'])
216
  def index():
217
  if request.method == 'POST':
218
  username = request.form.get('username')
219
  seed = request.form.get('seed')
220
- method = request.form.get('method')
221
- if not username or not seed or not method:
222
- return render_template('index.html', error="Please fill in all fields and select a method.")
223
- if method not in ['Chain-of-Table', 'Plan-of-SQLs', 'Dater', 'Text2SQL', 'No-XAI']:
224
- return render_template('index.html', error="Invalid method selected.")
225
  try:
226
  seed = int(seed)
227
  random.seed(seed)
228
 
229
- # Load the appropriate metadata file
230
- if method == "Chain-of-Table":
231
- json_file = 'Tabular_LLMs_human_study_vis_6_COT.json'
232
- elif method == "Plan-of-SQLs":
233
- json_file = 'Tabular_LLMs_human_study_vis_6_POS.json'
234
- elif method == "Dater":
235
- json_file = 'Tabular_LLMs_human_study_vis_6_DATER.json'
236
- elif method == "No-XAI":
237
- json_file = 'Tabular_LLMs_human_study_vis_6_NO_XAI.json'
238
- elif method == "Text2SQL":
239
- json_file = 'Tabular_LLMs_human_study_vis_6_Text2SQL.json'
240
-
241
- with open(json_file, 'r') as f:
242
- metadata = json.load(f)
243
-
244
- all_samples = load_samples(method, metadata)
245
  selected_samples = select_balanced_samples(all_samples)
 
246
  if len(selected_samples) == 0:
247
  return render_template('index.html', error="No common samples were found")
248
- start_time = datetime.now().isoformat()
 
249
  session_id = generate_session_id()
250
  session_data = {
251
  'username': username,
252
  'seed': str(seed),
253
- 'method': method,
254
  'selected_samples': selected_samples,
255
  'current_index': 0,
256
  'responses': [],
257
- 'start_time': start_time,
258
  'session_id': session_id
259
  }
260
  save_session_data(session_id, session_data)
261
- logger.info(f"Session data stored for user {username}, method {method}, session_id {session_id}")
262
 
263
- # Redirect to explanation for all methods
264
- return redirect(url_for('explanation', session_id=session_id))
265
  except Exception as e:
266
  logger.exception(f"Error in index route: {e}")
267
  return render_template('index.html', error="An error occurred. Please try again.")
268
- return render_template('index.html', show_no_xai=False)
269
-
270
- @app.route('/explanation/<session_id>')
271
- def explanation(session_id):
272
- session_data = load_session_data(session_id)
273
- if not session_data:
274
- logger.error(f"No session data found for session ID: {session_id}")
275
- return redirect(url_for('index'))
276
-
277
- method = session_data.get('method')
278
- if not method:
279
- logger.error(f"No method found in session data for session ID: {session_id}")
280
- return redirect(url_for('index'))
281
-
282
- if method == 'Chain-of-Table':
283
- return render_template('cot_intro.html', session_id=session_id)
284
- elif method == 'Plan-of-SQLs':
285
- return render_template('pos_intro.html', session_id=session_id)
286
- elif method == 'Dater':
287
- return render_template('dater_intro.html', session_id=session_id)
288
- elif method == 'Text2SQL':
289
- return render_template('text2sql_intro.html', session_id=session_id)
290
- else:
291
- logger.error(f"Invalid method '{method}' for session ID: {session_id}")
292
- return redirect(url_for('index'))
293
- #
294
- # @app.route('/experiment/<session_id>', methods=['GET', 'POST'])
295
- # def experiment(session_id):
296
- # try:
297
- # session_data = load_session_data(session_id)
298
- # if not session_data:
299
- # return redirect(url_for('index'))
300
- #
301
- # selected_samples = session_data['selected_samples']
302
- #
303
- # method = session_data['method']
304
- # current_index = session_data['current_index']
305
- #
306
- # if current_index >= len(selected_samples):
307
- # return redirect(url_for('completed', session_id=session_id))
308
- #
309
- # sample = selected_samples[current_index]
310
- # visualization_dir = VISUALIZATION_DIRS[method]
311
- # visualization_path = f"{visualization_dir}/{sample['category']}/{sample['file']}"
312
- #
313
- # statement = """
314
- # Please note that in select row function, starting index is 0 for Chain-of-Table and 1 for Dater and Index * represents the selection for all rows.
315
- # """
316
- #
317
- # return render_template('experiment.html',
318
- # sample_id=current_index,
319
- # statement=statement,
320
- # visualization=url_for('send_visualization', filename=visualization_path),
321
- # session_id=session_id,
322
- # method=method)
323
- # except Exception as e:
324
- # logger.exception(f"An error occurred in the experiment route: {e}")
325
- # return "An error occurred", 500
326
 
327
  @app.route('/experiment/<session_id>', methods=['GET', 'POST'])
328
  def experiment(session_id):
@@ -332,200 +260,106 @@ def experiment(session_id):
332
  return redirect(url_for('index'))
333
 
334
  selected_samples = session_data['selected_samples']
335
- method = session_data['method']
336
  current_index = session_data['current_index']
337
 
338
  if current_index >= len(selected_samples):
339
  return redirect(url_for('completed', session_id=session_id))
340
 
341
- sample = selected_samples[current_index]
342
- visualization_dir = VISUALIZATION_DIRS[method]
343
- visualization_path = f"{visualization_dir}/{sample['category']}/{sample['file']}"
 
 
 
 
 
344
 
345
- # Extract metadata
346
- metadata = sample.get('metadata', {})
 
 
 
 
 
347
 
348
- # Log the metadata
349
- logger.info(f"Sample metadata for session {session_id}, method {method}, index {current_index}: {metadata}")
 
 
 
 
 
350
 
351
- statement = metadata['statement']
352
- if method == 'Text2SQL':
353
- statement = f""
354
 
355
  return render_template('experiment.html',
356
  sample_id=current_index,
357
  statement=statement,
358
- visualization=url_for('send_visualization', filename=visualization_path),
359
- session_id=session_id,
360
- method=method,
361
- metadata=metadata) # Pass metadata to the template
362
  except Exception as e:
363
  logger.exception(f"An error occurred in the experiment route: {e}")
364
  return "An error occurred", 500
365
 
366
- @app.route('/')
367
- def root():
368
- return redirect(url_for('consent'))
369
 
370
- @app.route('/consent', methods=['GET', 'POST'])
371
- def consent():
372
- if request.method == 'POST':
373
- # User has agreed to the consent
374
- return redirect(url_for('introduction'))
375
- return render_template('consent.html')
376
-
377
- @app.route('/introduction')
378
- def introduction():
379
- return render_template('introduction.html')
380
-
381
- @app.route('/subjective/<session_id>', methods=['GET', 'POST'])
382
- def subjective(session_id):
383
- if request.method == 'POST':
384
- understanding = request.form.get('understanding')
385
-
386
- session_data = load_session_data(session_id)
387
- if not session_data:
388
- logger.error(f"No session data found for session: {session_id}")
389
- return redirect(url_for('index'))
390
-
391
- session_data['subjective_feedback'] = understanding
392
- save_session_data(session_id, session_data)
393
-
394
- return redirect(url_for('completed', session_id=session_id))
395
-
396
- return render_template('subjective.html', session_id=session_id)
397
-
398
- @app.route('/feedback', methods=['POST'])
399
- def feedback():
400
- try:
401
- session_id = request.form['session_id']
402
- prediction = request.form['prediction']
403
-
404
- session_data = load_session_data(session_id)
405
- if not session_data:
406
- logger.error(f"No session data found for session: {session_id}")
407
- return redirect(url_for('index'))
408
-
409
- session_data['responses'].append({
410
- 'sample_id': session_data['current_index'],
411
- 'user_prediction': prediction
412
- })
413
-
414
- session_data['current_index'] += 1
415
- save_session_data(session_id, session_data)
416
- logger.info(f"Prediction saved for session {session_id}, sample {session_data['current_index'] - 1}")
417
-
418
- if session_data['current_index'] >= len(session_data['selected_samples']):
419
- return redirect(url_for('subjective', session_id=session_id))
420
-
421
- return redirect(url_for('experiment', session_id=session_id))
422
- except Exception as e:
423
- logger.exception(f"Error in feedback route: {e}")
424
- return "An error occurred", 500
425
-
426
- # Update the completed route to include Text2SQL
427
  @app.route('/completed/<session_id>')
428
  def completed(session_id):
429
  try:
430
  session_data = load_session_data(session_id)
431
  if not session_data:
432
- logger.error(f"No session data found for session: {session_id}")
433
  return redirect(url_for('index'))
434
 
435
  session_data['end_time'] = datetime.now().isoformat()
436
  responses = session_data['responses']
437
- method = session_data['method']
438
-
439
- if method == "Chain-of-Table":
440
- json_file = 'Tabular_LLMs_human_study_vis_6_COT.json'
441
- elif method == "Plan-of-SQLs":
442
- json_file = 'Tabular_LLMs_human_study_vis_6_POS.json'
443
- elif method == "Dater":
444
- json_file = 'Tabular_LLMs_human_study_vis_6_DATER.json'
445
- elif method == "No-XAI":
446
- json_file = 'Tabular_LLMs_human_study_vis_6_NO_XAI.json'
447
- elif method == "Text2SQL":
448
- json_file = 'Tabular_LLMs_human_study_vis_6_Text2SQL.json'
449
- else:
450
- return "Invalid method", 400
451
-
452
- with open(json_file, 'r') as f:
453
- ground_truth = json.load(f)
454
-
455
- correct_predictions = 0
456
- true_predictions = 0
457
- false_predictions = 0
458
-
459
- for response in responses:
460
- sample_id = response['sample_id']
461
- user_prediction = response['user_prediction']
462
- visualization_file = session_data['selected_samples'][sample_id]['file']
463
- index = visualization_file.split('-')[1].split('.')[0]
464
-
465
- ground_truth_key = f"{get_method_dir(method)}_test-{index}.html"
466
- logger.info(f"ground_truth_key: {ground_truth_key}")
467
-
468
- if ground_truth_key in ground_truth:
469
- # TODO: Important Note ->
470
- # Using model prediction as we are doing forward simulation
471
- # Please use ground_truth[ground_truth_key]['answer'].upper() if running verification task
472
- model_prediction = ground_truth[ground_truth_key]['prediction'].upper()
473
- if user_prediction.upper() == model_prediction:
474
- correct_predictions += 1
475
 
476
- if user_prediction.upper() == "TRUE":
477
- true_predictions += 1
478
- elif user_prediction.upper() == "FALSE":
479
- false_predictions += 1
480
- else:
481
- logger.warning(f"Missing key in ground truth: {ground_truth_key}")
482
 
483
- accuracy = (correct_predictions / len(responses)) * 100 if responses else 0
484
- accuracy = round(accuracy, 2)
485
-
486
- true_percentage = (true_predictions / len(responses)) * 100 if len(responses) else 0
487
- false_percentage = (false_predictions / len(responses)) * 100 if len(responses) else 0
488
-
489
- true_percentage = round(true_percentage, 2)
490
- false_percentage = round(false_percentage, 2)
491
-
492
- session_data['accuracy'] = accuracy
493
- session_data['true_percentage'] = true_percentage
494
- session_data['false_percentage'] = false_percentage
495
 
496
- # Save all the data to Hugging Face at the end
497
  save_session_data_to_hf(session_id, session_data)
498
 
499
- # Remove the local session data file
500
- os.remove(os.path.join(SESSION_DIR, f'{session_id}.json'))
 
 
 
 
 
 
 
 
 
501
 
502
- return render_template('completed.html',
503
- accuracy=accuracy,
504
- true_percentage=true_percentage,
505
- false_percentage=false_percentage)
506
  except Exception as e:
507
  logger.exception(f"An error occurred in the completed route: {e}")
508
  return "An error occurred", 500
509
 
 
510
  @app.route('/visualizations/<path:filename>')
511
  def send_visualization(filename):
512
- logger.info(f"Attempting to serve file: {filename}")
513
  base_dir = os.getcwd()
514
  file_path = os.path.normpath(os.path.join(base_dir, filename))
515
  if not file_path.startswith(base_dir):
516
  return "Access denied", 403
517
-
518
  if not os.path.exists(file_path):
519
  return "File not found", 404
520
-
521
  directory = os.path.dirname(file_path)
522
  file_name = os.path.basename(file_path)
523
- logger.info(f"Serving file from directory: {directory}, filename: {file_name}")
524
  return send_from_directory(directory, file_name)
525
 
526
- @app.route('/visualizations/<path:filename>')
527
- def send_examples(filename):
528
- return send_from_directory('', filename)
529
 
530
  if __name__ == "__main__":
531
  app.run(host="0.0.0.0", port=7860, debug=True)
 
7
  import logging
8
  from datetime import datetime
9
  from huggingface_hub import login, HfApi, hf_hub_download
10
+ from statistics import mean
11
 
12
  # Set up logging
13
  logging.basicConfig(level=logging.INFO,
 
26
  logger.error("HF_TOKEN not found in environment variables")
27
 
28
app = Flask(__name__)
# Signing key for Flask. Prefer a value supplied through the SECRET_KEY
# environment variable so the real key does not live in the repository;
# the historical literal is kept only as a fallback so existing
# deployments keep starting unchanged.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', 'supersecretkey')

# File-based session storage: one JSON file per session id.
SESSION_DIR = '/tmp/sessions'
os.makedirs(SESSION_DIR, exist_ok=True)

# Directory holding the HTML visualizations of each of the 4 methods.
VISUALIZATION_DIRS = {
    "Text2SQL": "htmls_Text2SQL",
    "Dater": "htmls_DATER_mod2",
    "Chain-of-Table": "htmls_COT_mod",
    "Plan-of-SQLs": "htmls_POS_mod2",
}
42
 
43
+
44
+ # Update method directory mapping
45
def get_method_dir(method):
    """Return the short tag used in metadata keys/file names for ``method``.

    Returns ``None`` when ``method`` is not one of the four known methods.
    """
    short_tags = dict([
        ('Text2SQL', 'Text2SQL'),
        ('Dater', 'DATER'),
        ('Chain-of-Table', 'COT'),
        ('Plan-of-SQLs', 'POS'),
    ])
    if method in short_tags:
        return short_tags[method]
    return None
53
+
54
+
55
# The four methods that participants are asked to rank, in display order.
METHODS = [
    "Text2SQL",
    "Dater",
    "Chain-of-Table",
    "Plan-of-SQLs",
]
57
+
 
 
 
58
 
59
def generate_session_id():
    """Return a fresh random (version 4) UUID rendered as a string."""
    new_id = uuid.uuid4()
    return str(new_id)
61
 
62
+
63
def save_session_data(session_id, data):
    """Serialize ``data`` as JSON into ``SESSION_DIR/<session_id>.json``.

    Any existing file for the same session id is overwritten. A log line is
    emitted once the write has completed.
    """
    session_file = f'{session_id}.json'
    target = os.path.join(SESSION_DIR, session_file)
    with open(target, 'w') as out:
        json.dump(data, out)
    logger.info(f"Session data saved for session {session_id}")
68
 
69
+
70
  def load_session_data(session_id):
71
  file_path = os.path.join(SESSION_DIR, f'{session_id}.json')
72
  if os.path.exists(file_path):
 
74
  return json.load(f)
75
  return None
76
 
77
+
78
  def save_session_data_to_hf(session_id, data):
79
  try:
80
  username = data.get('username', 'unknown')
 
89
  f.write(json_data)
90
 
91
  api = HfApi()
92
+ repo_path = "session_data_preference_ranking"
93
 
94
  api.upload_file(
95
  path_or_fileobj=temp_file_path,
 
101
  logger.info(f"Session data saved for session {session_id} in Hugging Face Data Space")
102
  except Exception as e:
103
  logger.exception(f"Error saving session data for session {session_id}: {e}")
104
+
105
+
106
def load_samples_for_all_methods(metadata_files):
    """Collect the visualization samples that exist for every method.

    For each method in ``METHODS`` the directories
    ``<VISUALIZATION_DIRS[method]>/<category>`` (category in TP, TN, FP, FN)
    are listed. Each file name is expected to look like
    ``<prefix>-<index>.<ext>``; the index is used to build the metadata key
    ``<get_method_dir(method)>_test-<index>.html``.

    Args:
        metadata_files: dict mapping method name -> metadata dict (keyed by
            the metadata key described above).

    Returns:
        A list with one dict per file name present for *all* methods. Each
        dict maps method name -> ``{'category', 'file', 'metadata'}``. The
        list is ordered by file name so that a seeded random selection made
        on it afterwards is reproducible across processes.
    """
    categories = ["TP", "TN", "FP", "FN"]
    # method -> {file name -> sample}; a dict gives O(1) lookup per file.
    samples_by_method = {}

    for method in METHODS:
        by_file = {}
        samples_by_method[method] = by_file

        method_metadata = metadata_files.get(method)
        if method_metadata is None:
            # Without metadata the method contributes no samples, which in
            # turn yields no common samples (same outcome as before).
            logger.error(f"Error loading samples for method {method}: no metadata loaded")
            continue

        method_dir = VISUALIZATION_DIRS[method]
        for category in categories:
            try:
                files = sorted(os.listdir(f'{method_dir}/{category}'))
            except OSError as e:
                logger.error(f"Error loading samples for method {method}, category {category}: {e}")
                continue

            for file in files:
                parts = file.split('-')
                if len(parts) < 2:
                    # Stray file (no '-<index>' part): skip just this file
                    # instead of abandoning the rest of the category.
                    logger.warning(f"Skipping unexpected file {file} for method {method}, category {category}")
                    continue
                index = parts[1].split('.')[0]
                metadata_key = f"{get_method_dir(method)}_test-{index}.html"

                # Keep the first occurrence of a file name (categories are
                # visited in the fixed order above).
                by_file.setdefault(file, {
                    'category': category,
                    'file': file,
                    'metadata': method_metadata.get(metadata_key, {}),
                })

    if not samples_by_method:
        # No methods configured: set.intersection() needs at least one set.
        return []

    # File names available for every method, in deterministic order.
    file_sets = [set(by_file) for by_file in samples_by_method.values()]
    common_files = sorted(set.intersection(*file_sets))

    return [
        {method: samples_by_method[method][file_name] for method in METHODS}
        for file_name in common_files
    ]
156
 
157
+
158
def select_balanced_samples(samples):
    """Pick up to 10 sample groups, balanced between TP/FP and TN/FN.

    Each element of ``samples`` is a dict mapping method name -> sample; the
    category of the first method in the dict decides which bucket the group
    belongs to. When both buckets hold at least 5 groups, 5 are drawn from
    each and the result is shuffled. Otherwise a warning is logged and up to
    10 groups are drawn from all of ``samples``. Any error is logged and an
    empty list is returned.
    """
    try:
        tp_fp_group = []
        tn_fn_group = []
        for group in samples:
            # Category is read from whichever method comes first in the group.
            category = next(iter(group.values()))['category']
            if category in ('TP', 'FP'):
                tp_fp_group.append(group)
            elif category in ('TN', 'FN'):
                tn_fn_group.append(group)

        if len(tp_fp_group) < 5 or len(tn_fn_group) < 5:
            logger.warning(
                f"Not enough samples for balanced selection. TP+FP: {len(tp_fp_group)}, TN+FN: {len(tn_fn_group)}")
            return random.sample(samples, min(10, len(samples)))

        chosen = random.sample(tp_fp_group, 5) + random.sample(tn_fn_group, 5)
        random.shuffle(chosen)
        return chosen
    except Exception as e:
        logger.exception("Error selecting balanced samples")
        return []
182
 
183
+
184
@app.route('/')
def root():
    """Send visitors of the site root to the consent page."""
    consent_url = url_for('consent')
    return redirect(consent_url)
187
+
188
+
189
@app.route('/consent', methods=['GET', 'POST'])
def consent():
    """Render the consent page; a POST moves on to the introduction."""
    if request.method != 'POST':
        return render_template('consent.html')
    return redirect(url_for('introduction'))
194
+
195
+
196
@app.route('/introduction')
def introduction():
    """Render the introduction page."""
    page = render_template('introduction.html')
    return page
199
+
200
 
201
@app.route('/attribution')
def attribution():
    """Render the attribution page."""
    page = render_template('attribution.html')
    return page
204
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
@app.route('/index', methods=['GET', 'POST'])
def index():
    """Start page: collect username and seed, then create a session.

    GET renders the form. POST validates the form, seeds the ``random``
    module, loads the metadata JSON of every method in ``METHODS``, selects
    the samples, stores a new session file and redirects to the experiment.
    Validation problems and unexpected errors re-render the form with an
    error message.
    """
    if request.method == 'POST':
        username = request.form.get('username')
        seed = request.form.get('seed')

        if not username or not seed:
            return render_template('index.html', error="Please fill in all fields.")

        # A malformed seed is a user input problem, not a server error:
        # report it explicitly instead of logging a stack trace.
        try:
            seed = int(seed)
        except ValueError:
            return render_template('index.html', error="Seed must be an integer.")

        try:
            random.seed(seed)

            # Load metadata for all methods
            metadata_files = {}
            for method in METHODS:
                json_file = f'Tabular_LLMs_human_study_vis_6_{get_method_dir(method)}.json'
                # JSON is UTF-8; do not depend on the platform default encoding.
                with open(json_file, 'r', encoding='utf-8') as f:
                    metadata_files[method] = json.load(f)

            # Load and select samples
            all_samples = load_samples_for_all_methods(metadata_files)
            selected_samples = select_balanced_samples(all_samples)

            if not selected_samples:
                return render_template('index.html', error="No common samples were found")

            # Create session
            session_id = generate_session_id()
            session_data = {
                'username': username,
                'seed': str(seed),
                'selected_samples': selected_samples,
                'current_index': 0,
                'responses': [],
                'start_time': datetime.now().isoformat(),
                'session_id': session_id
            }
            save_session_data(session_id, session_data)

            return redirect(url_for('experiment', session_id=session_id))

        except Exception as e:
            logger.exception(f"Error in index route: {e}")
            return render_template('index.html', error="An error occurred. Please try again.")

    return render_template('index.html')
253
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
 
255
  @app.route('/experiment/<session_id>', methods=['GET', 'POST'])
256
  def experiment(session_id):
 
260
  return redirect(url_for('index'))
261
 
262
  selected_samples = session_data['selected_samples']
 
263
  current_index = session_data['current_index']
264
 
265
  if current_index >= len(selected_samples):
266
  return redirect(url_for('completed', session_id=session_id))
267
 
268
+ if request.method == 'POST':
269
+ # Validate and save rankings
270
+ rankings = {method: int(request.form.get(method)) for method in METHODS}
271
+
272
+ if not all(1 <= rank <= 4 for rank in rankings.values()):
273
+ return "Invalid rankings. Please use numbers 1-4.", 400
274
+ if len(set(rankings.values())) != 4:
275
+ return "Each method must have a unique rank.", 400
276
 
277
+ session_data['responses'].append({
278
+ 'sample_id': current_index,
279
+ 'rankings': rankings
280
+ })
281
+ session_data['current_index'] += 1
282
+ save_session_data(session_id, session_data)
283
+ return redirect(url_for('experiment', session_id=session_id))
284
 
285
+ # Get current sample group and prepare visualizations
286
+ sample_group = selected_samples[current_index]
287
+ visualizations = {
288
+ method: url_for('send_visualization',
289
+ filename=f"{VISUALIZATION_DIRS[method]}/{sample['category']}/{sample['file']}")
290
+ for method, sample in sample_group.items()
291
+ }
292
 
293
+ # Get metadata from any method (they should all have the same statement)
294
+ sample_metadata = next(iter(sample_group.values()))['metadata']
295
+ statement = sample_metadata.get('statement', '')
296
 
297
  return render_template('experiment.html',
298
  sample_id=current_index,
299
  statement=statement,
300
+ visualizations=visualizations,
301
+ methods=METHODS,
302
+ session_id=session_id)
303
+
304
  except Exception as e:
305
  logger.exception(f"An error occurred in the experiment route: {e}")
306
  return "An error occurred", 500
307
 
 
 
 
308
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
@app.route('/completed/<session_id>')
def completed(session_id):
    """Finalize a session and show the participant's average rankings.

    Loads the session file, computes the mean rank each method received
    over all recorded responses, uploads the session data through
    ``save_session_data_to_hf``, removes the local session file and renders
    ``completed.html``. An unknown session redirects to the index page; a
    session without any recorded response redirects back to the experiment.
    Unexpected errors are logged and answered with HTTP 500.
    """
    try:
        session_data = load_session_data(session_id)
        if not session_data:
            return redirect(url_for('index'))

        responses = session_data['responses']
        if not responses:
            # mean() raises StatisticsError on empty data; nothing has been
            # ranked yet, so send the participant to the experiment instead
            # of finalizing (and deleting) an unstarted session.
            return redirect(url_for('experiment', session_id=session_id))

        session_data['end_time'] = datetime.now().isoformat()

        # Calculate average ranking for each method
        average_rankings = {
            method: mean(r['rankings'][method] for r in responses)
            for method in METHODS
        }

        # Sort methods by average ranking (ascending)
        sorted_methods = sorted(
            average_rankings.items(),
            key=lambda x: x[1]
        )

        session_data['average_rankings'] = average_rankings
        save_session_data_to_hf(session_id, session_data)

        # Clean up local session file; failure here must not break the page.
        try:
            os.remove(os.path.join(SESSION_DIR, f'{session_id}.json'))
        except OSError as e:
            logger.warning(f"Error removing session file: {e}")

        return render_template(
            'completed.html',
            average_rankings=average_rankings,
            sorted_methods=sorted_methods
        )

    except Exception as e:
        logger.exception(f"An error occurred in the completed route: {e}")
        return "An error occurred", 500
349
 
350
+
351
@app.route('/visualizations/<path:filename>')
def send_visualization(filename):
    """Serve a file located under the current working directory.

    Args:
        filename: path relative to the working directory, taken from the URL.

    Returns:
        The file response, ``403`` when the normalized path falls outside
        the working directory, or ``404`` when the file does not exist.
    """
    base_dir = os.path.abspath(os.getcwd())
    file_path = os.path.normpath(os.path.join(base_dir, filename))

    # A plain string prefix test would accept sibling directories such as
    # "<base_dir>_other"; compare whole path components instead.
    try:
        inside_base = os.path.commonpath([base_dir, file_path]) == base_dir
    except ValueError:
        # Raised e.g. for paths on different drives: cannot be inside.
        inside_base = False
    if not inside_base:
        return "Access denied", 403

    if not os.path.exists(file_path):
        return "File not found", 404

    directory = os.path.dirname(file_path)
    file_name = os.path.basename(file_path)
    return send_from_directory(directory, file_name)
362
 
 
 
 
363
 
364
if __name__ == "__main__":
    # The server listens on all interfaces, so the interactive debugger
    # (which allows code execution from the browser) must not be on by
    # default. Set FLASK_DEBUG=1 to enable it for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true")
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)
templates/completed.html CHANGED
@@ -11,17 +11,17 @@
11
  display: flex;
12
  justify-content: center;
13
  align-items: center;
14
- height: 100vh;
15
- overflow: hidden;
16
  }
17
  .container {
18
- background-color: rgba(255, 255, 255, 0.9);
19
  border-radius: 20px;
20
  padding: 40px;
21
  box-shadow: 0 10px 30px rgba(0, 0, 0, 0.1);
22
  text-align: center;
23
- max-width: 600px;
24
  width: 90%;
 
25
  }
26
  h1 {
27
  color: #2c3e50;
@@ -29,47 +29,47 @@
29
  margin-bottom: 30px;
30
  animation: bounce 1s ease;
31
  }
32
- .stats-container {
33
  display: flex;
34
  flex-direction: column;
35
- align-items: center;
36
- margin-bottom: 30px;
37
  }
38
- .stat-card {
39
- background-color: #fff;
40
  border-radius: 15px;
41
  padding: 20px;
42
- margin: 10px;
43
- box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1);
44
- width: 80%;
45
- transition: transform 0.3s ease;
46
- }
47
- .prediction-stats {
48
  display: flex;
 
49
  justify-content: space-between;
50
- width: 80%;
51
- }
52
- .prediction-stat-card {
53
- flex: 1;
54
- margin: 0 5px;
55
- }
56
- .stat-card:hover, .prediction-stat-card:hover {
57
- transform: translateY(-5px);
58
  }
59
- .stat-title {
60
- font-size: 18px;
61
- color: #7f8c8d;
62
- margin-bottom: 10px;
63
  }
64
- .stat-value {
65
- font-size: 28px;
66
  font-weight: bold;
67
  color: #2c3e50;
68
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  .button-container {
70
  margin-top: 30px;
71
  }
72
- button {
73
  background-color: #3498db;
74
  color: white;
75
  border: none;
@@ -77,49 +77,62 @@
77
  font-size: 18px;
78
  border-radius: 50px;
79
  cursor: pointer;
 
 
80
  transition: background-color 0.3s ease, transform 0.3s ease;
81
  }
82
- button:hover {
83
  background-color: #2980b9;
84
  transform: scale(1.05);
85
  }
 
 
 
 
 
 
 
 
 
 
 
86
  @keyframes bounce {
87
  0%, 20%, 50%, 80%, 100% {transform: translateY(0);}
88
  40% {transform: translateY(-30px);}
89
  60% {transform: translateY(-15px);}
90
  }
91
  </style>
92
- <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap" rel="stylesheet">
93
- <script src="https://cdn.jsdelivr.net/npm/canvas-confetti@1.5.1/dist/confetti.browser.min.js"></script>
94
  </head>
95
  <body>
96
  <div class="container">
97
- <h1>Thank you!</h1>
98
- <p>You've successfully completed the experiment. Your predictions have been recorded.</p>
99
- <div class="stats-container">
100
- <div class="stat-card">
101
- <div class="stat-title">Your Labeling Accuracy</div>
102
- <div class="stat-value">{{ accuracy }}%</div>
103
- </div>
104
- <div class="prediction-stats">
105
- <div class="stat-card prediction-stat-card">
106
- <div class="stat-title">You Predicted TRUE</div>
107
- <div class="stat-value">{{ true_percentage }}%</div>
108
- </div>
109
- <div class="stat-card prediction-stat-card">
110
- <div class="stat-title">You Predicted FALSE</div>
111
- <div class="stat-value">{{ false_percentage }}%</div>
112
  </div>
 
113
  </div>
 
114
  </div>
 
115
  <div class="button-container">
116
- <a href="/" style="text-decoration: none;">
117
- <button>Back to Start Page</button>
118
- </a>
119
  </div>
120
  </div>
 
 
121
  <script>
122
- // Trigger confetti animation
123
  confetti({
124
  particleCount: 100,
125
  spread: 70,
 
11
  display: flex;
12
  justify-content: center;
13
  align-items: center;
14
+ min-height: 100vh;
 
15
  }
16
  .container {
17
+ background-color: rgba(255, 255, 255, 0.95);
18
  border-radius: 20px;
19
  padding: 40px;
20
  box-shadow: 0 10px 30px rgba(0, 0, 0, 0.1);
21
  text-align: center;
22
+ max-width: 800px;
23
  width: 90%;
24
+ margin: 20px;
25
  }
26
  h1 {
27
  color: #2c3e50;
 
29
  margin-bottom: 30px;
30
  animation: bounce 1s ease;
31
  }
32
+ .rankings-container {
33
  display: flex;
34
  flex-direction: column;
35
+ gap: 20px;
36
+ margin: 30px 0;
37
  }
38
+ .method-card {
39
+ background-color: white;
40
  border-radius: 15px;
41
  padding: 20px;
42
+ box-shadow: 0 5px 15px rgba(0, 0, 0, 0.05);
 
 
 
 
 
43
  display: flex;
44
+ align-items: center;
45
  justify-content: space-between;
46
+ transition: transform 0.3s ease;
 
 
 
 
 
 
 
47
  }
48
+ .method-card:hover {
49
+ transform: translateY(-3px);
 
 
50
  }
51
+ .method-name {
52
+ font-size: 1.2em;
53
  font-weight: bold;
54
  color: #2c3e50;
55
  }
56
+ .average-rank {
57
+ font-size: 1.5em;
58
+ font-weight: bold;
59
+ color: #3498db;
60
+ background: #f8f9fa;
61
+ padding: 10px 20px;
62
+ border-radius: 25px;
63
+ }
64
+ .rank-label {
65
+ font-size: 0.9em;
66
+ color: #7f8c8d;
67
+ margin-top: 5px;
68
+ }
69
  .button-container {
70
  margin-top: 30px;
71
  }
72
+ .home-button {
73
  background-color: #3498db;
74
  color: white;
75
  border: none;
 
77
  font-size: 18px;
78
  border-radius: 50px;
79
  cursor: pointer;
80
+ text-decoration: none;
81
+ display: inline-block;
82
  transition: background-color 0.3s ease, transform 0.3s ease;
83
  }
84
+ .home-button:hover {
85
  background-color: #2980b9;
86
  transform: scale(1.05);
87
  }
88
+ .summary {
89
+ background-color: #f8f9fa;
90
+ padding: 20px;
91
+ border-radius: 10px;
92
+ margin-bottom: 30px;
93
+ text-align: left;
94
+ }
95
+ .summary h2 {
96
+ color: #2c3e50;
97
+ margin-top: 0;
98
+ }
99
  @keyframes bounce {
100
  0%, 20%, 50%, 80%, 100% {transform: translateY(0);}
101
  40% {transform: translateY(-30px);}
102
  60% {transform: translateY(-15px);}
103
  }
104
  </style>
 
 
105
  </head>
106
  <body>
107
  <div class="container">
108
+ <h1>Thank You!</h1>
109
+ <p>You've successfully completed all 10 samples. Here are the average rankings for each explanation method:</p>
110
+
111
+ <div class="summary">
112
+ <h2>Ranking Summary</h2>
113
+ <p>Lower numbers indicate better rankings (1 = best, 4 = worst)</p>
114
+ </div>
115
+
116
+ <div class="rankings-container">
117
+ {% for method, rank in sorted_methods %}
118
+ <div class="method-card">
119
+ <div class="method-info">
120
+ <div class="method-name">{{ method }}</div>
121
+ <div class="rank-label">Average Ranking</div>
 
122
  </div>
123
+ <div class="average-rank">{{ "%.2f"|format(rank) }}</div>
124
  </div>
125
+ {% endfor %}
126
  </div>
127
+
128
  <div class="button-container">
129
+ <a href="/" class="home-button">Back to Home</a>
 
 
130
  </div>
131
  </div>
132
+
133
+ <script src="https://cdn.jsdelivr.net/npm/canvas-confetti@1.5.1/dist/confetti.browser.min.js"></script>
134
  <script>
135
+ // Celebration animation
136
  confetti({
137
  particleCount: 100,
138
  spread: 70,
templates/consent.html CHANGED
@@ -73,8 +73,8 @@
73
  <h2>Voluntary Participation</h2>
74
  <p>Your participation in this study is entirely voluntary. You may choose to withdraw at any time without any consequences.</p>
75
 
76
- <h2>Contact Information</h2>
77
- <p>If you have any questions or concerns about this study, please contact Anh Nguyen's lab at Auburn CSSE department via anh.ng8@gmail.com.</p>
78
 
79
  <p class="highlight">By clicking "I Agree" below, you confirm that you have read and understood this informed consent, and you agree to participate in this TableQA study under the terms described above.</p>
80
 
 
73
  <h2>Voluntary Participation</h2>
74
  <p>Your participation in this study is entirely voluntary. You may choose to withdraw at any time without any consequences.</p>
75
 
76
+ <!-- <h2>Contact Information</h2>-->
77
+ <!-- <p>If you have any questions or concerns about this study, please contact Anh Nguyen's lab at Auburn CSSE department via anh.ng8@gmail.com.</p>-->
78
 
79
  <p class="highlight">By clicking "I Agree" below, you confirm that you have read and understood this informed consent, and you agree to participate in this TableQA study under the terms described above.</p>
80
 
templates/experiment.html CHANGED
@@ -1,153 +1,197 @@
1
  <!DOCTYPE html>
2
  <html>
3
  <head>
4
- <title>Experiment</title>
5
  <style>
6
- body, html {
7
- margin: 0;
8
- padding: 0;
9
- height: 100%;
10
  font-family: 'Roboto', sans-serif;
 
 
 
11
  }
12
  .container {
13
- display: flex;
14
- flex-direction: column;
15
- height: 100vh;
16
- width: 100vw;
17
- background-color: #ffffff;
18
- }
19
- .header {
20
- padding: 10px;
21
- background-color: #f0f0f0;
22
- text-align: center;
23
  }
24
  h1 {
25
- margin: 0;
26
- font-size: 20px;
27
- }
28
- .task-description {
29
- padding: 10px;
30
- background-color: #e0e0e0;
31
  text-align: center;
 
 
32
  }
33
- .highlight {
34
- font-size: 1.1em;
35
- font-weight: bold;
36
- color: #0056b3;
37
- background-color: #e6e6e6;
38
- padding: 5px 10px;
39
- border-radius: 4px;
40
- display: inline-block;
41
- margin-top: 5px;
42
- }
43
- .visualization-container {
44
- flex-grow: 1;
45
- display: flex;
46
- justify-content: center;
47
- align-items: center;
48
  overflow: hidden;
49
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  iframe {
51
  width: 100%;
52
- height: 100%;
53
  border: none;
54
  }
55
- .buttons {
56
- display: flex;
57
- justify-content: space-around;
58
- padding: 10px;
59
- background-color: #f0f0f0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  }
61
- button {
62
- background-color: #808080;
63
  color: white;
64
- padding: 10px 20px;
65
  border: none;
66
  border-radius: 5px;
67
  cursor: pointer;
68
- font-size: 16px;
69
- transition: background-color 0.3s ease;
70
- }
71
- button:hover {
72
- background-color: #707070;
 
 
 
 
 
 
 
 
 
73
  }
74
- .bottom-question {
 
75
  text-align: center;
 
76
  padding: 10px;
77
- background-color: #e0e0e0;
78
- font-weight: bold;
79
- font-size: 14px;
80
- }
81
- /* Loader styles */
82
- .overlay {
83
- position: fixed;
84
- top: 0;
85
- left: 0;
86
- width: 100%;
87
- height: 100%;
88
- background-color: rgba(0, 0, 0, 0.5);
89
- display: none;
90
- z-index: 1000;
91
- }
92
- .loader {
93
- border: 5px solid #f3f3f3;
94
- border-top: 5px solid #3498db;
95
- border-radius: 50%;
96
- width: 50px;
97
- height: 50px;
98
- animation: spin 1s linear infinite;
99
- position: fixed;
100
- top: 50%;
101
- left: 50%;
102
- margin-top: -25px;
103
- margin-left: -25px;
104
  display: none;
105
- z-index: 1001;
106
- }
107
- @keyframes spin {
108
- 0% { transform: rotate(0deg); }
109
- 100% { transform: rotate(360deg); }
110
  }
111
  </style>
112
- <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap" rel="stylesheet">
113
  </head>
114
  <body>
115
  <div class="container">
116
- <div class="header">
117
- <h1>{{ sample_id + 1 }} / 10</h1>
118
- </div>
119
- <div class="task-description">
120
- <p>Please note that in select row function, starting index is 0 for Chain-of-Table and 1 for Dater and Index * represents the selection for all rows.</p>
121
- <p class="highlight">Based on the explanation below, please guess what the AI model will predict on the input Statement below.</p>
122
- <h2>{{ statement | safe }}</h2>
 
 
 
 
123
  </div>
124
 
125
- <div class="visualization-container">
126
- <iframe src="{{ visualization }}"></iframe>
127
- </div>
128
- <div class="bottom-question">
129
- <h1>Guess what the model will predict on the Statement based on the provided explanation?</h1>
 
 
 
 
 
 
130
  </div>
131
- <div class="buttons">
132
- <form action="{{ url_for('feedback') }}" method="post" onsubmit="showLoader()">
133
- <input type="hidden" name="session_id" value="{{ session_id }}">
134
- <button type="submit" name="prediction" value="TRUE"><h1>Model will predict: Statement is TRUE</h1></button>
135
- </form>
136
- <form action="{{ url_for('feedback') }}" method="post" onsubmit="showLoader()">
137
- <input type="hidden" name="session_id" value="{{ session_id }}">
138
- <button type="submit" name="prediction" value="FALSE"><h1>Model will predict: Statement is FALSE</h1></button>
 
 
 
 
 
139
  </form>
140
  </div>
141
  </div>
142
 
143
- <!-- Loader and overlay -->
144
- <div class="overlay" id="overlay"></div>
145
- <div class="loader" id="loader"></div>
146
-
147
  <script>
148
- function showLoader() {
149
- document.getElementById('overlay').style.display = 'block';
150
- document.getElementById('loader').style.display = 'block';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  }
152
  </script>
153
  </body>
 
1
  <!DOCTYPE html>
2
  <html>
3
  <head>
4
+ <title>Table QA Experiment</title>
5
  <style>
6
+ body {
 
 
 
7
  font-family: 'Roboto', sans-serif;
8
+ margin: 0;
9
+ padding: 20px;
10
+ background-color: #f5f5f5;
11
  }
12
  .container {
13
+ max-width: 1200px;
14
+ margin: 0 auto;
15
+ background-color: white;
16
+ padding: 20px;
17
+ border-radius: 10px;
18
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
 
 
 
 
19
  }
20
  h1 {
 
 
 
 
 
 
21
  text-align: center;
22
+ color: #333;
23
+ margin-bottom: 30px;
24
  }
25
+ .progress {
26
+ text-align: center;
27
+ font-size: 1.2em;
28
+ color: #666;
29
+ margin-bottom: 20px;
30
+ }
31
+ .explanations-grid {
32
+ display: grid;
33
+ grid-template-columns: repeat(2, 1fr);
34
+ gap: 20px;
35
+ margin-bottom: 40px;
36
+ }
37
+ .explanation-card {
38
+ border: 1px solid #ddd;
39
+ border-radius: 8px;
40
  overflow: hidden;
41
  }
42
+ .explanation-header {
43
+ background-color: #f8f9fa;
44
+ padding: 15px;
45
+ border-bottom: 1px solid #ddd;
46
+ }
47
+ .explanation-header h2 {
48
+ margin: 0;
49
+ color: #333;
50
+ font-size: 1.2em;
51
+ }
52
+ .explanation-content {
53
+ padding: 0;
54
+ }
55
  iframe {
56
  width: 100%;
57
+ height: 500px;
58
  border: none;
59
  }
60
+ .ranking-section {
61
+ position: sticky;
62
+ bottom: 0;
63
+ background: white;
64
+ padding: 20px;
65
+ border-top: 3px solid #4CAF50;
66
+ box-shadow: 0 -2px 10px rgba(0,0,0,0.1);
67
+ margin-top: 40px;
68
+ }
69
+ .ranking-grid {
70
+ display: grid;
71
+ grid-template-columns: repeat(4, 1fr);
72
+ gap: 15px;
73
+ margin: 20px 0;
74
+ }
75
+ .ranking-item {
76
+ background: #f8f9fa;
77
+ padding: 15px;
78
+ border-radius: 8px;
79
+ text-align: center;
80
+ }
81
+ .ranking-item label {
82
+ display: block;
83
+ margin-bottom: 10px;
84
+ font-weight: bold;
85
+ color: #333;
86
+ }
87
+ .ranking-item input {
88
+ width: 60px;
89
+ padding: 8px;
90
+ border: 2px solid #ddd;
91
+ border-radius: 4px;
92
+ text-align: center;
93
+ font-size: 1.1em;
94
  }
95
+ .submit-button {
96
+ background-color: #4CAF50;
97
  color: white;
98
+ padding: 15px 30px;
99
  border: none;
100
  border-radius: 5px;
101
  cursor: pointer;
102
+ font-size: 1.1em;
103
+ display: block;
104
+ margin: 20px auto;
105
+ transition: background-color 0.3s;
106
+ }
107
+ .submit-button:hover {
108
+ background-color: #45a049;
109
+ }
110
+ .instructions {
111
+ background-color: #e9f5e9;
112
+ border-left: 4px solid #4CAF50;
113
+ padding: 15px;
114
+ margin-bottom: 20px;
115
+ border-radius: 4px;
116
  }
117
+ .error-message {
118
+ color: #d32f2f;
119
  text-align: center;
120
+ margin: 10px 0;
121
  padding: 10px;
122
+ background-color: #fde8e8;
123
+ border-radius: 4px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  display: none;
 
 
 
 
 
125
  }
126
  </style>
 
127
  </head>
128
  <body>
129
  <div class="container">
130
+ <div class="progress">Sample {{ sample_id + 1 }} of 10</div>
131
+
132
+ <div class="instructions">
133
+ <h3>Ranking Instructions:</h3>
134
+ <p>Please examine each explanation method and rank them based on:</p>
135
+ <ul>
136
+ <li><strong>Clarity:</strong> How easy is the explanation to understand?</li>
137
+ <li><strong>Coherence:</strong> Does the explanation logically flow and make sense?</li>
138
+ <li><strong>Helpfulness:</strong> How well does it reveal the model's reasoning?</li>
139
+ </ul>
140
+ <p>Assign ranks from 1 (best) to 4 (worst). Each rank can only be used once.</p>
141
  </div>
142
 
143
+ <div class="explanations-grid">
144
+ {% for method in methods %}
145
+ <div class="explanation-card">
146
+ <div class="explanation-header">
147
+ <h2>{{ method }}</h2>
148
+ </div>
149
+ <div class="explanation-content">
150
+ <iframe src="{{ visualizations[method] }}" title="{{ method }}"></iframe>
151
+ </div>
152
+ </div>
153
+ {% endfor %}
154
  </div>
155
+
156
+ <div class="ranking-section">
157
+ <form id="rankingForm" action="{{ url_for('experiment', session_id=session_id) }}" method="post" onsubmit="return validateRankings()">
158
+ <div class="ranking-grid">
159
+ {% for method in methods %}
160
+ <div class="ranking-item">
161
+ <label for="{{ method }}">{{ method }}</label>
162
+ <input type="number" id="{{ method }}" name="{{ method }}" min="1" max="4" required>
163
+ </div>
164
+ {% endfor %}
165
+ </div>
166
+ <div id="errorMessage" class="error-message"></div>
167
+ <button type="submit" class="submit-button">Submit Rankings</button>
168
  </form>
169
  </div>
170
  </div>
171
 
 
 
 
 
172
  <script>
173
+ function validateRankings() {
174
+ const rankings = new Set();
175
+ const form = document.getElementById('rankingForm');
176
+ const errorMessage = document.getElementById('errorMessage');
177
+
178
+ for (const input of form.getElementsByTagName('input')) {
179
+ const value = parseInt(input.value);
180
+ if (isNaN(value) || value < 1 || value > 4) {
181
+ errorMessage.textContent = 'Please use only numbers between 1 and 4.';
182
+ errorMessage.style.display = 'block';
183
+ return false;
184
+ }
185
+ rankings.add(value);
186
+ }
187
+
188
+ if (rankings.size !== 4) {
189
+ errorMessage.textContent = 'Please assign unique ranks (1-4) to each method.';
190
+ errorMessage.style.display = 'block';
191
+ return false;
192
+ }
193
+
194
+ return true;
195
  }
196
  </script>
197
  </body>
templates/index.html CHANGED
@@ -40,84 +40,6 @@
40
  border-radius: 5px;
41
  font-size: 18px;
42
  }
43
- .method-buttons {
44
- display: flex;
45
- flex-wrap: wrap;
46
- justify-content: center;
47
- margin-bottom: 20px;
48
- gap: 20px;
49
- }
50
- .method-button {
51
- width: calc(45% - 10px);
52
- padding: 15px;
53
- font-size: 20px;
54
- border-radius: 10px;
55
- cursor: pointer;
56
- transition: all 0.3s ease;
57
- border: 2px solid transparent;
58
- font-weight: bold;
59
- text-align: center;
60
- }
61
- .method-button.Chain-of-Table {
62
- background-color: #ffcc80;
63
- color: #1e90ff;
64
- }
65
- .method-button.Plan-of-SQLs {
66
- background-color: #ffcc80;
67
- color: #e65100;
68
- }
69
- .method-button.Text2SQL {
70
- background-color: #ffcc80;
71
- color: #7b1fa2;
72
- }
73
- .method-button.Dater {
74
- background-color: #ffcc80;
75
- color: #4caf50;
76
- }
77
- .method-button.No-XAI {
78
- background-color: #ffcc80;
79
- color: #ff9800;
80
- }
81
- .task-instruction {
82
- background-color: #f0f8ff;
83
- border-left: 5px solid #4CAF50;
84
- padding: 20px;
85
- margin-bottom: 30px;
86
- border-radius: 5px;
87
- text-align: left;
88
- }
89
- .task-instruction h2 {
90
- color: #4CAF50;
91
- margin-top: 0;
92
- }
93
- .task-step {
94
- display: flex;
95
- align-items: center;
96
- margin-bottom: 15px;
97
- }
98
- .task-icon {
99
- font-size: 24px;
100
- margin-right: 15px;
101
- color: #4CAF50;
102
- }
103
- .task-text {
104
- font-size: 18px;
105
- color: #333;
106
- }
107
- .method-button:hover {
108
- opacity: 0.8;
109
- }
110
- .method-button.selected {
111
- border-color: #000000;
112
- box-shadow: 0 0 15px rgba(0, 0, 0, 0.3);
113
- transform: scale(1.05);
114
- animation: borderPulse 0.5s ease-in-out;
115
- }
116
- @keyframes borderPulse {
117
- 0% { border-color: transparent; }
118
- 50% { border-color: #000000; }
119
- 100% { border-color: #000000; }
120
- }
121
  button {
122
  background-color: #4CAF50;
123
  color: white;
@@ -132,121 +54,18 @@
132
  button:hover {
133
  background-color: #45a049;
134
  }
135
- .error-message {
136
- color: red;
137
- margin-bottom: 10px;
138
- font-size: 18px;
139
- }
140
  </style>
141
- <script>
142
- function shuffleArray(array) {
143
- for (let i = array.length - 1; i > 0; i--) {
144
- const j = Math.floor(Math.random() * (i + 1));
145
- [array[i], array[j]] = [array[j], array[i]];
146
- }
147
- return array;
148
- }
149
-
150
- function createMethodButtons() {
151
- const methods = [
152
- { name: 'Chain-of-Table', color: '#1e90ff' },
153
- { name: 'Plan-of-SQLs', color: '#e65100' },
154
- { name: 'Text2SQL', color: '#7b1fa2' },
155
- { name: 'Dater', color: '#4caf50' },
156
- { name: 'No-XAI', color: '#ff9800' }
157
- ];
158
-
159
- const shuffledMethods = shuffleArray(methods);
160
- const container = document.querySelector('.method-buttons');
161
- container.innerHTML = ''; // Clear existing buttons
162
-
163
- shuffledMethods.forEach(method => {
164
- if (method.name === 'No-XAI' && !{{ show_no_xai|tojson }}) {
165
- return; // Skip No-XAI if not shown
166
- }
167
- const button = document.createElement('div');
168
- button.className = `method-button ${method.name}`;
169
- button.onclick = () => selectMethod(method.name);
170
- button.textContent = method.name;
171
- container.appendChild(button);
172
- });
173
- }
174
-
175
- function selectMethod(method) {
176
- document.getElementById('method').value = method;
177
-
178
- var buttons = document.getElementsByClassName('method-button');
179
- for (var i = 0; i < buttons.length; i++) {
180
- buttons[i].classList.remove('selected');
181
- }
182
-
183
- var selectedButton = document.querySelector(`.method-button.${method}`);
184
- if (selectedButton) {
185
- selectedButton.classList.add('selected');
186
- }
187
- }
188
-
189
- function validateForm() {
190
- var username = document.getElementById('username').value;
191
- var seed = document.getElementById('seed').value;
192
- var method = document.getElementById('method').value;
193
- if (!username || !seed || !method) {
194
- alert("Please fill in all fields and select a method.");
195
- return false;
196
- }
197
- return true;
198
- }
199
-
200
- // Call this function when the page loads
201
- window.onload = function() {
202
- createMethodButtons();
203
- };
204
- </script>
205
  <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap" rel="stylesheet">
206
  </head>
207
  <body>
208
  <div class="container">
209
- <div class="task-instruction">
210
- <h2>Let's Get Started! 🚀</h2>
211
- <div class="task-step">
212
- <span class="task-icon">👤</span>
213
- <span class="task-text">Enter your name</span>
214
- </div>
215
- <div class="task-step">
216
- <span class="task-icon">🔢</span>
217
- <span class="task-text">Choose a lucky number</span>
218
- </div>
219
- <div class="task-step">
220
- <span class="task-icon">📊</span>
221
- <span class="task-text">Select an explanation method</span>
222
- </div>
223
- <div class="task-step">
224
- <span class="task-icon">🎯</span>
225
- <span class="task-text">Complete 10 samples in the experiment</span>
226
- </div>
227
- </div>
228
-
229
- {% if error %}
230
- <div class="error-message">
231
- {{ error }}
232
- </div>
233
- {% endif %}
234
-
235
- <form id="method-form" action="{{ url_for('index') }}" method="post" onsubmit="return validateForm();">
236
  <label for="username">Hi there 👋👋👋 ! What is your name?</label>
237
  <input type="text" id="username" name="username" required>
238
  <label for="seed">What is your lucky number? 🍀🍀🍀 </label>
239
  <input type="number" id="seed" name="seed" required>
240
-
241
- <input type="hidden" id="method" name="method" required>
242
-
243
- <h2 style="color: #333; margin-top: 30px; margin-bottom: 20px;">Explanation Methods</h2>
244
-
245
- <div class="method-buttons">
246
- <!-- Method buttons will be dynamically inserted here -->
247
- </div>
248
-
249
- <button type="submit">Next</button>
250
  </form>
251
  </div>
252
  </body>
 
40
  border-radius: 5px;
41
  font-size: 18px;
42
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  button {
44
  background-color: #4CAF50;
45
  color: white;
 
54
  button:hover {
55
  background-color: #45a049;
56
  }
 
 
 
 
 
57
  </style>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap" rel="stylesheet">
59
  </head>
60
  <body>
61
  <div class="container">
62
+ <h1>Trustworthy LLMs for Table QA</h1>
63
+ <form id="method-form" action="/" method="post">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  <label for="username">Hi there 👋👋👋 ! What is your name?</label>
65
  <input type="text" id="username" name="username" required>
66
  <label for="seed">What is your lucky number? 🍀🍀🍀 </label>
67
  <input type="number" id="seed" name="seed" required>
68
+ <button type="submit">Start Experiment</button>
 
 
 
 
 
 
 
 
 
69
  </form>
70
  </div>
71
  </body>
templates/introduction.html CHANGED
@@ -200,11 +200,18 @@
200
  </div>
201
 
202
  <div class="outro">
203
- <h2>Model Simulation Task</h2>
204
  <p>
205
- Given an input statement, an Artificial Intelligence (AI) model will output either TRUE or FALSE.
206
- <strong>Your job in this Simulation task is to use the AI's explanation to guess the machine response.</strong>
207
- Specifically, please choose which response (Statement is TRUE/ Statement is FALSE) model would output regardless of whether you think that response is correct or not.
 
 
 
 
 
 
 
208
  </p>
209
  </div>
210
 
 
200
  </div>
201
 
202
  <div class="outro">
203
+ <h2>Explanation Ranking Task</h2>
204
  <p>
205
+ <!-- Given an input statement, an Artificial Intelligence (AI) model will output either TRUE or FALSE.-->
206
+ <!-- <strong>Your job in this Simulation task is to use the AI's explanation to guess the machine response.</strong>-->
207
+ <!-- Specifically, please choose which response (Statement is TRUE/ Statement is FALSE) model would output regardless of whether you think that response is correct or not.-->
208
+ You are given explanations from <strong>4</strong> different methods for an input.
209
+ Please rank these explanations based on their clarity, coherence, and helpfulness in understanding the model's reasoning.
210
+ Clarity Definition: How easy is the explanation to understand? Is the language clear and straightforward?
211
+ Coherence Definition: Does the explanation logically flow and make sense as a whole? Are the ideas well-connected?
212
+ Helpfulness in Understanding the Model's Reasoning Definition: How effectively does the explanation help you understand why the model made its decision? Does it reveal the reasoning process?
213
+
214
+ Provide the ranking from best to worst (1-best and 4-worst).
215
  </p>
216
  </div>
217