ykhrustalev committed on
Commit
ea2863b
·
1 Parent(s): 49dadf9

improve the dialog

Browse files
Files changed (1) hide show
  1. main.js +56 -16
main.js CHANGED
@@ -43,6 +43,9 @@ const audioPreview = document.getElementById('audioPreview');
43
  const clearCacheBtn = document.getElementById('clearCacheBtn');
44
  const cacheInfoEl = document.getElementById('cacheInfo');
45
  const dropOverlay = document.getElementById('dropOverlay');
 
 
 
46
 
47
  // State
48
  let audioModel = null;
@@ -131,6 +134,23 @@ function updateProgress(percent, text) {
131
  progressText.textContent = text || `${percent}%`;
132
  }
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  function addMessage(role, content, isStreaming = false, audio = null) {
135
  const msgEl = document.createElement('div');
136
  msgEl.className = `message ${role}${isStreaming ? ' generating' : ''}`;
@@ -375,6 +395,7 @@ async function generate(userMessage) {
375
  let generatedText = '';
376
  const startTime = performance.now();
377
  let tokenCount = 0;
 
378
 
379
  try {
380
  const currentMode = audioModeSelect?.value || 'interleaved';
@@ -387,10 +408,13 @@ async function generate(userMessage) {
387
  tokenCount++;
388
  textEl.textContent = generatedText;
389
  chatContainer.scrollTop = chatContainer.scrollHeight;
 
 
390
  return false;
391
  };
392
 
393
  if (currentMode === 'asr' && audioToSend) {
 
394
  generatedText = await audioModel.transcribe(
395
  audioToSend.audioData,
396
  audioToSend.sampleRate,
@@ -399,28 +423,34 @@ async function generate(userMessage) {
399
  pendingAudio = null;
400
 
401
  } else if (currentMode === 'tts') {
 
402
  const result = await audioModel.generateSpeech(userMessage, {
403
  onToken: onTokenCallback,
404
  onAudioFrame: (frame, count) => {
405
- if (count % 10 === 0) {
406
- textEl.textContent = `Generating audio... (${count} frames)`;
407
- }
408
  },
409
  });
410
 
411
  if (result.audioCodes && result.audioCodes.length > 0) {
412
- textEl.textContent = 'Decoding audio...';
413
  const waveform = await audioModel.decodeAudioCodes(result.audioCodes);
414
 
415
  if (waveform.length > 0) {
 
 
 
 
 
416
  const wavBlob = createWavBlob(waveform, 24000);
417
  const audioUrl = URL.createObjectURL(wavBlob);
418
  const audioEl = document.createElement('audio');
419
  audioEl.src = audioUrl;
420
  audioEl.controls = true;
421
- msgEl.appendChild(audioEl);
422
-
423
- generatedText = result.textOutput || `[Generated ${result.audioCodes.length} audio frames (${(waveform.length / 24000).toFixed(2)}s)]`;
424
  } else {
425
  generatedText = '[Audio decoding failed - no waveform generated]';
426
  }
@@ -429,6 +459,7 @@ async function generate(userMessage) {
429
  }
430
 
431
  } else if (currentMode === 'interleaved' && audioToSend) {
 
432
  const result = await audioModel.generateInterleaved(
433
  audioToSend.audioData,
434
  audioToSend.sampleRate,
@@ -439,11 +470,13 @@ async function generate(userMessage) {
439
  tokenCount = text.length;
440
  textEl.textContent = text;
441
  chatContainer.scrollTop = chatContainer.scrollHeight;
 
 
442
  },
443
  onAudioFrame: (frame, count) => {
444
- if (count % 20 === 0) {
445
- setStatus(`Generating audio... (${count} frames)`);
446
- }
447
  },
448
  }
449
  );
@@ -453,24 +486,30 @@ async function generate(userMessage) {
453
  textEl.textContent = generatedText;
454
 
455
  if (result.audioCodes && result.audioCodes.length > 0) {
456
- setStatus('Decoding audio...');
457
  const waveform = await audioModel.decodeAudioCodes(result.audioCodes);
458
 
459
  if (waveform.length > 0) {
 
 
 
 
 
 
 
460
  const wavBlob = createWavBlob(waveform, 24000);
461
  const audioUrl = URL.createObjectURL(wavBlob);
462
  const audioEl = document.createElement('audio');
463
  audioEl.src = audioUrl;
464
  audioEl.controls = true;
465
- msgEl.appendChild(audioEl);
466
-
467
- if (!generatedText) {
468
- generatedText = `[Generated ${result.audioCodes.length} audio frames]`;
469
- }
470
  }
471
  }
472
 
473
  } else {
 
474
  generatedText = await audioModel.generate(messages, {
475
  maxNewTokens: 256,
476
  onToken: onTokenCallback,
@@ -500,6 +539,7 @@ async function generate(userMessage) {
500
  messages.pop();
501
  setStatus(`Error: ${error.message}`, 'error');
502
  } finally {
 
503
  isGenerating = false;
504
  setReady(true);
505
  userInput.focus();
 
43
  const clearCacheBtn = document.getElementById('clearCacheBtn');
44
  const cacheInfoEl = document.getElementById('cacheInfo');
45
  const dropOverlay = document.getElementById('dropOverlay');
46
+ const spinner = document.getElementById('spinner');
47
+ const spinnerText = document.getElementById('spinnerText');
48
+ const spinnerStats = document.getElementById('spinnerStats');
49
 
50
  // State
51
  let audioModel = null;
 
134
  progressText.textContent = text || `${percent}%`;
135
  }
136
 
137
/**
 * Reveal the global spinner overlay with a status label.
 * `spinner`, `spinnerText` and `spinnerStats` are module-level DOM
 * element references resolved via getElementById at load time.
 * @param {string} label - Primary status line to display.
 * @param {string} [stats=''] - Secondary stats line (cleared when omitted).
 */
function showSpinner(label, stats = '') {
  spinnerText.textContent = label;
  spinnerStats.textContent = stats;
  spinner.classList.add('active');
}
142
+
143
/**
 * Refresh the spinner overlay's text while it is visible.
 * A falsy `label` (e.g. null) leaves the current label untouched;
 * the stats line is always overwritten, and cleared when omitted.
 * @param {?string} label - New status line, or null/'' to keep the current one.
 * @param {string} [stats=''] - Stats line to show.
 */
function updateSpinner(label, stats = '') {
  spinnerStats.textContent = stats;
  if (label) {
    spinnerText.textContent = label;
  }
}
147
+
148
/**
 * Hide the spinner overlay and blank out both of its text lines so
 * stale content never flashes on the next showSpinner() call.
 */
function hideSpinner() {
  spinnerStats.textContent = '';
  spinnerText.textContent = '';
  spinner.classList.remove('active');
}
153
+
154
  function addMessage(role, content, isStreaming = false, audio = null) {
155
  const msgEl = document.createElement('div');
156
  msgEl.className = `message ${role}${isStreaming ? ' generating' : ''}`;
 
395
  let generatedText = '';
396
  const startTime = performance.now();
397
  let tokenCount = 0;
398
+ let audioFrameCount = 0;
399
 
400
  try {
401
  const currentMode = audioModeSelect?.value || 'interleaved';
 
408
  tokenCount++;
409
  textEl.textContent = generatedText;
410
  chatContainer.scrollTop = chatContainer.scrollHeight;
411
+ const elapsed = ((performance.now() - startTime) / 1000).toFixed(1);
412
+ updateSpinner(null, `${tokenCount} tokens · ${elapsed}s`);
413
  return false;
414
  };
415
 
416
  if (currentMode === 'asr' && audioToSend) {
417
+ showSpinner('Transcribing audio...');
418
  generatedText = await audioModel.transcribe(
419
  audioToSend.audioData,
420
  audioToSend.sampleRate,
 
423
  pendingAudio = null;
424
 
425
  } else if (currentMode === 'tts') {
426
+ showSpinner('Generating speech...');
427
  const result = await audioModel.generateSpeech(userMessage, {
428
  onToken: onTokenCallback,
429
  onAudioFrame: (frame, count) => {
430
+ audioFrameCount = count;
431
+ const elapsed = ((performance.now() - startTime) / 1000).toFixed(1);
432
+ updateSpinner('Generating audio...', `${count} frames · ${elapsed}s`);
433
  },
434
  });
435
 
436
  if (result.audioCodes && result.audioCodes.length > 0) {
437
+ updateSpinner('Decoding audio...', `${result.audioCodes.length} frames`);
438
  const waveform = await audioModel.decodeAudioCodes(result.audioCodes);
439
 
440
  if (waveform.length > 0) {
441
+ generatedText = result.textOutput || `Generated ${result.audioCodes.length} audio frames (${(waveform.length / 24000).toFixed(2)}s)`;
442
+
443
+ // Create separate audio message block
444
+ const audioMsgEl = document.createElement('div');
445
+ audioMsgEl.className = 'message assistant';
446
  const wavBlob = createWavBlob(waveform, 24000);
447
  const audioUrl = URL.createObjectURL(wavBlob);
448
  const audioEl = document.createElement('audio');
449
  audioEl.src = audioUrl;
450
  audioEl.controls = true;
451
+ audioMsgEl.appendChild(audioEl);
452
+ chatContainer.appendChild(audioMsgEl);
453
+ chatContainer.scrollTop = chatContainer.scrollHeight;
454
  } else {
455
  generatedText = '[Audio decoding failed - no waveform generated]';
456
  }
 
459
  }
460
 
461
  } else if (currentMode === 'interleaved' && audioToSend) {
462
+ showSpinner('Processing audio...');
463
  const result = await audioModel.generateInterleaved(
464
  audioToSend.audioData,
465
  audioToSend.sampleRate,
 
470
  tokenCount = text.length;
471
  textEl.textContent = text;
472
  chatContainer.scrollTop = chatContainer.scrollHeight;
473
+ const elapsed = ((performance.now() - startTime) / 1000).toFixed(1);
474
+ updateSpinner('Generating text...', `${tokenCount} chars · ${elapsed}s`);
475
  },
476
  onAudioFrame: (frame, count) => {
477
+ audioFrameCount = count;
478
+ const elapsed = ((performance.now() - startTime) / 1000).toFixed(1);
479
+ updateSpinner('Generating audio...', `${count} frames · ${elapsed}s`);
480
  },
481
  }
482
  );
 
486
  textEl.textContent = generatedText;
487
 
488
  if (result.audioCodes && result.audioCodes.length > 0) {
489
+ updateSpinner('Decoding audio...', `${result.audioCodes.length} frames`);
490
  const waveform = await audioModel.decodeAudioCodes(result.audioCodes);
491
 
492
  if (waveform.length > 0) {
493
+ if (!generatedText) {
494
+ generatedText = `Generated ${result.audioCodes.length} audio frames`;
495
+ }
496
+
497
+ // Create separate audio message block
498
+ const audioMsgEl = document.createElement('div');
499
+ audioMsgEl.className = 'message assistant';
500
  const wavBlob = createWavBlob(waveform, 24000);
501
  const audioUrl = URL.createObjectURL(wavBlob);
502
  const audioEl = document.createElement('audio');
503
  audioEl.src = audioUrl;
504
  audioEl.controls = true;
505
+ audioMsgEl.appendChild(audioEl);
506
+ chatContainer.appendChild(audioMsgEl);
507
+ chatContainer.scrollTop = chatContainer.scrollHeight;
 
 
508
  }
509
  }
510
 
511
  } else {
512
+ showSpinner('Generating response...');
513
  generatedText = await audioModel.generate(messages, {
514
  maxNewTokens: 256,
515
  onToken: onTokenCallback,
 
539
  messages.pop();
540
  setStatus(`Error: ${error.message}`, 'error');
541
  } finally {
542
+ hideSpinner();
543
  isGenerating = false;
544
  setReady(true);
545
  userInput.focus();