csukuangfj committed on
Commit
cdfc5ab
·
1 Parent(s): 09c3692

update model

Browse files
sherpa-onnx-tts.js CHANGED
@@ -20,6 +20,10 @@ function freeConfig(config, Module) {
20
  freeConfig(config.kitten, Module)
21
  }
22
 
 
 
 
 
23
  Module._free(config.ptr);
24
  }
25
 
@@ -29,7 +33,8 @@ function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) {
29
  const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;
30
  const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
31
  const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1;
32
- const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1;
 
33
 
34
  const n = modelLen + lexiconLen + tokensLen + dataDirLen + dictDirLen;
35
 
@@ -51,7 +56,7 @@ function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) {
51
  Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen);
52
  offset += dataDirLen;
53
 
54
- Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen);
55
  offset += dictDirLen;
56
 
57
  offset = 0;
@@ -84,7 +89,9 @@ function initSherpaOnnxOfflineTtsMatchaModelConfig(config, Module) {
84
  const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;
85
  const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
86
  const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1;
87
- const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1;
 
 
88
 
89
  const n = acousticModelLen + vocoderLen + lexiconLen + tokensLen +
90
  dataDirLen + dictDirLen;
@@ -111,7 +118,7 @@ function initSherpaOnnxOfflineTtsMatchaModelConfig(config, Module) {
111
  Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen);
112
  offset += dataDirLen;
113
 
114
- Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen);
115
  offset += dictDirLen;
116
 
117
  offset = 0;
@@ -145,7 +152,8 @@ function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) {
145
  const voicesLen = Module.lengthBytesUTF8(config.voices) + 1;
146
  const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
147
  const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1;
148
- const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1;
 
149
  const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;
150
  const langLen = Module.lengthBytesUTF8(config.lang || '') + 1;
151
 
@@ -170,7 +178,7 @@ function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) {
170
  Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen);
171
  offset += dataDirLen;
172
 
173
- Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen);
174
  offset += dictDirLen;
175
 
176
  Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen);
@@ -254,6 +262,72 @@ function initSherpaOnnxOfflineTtsKittenModelConfig(config, Module) {
254
  }
255
  }
256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
258
  if (!('offlineTtsVitsModelConfig' in config)) {
259
  config.offlineTtsVitsModelConfig = {
@@ -264,7 +338,6 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
264
  noiseScaleW: 0.8,
265
  lengthScale: 1.0,
266
  dataDir: '',
267
- dictDir: '',
268
  };
269
  }
270
 
@@ -277,7 +350,6 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
277
  noiseScale: 0.667,
278
  lengthScale: 1.0,
279
  dataDir: '',
280
- dictDir: '',
281
  };
282
  }
283
 
@@ -288,7 +360,6 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
288
  tokens: '',
289
  lengthScale: 1.0,
290
  dataDir: '',
291
- dictDir: '',
292
  lexicon: '',
293
  lang: '',
294
  };
@@ -303,6 +374,21 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
303
  };
304
  }
305
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
  const vitsModelConfig = initSherpaOnnxOfflineTtsVitsModelConfig(
308
  config.offlineTtsVitsModelConfig, Module);
@@ -316,8 +402,12 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
316
  const kittenModelConfig = initSherpaOnnxOfflineTtsKittenModelConfig(
317
  config.offlineTtsKittenModelConfig, Module);
318
 
 
 
 
319
  const len = vitsModelConfig.len + matchaModelConfig.len +
320
- kokoroModelConfig.len + kittenModelConfig.len + 3 * 4;
 
321
 
322
  const ptr = Module._malloc(len);
323
 
@@ -346,10 +436,14 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
346
  Module._CopyHeap(kittenModelConfig.ptr, kittenModelConfig.len, ptr + offset);
347
  offset += kittenModelConfig.len;
348
 
 
 
 
 
349
  return {
350
  buffer: buffer, ptr: ptr, len: len, config: vitsModelConfig,
351
  matcha: matchaModelConfig, kokoro: kokoroModelConfig,
352
- kitten: kittenModelConfig,
353
  }
354
  }
355
 
@@ -454,7 +548,6 @@ function createOfflineTts(Module, myConfig) {
454
  lexicon: '',
455
  tokens: './tokens.txt',
456
  dataDir: './espeak-ng-data',
457
- dictDir: '',
458
  noiseScale: 0.667,
459
  noiseScaleW: 0.8,
460
  lengthScale: 1.0,
@@ -466,7 +559,6 @@ function createOfflineTts(Module, myConfig) {
466
  lexicon: '',
467
  tokens: '',
468
  dataDir: '',
469
- dictDir: '',
470
  noiseScale: 0.667,
471
  lengthScale: 1.0,
472
  };
@@ -477,7 +569,6 @@ function createOfflineTts(Module, myConfig) {
477
  tokens: '',
478
  dataDir: '',
479
  lengthScale: 1.0,
480
- dictDir: '',
481
  lexicon: '',
482
  lang: '',
483
  };
 
20
  freeConfig(config.kitten, Module)
21
  }
22
 
23
+ if ('zipvoice' in config) {
24
+ freeConfig(config.zipvoice, Module)
25
+ }
26
+
27
  Module._free(config.ptr);
28
  }
29
 
 
33
  const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;
34
  const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
35
  const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1;
36
+ const dictDir = ''
37
+ const dictDirLen = Module.lengthBytesUTF8(dictDir) + 1;
38
 
39
  const n = modelLen + lexiconLen + tokensLen + dataDirLen + dictDirLen;
40
 
 
56
  Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen);
57
  offset += dataDirLen;
58
 
59
+ Module.stringToUTF8(dictDir, buffer + offset, dictDirLen);
60
  offset += dictDirLen;
61
 
62
  offset = 0;
 
89
  const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;
90
  const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
91
  const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1;
92
+
93
+ const dictDir = '';
94
+ const dictDirLen = Module.lengthBytesUTF8(dictDir) + 1;
95
 
96
  const n = acousticModelLen + vocoderLen + lexiconLen + tokensLen +
97
  dataDirLen + dictDirLen;
 
118
  Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen);
119
  offset += dataDirLen;
120
 
121
+ Module.stringToUTF8(dictDir, buffer + offset, dictDirLen);
122
  offset += dictDirLen;
123
 
124
  offset = 0;
 
152
  const voicesLen = Module.lengthBytesUTF8(config.voices) + 1;
153
  const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
154
  const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1;
155
+ const dictDir = '';
156
+ const dictDirLen = Module.lengthBytesUTF8(dictDir) + 1;
157
  const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;
158
  const langLen = Module.lengthBytesUTF8(config.lang || '') + 1;
159
 
 
178
  Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen);
179
  offset += dataDirLen;
180
 
181
+ Module.stringToUTF8(dictDir, buffer + offset, dictDirLen);
182
  offset += dictDirLen;
183
 
184
  Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen);
 
262
  }
263
  }
264
 
265
+ function initSherpaOnnxOfflineTtsZipVoiceModelConfig(config, Module) {
266
+ const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
267
+ const textModelLen = Module.lengthBytesUTF8(config.textModel || '') + 1;
268
+ const flowMatchingModelLen =
269
+ Module.lengthBytesUTF8(config.flowMatchingModel || '') + 1;
270
+ const vocoderLen = Module.lengthBytesUTF8(config.vocoder || '') + 1;
271
+ const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1;
272
+ const pinyinDictLen = Module.lengthBytesUTF8(config.pinyinDict || '') + 1;
273
+
274
+ const n = tokensLen + textModelLen + flowMatchingModelLen + vocoderLen +
275
+ dataDirLen + pinyinDictLen;
276
+
277
+ const buffer = Module._malloc(n);
278
+
279
+ const len = 10 * 4;
280
+ const ptr = Module._malloc(len);
281
+
282
+ let offset = 0;
283
+ Module.stringToUTF8(config.tokens || '', buffer + offset, tokensLen);
284
+ offset += tokensLen;
285
+
286
+ Module.stringToUTF8(config.textModel || '', buffer + offset, textModelLen);
287
+ offset += textModelLen;
288
+
289
+ Module.stringToUTF8(
290
+ config.flowMatchingModel || '', buffer + offset, flowMatchingModelLen);
291
+ offset += flowMatchingModelLen;
292
+
293
+ Module.stringToUTF8(config.vocoder || '', buffer + offset, vocoderLen);
294
+ offset += vocoderLen;
295
+
296
+ Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen);
297
+ offset += dataDirLen;
298
+
299
+ Module.stringToUTF8(config.pinyinDict || '', buffer + offset, pinyinDictLen);
300
+ offset += pinyinDictLen;
301
+
302
+ offset = 0;
303
+ Module.setValue(ptr, buffer + offset, 'i8*');
304
+ offset += tokensLen;
305
+
306
+ Module.setValue(ptr + 4, buffer + offset, 'i8*');
307
+ offset += textModelLen;
308
+
309
+ Module.setValue(ptr + 8, buffer + offset, 'i8*');
310
+ offset += flowMatchingModelLen;
311
+
312
+ Module.setValue(ptr + 12, buffer + offset, 'i8*');
313
+ offset += vocoderLen;
314
+
315
+ Module.setValue(ptr + 16, buffer + offset, 'i8*');
316
+ offset += dataDirLen;
317
+
318
+ Module.setValue(ptr + 20, buffer + offset, 'i8*');
319
+ offset += pinyinDictLen;
320
+
321
+ Module.setValue(ptr + 24, config.featScale || 0.1, 'float');
322
+ Module.setValue(ptr + 28, config.tShift || 0.5, 'float');
323
+ Module.setValue(ptr + 32, config.targetRMS || 0.1, 'float');
324
+ Module.setValue(ptr + 36, config.guidanceScale || 1.0, 'float');
325
+
326
+ return {
327
+ buffer: buffer, ptr: ptr, len: len,
328
+ }
329
+ }
330
+
331
  function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
332
  if (!('offlineTtsVitsModelConfig' in config)) {
333
  config.offlineTtsVitsModelConfig = {
 
338
  noiseScaleW: 0.8,
339
  lengthScale: 1.0,
340
  dataDir: '',
 
341
  };
342
  }
343
 
 
350
  noiseScale: 0.667,
351
  lengthScale: 1.0,
352
  dataDir: '',
 
353
  };
354
  }
355
 
 
360
  tokens: '',
361
  lengthScale: 1.0,
362
  dataDir: '',
 
363
  lexicon: '',
364
  lang: '',
365
  };
 
374
  };
375
  }
376
 
377
+ if (!('offlineTtsZipVoiceModelConfig' in config)) {
378
+ config.offlineTtsZipVoiceModelConfig = {
379
+ tokens: '',
380
+ textModel: '',
381
+ flowMatchingModel: '',
382
+ vocoder: '',
383
+ dataDir: '',
384
+ pinyinDict: '',
385
+ featScale: 0.1,
386
+ tShift: 0.5,
387
+ targetRMS: 0.1,
388
+ guidanceScale: 1.0,
389
+ };
390
+ }
391
+
392
 
393
  const vitsModelConfig = initSherpaOnnxOfflineTtsVitsModelConfig(
394
  config.offlineTtsVitsModelConfig, Module);
 
402
  const kittenModelConfig = initSherpaOnnxOfflineTtsKittenModelConfig(
403
  config.offlineTtsKittenModelConfig, Module);
404
 
405
+ const zipVoiceModelConfig = initSherpaOnnxOfflineTtsZipVoiceModelConfig(
406
+ config.offlineTtsZipVoiceModelConfig, Module);
407
+
408
  const len = vitsModelConfig.len + matchaModelConfig.len +
409
+ kokoroModelConfig.len + kittenModelConfig.len + zipVoiceModelConfig.len +
410
+ 3 * 4;
411
 
412
  const ptr = Module._malloc(len);
413
 
 
436
  Module._CopyHeap(kittenModelConfig.ptr, kittenModelConfig.len, ptr + offset);
437
  offset += kittenModelConfig.len;
438
 
439
+ Module._CopyHeap(
440
+ zipVoiceModelConfig.ptr, zipVoiceModelConfig.len, ptr + offset);
441
+ offset += zipVoiceModelConfig.len;
442
+
443
  return {
444
  buffer: buffer, ptr: ptr, len: len, config: vitsModelConfig,
445
  matcha: matchaModelConfig, kokoro: kokoroModelConfig,
446
+ kitten: kittenModelConfig, zipvoice: zipVoiceModelConfig,
447
  }
448
  }
449
 
 
548
  lexicon: '',
549
  tokens: './tokens.txt',
550
  dataDir: './espeak-ng-data',
 
551
  noiseScale: 0.667,
552
  noiseScaleW: 0.8,
553
  lengthScale: 1.0,
 
559
  lexicon: '',
560
  tokens: '',
561
  dataDir: '',
 
562
  noiseScale: 0.667,
563
  lengthScale: 1.0,
564
  };
 
569
  tokens: '',
570
  dataDir: '',
571
  lengthScale: 1.0,
 
572
  lexicon: '',
573
  lang: '',
574
  };
sherpa-onnx-wasm-main-tts.js CHANGED
The diff for this file is too large to render. See raw diff
 
sherpa-onnx-wasm-main-tts.wasm CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c72c89eaf0c6a4c6e022de52cb26de629a1b31220c8ba6869307a2665e3c0596
3
- size 11923804
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8b0c97d0915da2cdfbc809255cff0f3fc6a655fb51aa3897210d72c769b4515
3
+ size 11875129