Rekey committed on
Commit
e2ef75a
·
verified ·
1 Parent(s): 0f2bff9

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +1017 -0
app.py ADDED
@@ -0,0 +1,1017 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess, torch, os, traceback, sys, warnings, shutil, numpy as np
2
+ from mega import Mega
3
+ os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"
4
+ import threading
5
+ from time import sleep
6
+ from subprocess import Popen
7
+ import faiss
8
+ from random import shuffle
9
+ import json, datetime, requests
10
+ from gtts import gTTS
11
+ now_dir = os.getcwd()
12
+ sys.path.append(now_dir)
13
+ tmp = os.path.join(now_dir, "TEMP")
14
+ shutil.rmtree(tmp, ignore_errors=True)
15
+ shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
16
+ os.makedirs(tmp, exist_ok=True)
17
+ os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
18
+ os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True)
19
+ os.environ["TEMP"] = tmp
20
+ warnings.filterwarnings("ignore")
21
+ torch.manual_seed(114514)
22
+ from i18n import I18nAuto
23
+
24
+ import signal
25
+
26
+ import math
27
+
28
+ from utils import load_audio, CSVutil
29
+
30
# Formant-shift settings are persisted in a tiny CSV "database" so they
# survive restarts; they are read back below into module-level globals.
# NOTE(review): `global` at module scope is a no-op — kept for clarity only.
global DoFormant, Quefrency, Timbre

# First run: create the csvdb folder and the empty state files.
if not os.path.isdir('csvdb/'):
    os.makedirs('csvdb')
    frmnt, stp = open("csvdb/formanting.csv", 'w'), open("csvdb/stop.csv", 'w')
    frmnt.close()
    stp.close()

try:
    # Values are stored as strings; coerce "true"/"false" back to bool and
    # leave any other value untouched.
    DoFormant, Quefrency, Timbre = CSVutil('csvdb/formanting.csv', 'r', 'formanting')
    DoFormant = (
        lambda DoFormant: True if DoFormant.lower() == 'true' else (False if DoFormant.lower() == 'false' else DoFormant)
    )(DoFormant)
except (ValueError, TypeError, IndexError):
    # Missing or corrupt state: fall back to defaults and rewrite the file.
    DoFormant, Quefrency, Timbre = False, 1.0, 1.0
    CSVutil('csvdb/formanting.csv', 'w+', 'formanting', DoFormant, Quefrency, Timbre)
46
+
47
def download_models():
    """Download the pretrained checkpoints needed for inference.

    Fetches ``hubert_base.pt`` (the HuBERT content encoder) and ``rmvpe.pt``
    (the RMVPE pitch extractor) into the working directory, skipping any
    file that already exists.

    Raises:
        Exception: when a download responds with a non-200 status code.
    """
    # Download hubert base model if not present
    if not os.path.isfile('./hubert_base.pt'):
        # Bug fix: requests.get without a timeout can hang app startup
        # forever on a stalled mirror; bound the connect/read waits.
        response = requests.get('https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt', timeout=(10, 600))

        if response.status_code == 200:
            with open('./hubert_base.pt', 'wb') as f:
                f.write(response.content)
            print("Downloaded hubert base model file successfully. File saved to ./hubert_base.pt.")
        else:
            raise Exception("Failed to download hubert base model file. Status code: " + str(response.status_code) + ".")

    # Download rmvpe model if not present
    if not os.path.isfile('./rmvpe.pt'):
        response = requests.get('https://drive.usercontent.google.com/download?id=1Hkn4kNuVFRCNQwyxQFRtmzmMBGpQxptI&export=download&authuser=0&confirm=t&uuid=0b3a40de-465b-4c65-8c41-135b0b45c3f7&at=APZUnTV3lA3LnyTbeuduura6Dmi2:1693724254058', timeout=(10, 600))

        if response.status_code == 200:
            with open('./rmvpe.pt', 'wb') as f:
                f.write(response.content)
            print("Downloaded rmvpe model file successfully. File saved to ./rmvpe.pt.")
        else:
            raise Exception("Failed to download rmvpe model file. Status code: " + str(response.status_code) + ".")
69
+
70
+ download_models()
71
+
72
+ print("\n-------------------------------\nRVC v2 Easy GUI (Local Edition)\n-------------------------------\n")
73
+
74
+ i18n = I18nAuto()
75
+ #i18n.print()
76
+ # 判断是否有能用来训练和加速推理的N卡
77
+ ngpu = torch.cuda.device_count()
78
+ gpu_infos = []
79
+ mem = []
80
+ if (not torch.cuda.is_available()) or ngpu == 0:
81
+ if_gpu_ok = False
82
+ else:
83
+ if_gpu_ok = False
84
+ for i in range(ngpu):
85
+ gpu_name = torch.cuda.get_device_name(i)
86
+ if (
87
+ "10" in gpu_name
88
+ or "16" in gpu_name
89
+ or "20" in gpu_name
90
+ or "30" in gpu_name
91
+ or "40" in gpu_name
92
+ or "A2" in gpu_name.upper()
93
+ or "A3" in gpu_name.upper()
94
+ or "A4" in gpu_name.upper()
95
+ or "P4" in gpu_name.upper()
96
+ or "A50" in gpu_name.upper()
97
+ or "A60" in gpu_name.upper()
98
+ or "70" in gpu_name
99
+ or "80" in gpu_name
100
+ or "90" in gpu_name
101
+ or "M4" in gpu_name.upper()
102
+ or "T4" in gpu_name.upper()
103
+ or "TITAN" in gpu_name.upper()
104
+ ): # A10#A100#V100#A40#P40#M40#K80#A4500
105
+ if_gpu_ok = True # 至少有一张能用的N卡
106
+ gpu_infos.append("%s\t%s" % (i, gpu_name))
107
+ mem.append(
108
+ int(
109
+ torch.cuda.get_device_properties(i).total_memory
110
+ / 1024
111
+ / 1024
112
+ / 1024
113
+ + 0.4
114
+ )
115
+ )
116
+ if if_gpu_ok == True and len(gpu_infos) > 0:
117
+ gpu_info = "\n".join(gpu_infos)
118
+ default_batch_size = min(mem) // 2
119
+ else:
120
+ gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练")
121
+ default_batch_size = 1
122
+ gpus = "-".join([i[0] for i in gpu_infos])
123
+ from lib.infer_pack.models import (
124
+ SynthesizerTrnMs256NSFsid,
125
+ SynthesizerTrnMs256NSFsid_nono,
126
+ SynthesizerTrnMs768NSFsid,
127
+ SynthesizerTrnMs768NSFsid_nono,
128
+ )
129
+ import soundfile as sf
130
+ from fairseq import checkpoint_utils
131
+ import gradio as gr
132
+ import logging
133
+ from vc_infer_pipeline import VC
134
+ from config import Config
135
+
136
+ config = Config()
137
+ # from trainset_preprocess_pipeline import PreProcess
138
+ logging.getLogger("numba").setLevel(logging.WARNING)
139
+
140
+ hubert_model = None
141
+
142
def load_hubert():
    # Load the HuBERT content encoder once and cache it in the module-level
    # ``hubert_model`` global so repeated conversions reuse the same network.
    global hubert_model
    models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
        ["hubert_base.pt"],
        suffix="",
    )
    hubert_model = models[0]
    hubert_model = hubert_model.to(config.device)
    # Match the precision configured for the rest of the pipeline.
    if config.is_half:
        hubert_model = hubert_model.half()
    else:
        hubert_model = hubert_model.float()
    hubert_model.eval()
155
+
156
+
157
+ weight_root = "weights"
158
+ index_root = "logs"
159
+ names = []
160
+ for name in os.listdir(weight_root):
161
+ if name.endswith(".pth"):
162
+ names.append(name)
163
+ index_paths = []
164
+ for root, dirs, files in os.walk(index_root, topdown=False):
165
+ for name in files:
166
+ if name.endswith(".index") and "trained" not in name:
167
+ index_paths.append("%s/%s" % (root, name))
168
+
169
+
170
+
171
def vc_single(
    sid,
    input_audio_path,
    f0_up_key,
    f0_file,
    f0_method,
    file_index,
    #file_index2,
    # file_big_npy,
    index_rate,
    filter_radius,
    resample_sr,
    rms_mix_rate,
    protect,
    crepe_hop_length,
):  # spk_item, input_audio0, vc_transform0,f0_file,f0method0
    """Convert one audio file with the currently loaded voice model.

    Returns:
        (status_message, (sample_rate, audio)) on success; on failure the
        message holds the traceback and the tuple is (None, None).

    Relies on the module globals populated by get_vc() and load_hubert().
    """
    global tgt_sr, net_g, vc, hubert_model, version
    if input_audio_path is None:
        return "You need to upload an audio", None
    f0_up_key = int(f0_up_key)
    try:
        # Decode (and optionally formant-shift) the input at the 16 kHz
        # rate HuBERT expects.
        audio = load_audio(input_audio_path, 16000, DoFormant, Quefrency, Timbre)
        # Normalize only when the peak would clip after 0.95 headroom.
        audio_max = np.abs(audio).max() / 0.95
        if audio_max > 1:
            audio /= audio_max
        times = [0, 0, 0]  # filled by the pipeline with npy/f0/infer timings
        # Lazily load the content encoder on first use.
        if hubert_model == None:
            load_hubert()
        if_f0 = cpt.get("f0", 1)
        # Sanitize a hand-pasted index path (quotes/whitespace/newlines) and
        # auto-swap a "trained" index for its usable "added" counterpart.
        file_index = (
            (
                file_index.strip(" ")
                .strip('"')
                .strip("\n")
                .strip('"')
                .strip(" ")
                .replace("trained", "added")
            )
        )
        # file_big_npy = (
        #     file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        # )
        audio_opt = vc.pipeline(
            hubert_model,
            net_g,
            sid,
            audio,
            input_audio_path,
            times,
            f0_up_key,
            f0_method,
            file_index,
            # file_big_npy,
            index_rate,
            if_f0,
            filter_radius,
            tgt_sr,
            resample_sr,
            rms_mix_rate,
            version,
            protect,
            crepe_hop_length,
            f0_file=f0_file,
        )
        # Report the post-resample rate so the player uses the right one.
        if resample_sr >= 16000 and tgt_sr != resample_sr:
            tgt_sr = resample_sr
        index_info = (
            "Using index:%s." % file_index
            if os.path.exists(file_index)
            else "Index not used."
        )
        return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
            index_info,
            times[0],
            times[1],
            times[2],
        ), (tgt_sr, audio_opt)
    except:
        # Surface the full traceback in the UI instead of crashing the app.
        info = traceback.format_exc()
        print(info)
        return info, (None, None)
252
+
253
+
254
def vc_multi(
    sid,
    dir_path,
    opt_root,
    paths,
    f0_up_key,
    f0_method,
    file_index,
    file_index2,
    # file_big_npy,
    index_rate,
    filter_radius,
    resample_sr,
    rms_mix_rate,
    protect,
    format1,
    crepe_hop_length,
):
    """Batch-convert a folder (or an uploaded file list) of audio.

    Yields cumulative progress strings ("<file>-><status>" per line) so the
    Gradio textbox updates while the batch runs. Converted files are written
    to *opt_root* in format *format1*.
    """
    try:
        # Strip quotes/whitespace/newlines that sneak in when paths are
        # copied from a file manager's address bar.
        dir_path = (
            dir_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        )
        opt_root = opt_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        os.makedirs(opt_root, exist_ok=True)
        try:
            # A non-empty folder path takes precedence over the upload list.
            if dir_path != "":
                paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)]
            else:
                paths = [path.name for path in paths]
        except:
            traceback.print_exc()
            paths = [path.name for path in paths]
        infos = []
        for path in paths:
            info, opt = vc_single(
                sid,
                path,
                f0_up_key,
                None,
                f0_method,
                file_index,
                # file_big_npy,
                index_rate,
                filter_radius,
                resample_sr,
                rms_mix_rate,
                protect,
                crepe_hop_length
            )
            if "Success" in info:
                try:
                    tgt_sr, audio_opt = opt
                    if format1 in ["wav", "flac"]:
                        # soundfile writes these formats directly.
                        sf.write(
                            "%s/%s.%s" % (opt_root, os.path.basename(path), format1),
                            audio_opt,
                            tgt_sr,
                        )
                    else:
                        # mp3/m4a: write a wav first, then transcode via ffmpeg.
                        path = "%s/%s.wav" % (opt_root, os.path.basename(path))
                        sf.write(
                            path,
                            audio_opt,
                            tgt_sr,
                        )
                        if os.path.exists(path):
                            os.system(
                                "ffmpeg -i %s -vn %s -q:a 2 -y"
                                % (path, path[:-4] + ".%s" % format1)
                            )
                except:
                    info += traceback.format_exc()
            infos.append("%s->%s" % (os.path.basename(path), info))
            yield "\n".join(infos)
        yield "\n".join(infos)
    except:
        yield traceback.format_exc()
331
+
332
# Only one voice model can be loaded per tab at a time.
def get_vc(sid):
    """Load (or unload) the voice model *sid* into the module globals.

    An empty *sid* tears the current model down and frees CUDA memory.
    Returns a Gradio update dict for the speaker-id slider.
    """
    global n_spk, tgt_sr, net_g, vc, cpt, version
    if sid == "" or sid == []:
        global hubert_model
        # Polling may switch from "model loaded" to "no model": drop everything.
        if hubert_model != None:
            print("clean_empty_cache")
            del net_g, n_spk, vc, hubert_model, tgt_sr  # ,cpt
            hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            # Rebuild and delete the network once more; per the original
            # author, the CUDA cache is not fully released without this
            # extra round-trip.
            if_f0 = cpt.get("f0", 1)
            version = cpt.get("version", "v1")
            if version == "v1":
                if if_f0 == 1:
                    net_g = SynthesizerTrnMs256NSFsid(
                        *cpt["config"], is_half=config.is_half
                    )
                else:
                    net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
            elif version == "v2":
                if if_f0 == 1:
                    net_g = SynthesizerTrnMs768NSFsid(
                        *cpt["config"], is_half=config.is_half
                    )
                else:
                    net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
            del net_g, cpt
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            cpt = None
        return {"visible": False, "__type__": "update"}
    person = "%s/%s" % (weight_root, sid)
    print("loading %s" % person)
    cpt = torch.load(person, map_location="cpu")
    tgt_sr = cpt["config"][-1]
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
    if_f0 = cpt.get("f0", 1)
    version = cpt.get("version", "v1")
    # Pick the synthesizer class matching the checkpoint's version and
    # whether it was trained with pitch (f0) conditioning.
    if version == "v1":
        if if_f0 == 1:
            net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
        else:
            net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
    elif version == "v2":
        if if_f0 == 1:
            net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
        else:
            net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
    # The posterior encoder is only needed during training.
    del net_g.enc_q
    print(net_g.load_state_dict(cpt["weight"], strict=False))
    net_g.eval().to(config.device)
    if config.is_half:
        net_g = net_g.half()
    else:
        net_g = net_g.float()
    vc = VC(tgt_sr, config)
    n_spk = cpt["config"][-3]
    return {"visible": False, "maximum": n_spk, "__type__": "update"}
392
+
393
+
394
def change_choices():
    """Re-scan the weights and logs folders; return dropdown updates."""
    model_names = [f for f in os.listdir(weight_root) if f.endswith(".pth")]
    found_indexes = []
    for folder, _subdirs, files in os.walk(index_root, topdown=False):
        found_indexes.extend(
            "%s/%s" % (folder, f)
            for f in files
            if f.endswith(".index") and "trained" not in f
        )
    model_update = {"choices": sorted(model_names), "__type__": "update"}
    index_update = {"choices": sorted(found_indexes), "__type__": "update"}
    return model_update, index_update
408
+
409
+
410
def clean():
    """Return a Gradio update that blanks a component's value."""
    update_payload = {"__type__": "update"}
    update_payload["value"] = ""
    return update_payload
412
+
413
+
414
# Map from UI sample-rate labels to their value in Hz.
sr_dict = {
    "32k": 32000,
    "40k": 40000,
    "48k": 48000,
}
418
+ }
419
+
420
+
421
def if_done(done, p):
    """Block until subprocess *p* exits, then flag completion.

    Args:
        done: single-element mutable list used as an out-parameter;
            done[0] is set to True once the process has terminated.
        p: a subprocess.Popen handle.
    """
    # Popen.wait() blocks until the child exits, replacing the original
    # poll()/sleep busy-loop (which also compared with ``== None`` instead
    # of the idiomatic ``is None``).
    p.wait()
    done[0] = True
428
+
429
+
430
def if_done_multi(done, ps):
    """Block until every subprocess in *ps* has exited, then set done[0].

    Args:
        done: single-element mutable list used as an out-parameter.
        ps: iterable of subprocess.Popen handles.
    """
    # poll() returning None means the child is still running; keep
    # sleeping for as long as any member of the batch is alive.
    while True:
        still_running = any(p.poll() is None for p in ps)
        if not still_running:
            break
        sleep(0.5)
    done[0] = True
443
+
444
+
445
+
446
+
447
+
448
+
449
+ # but5.click(train1key, [exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0method8, save_epoch10, total_epoch11, batch_size12, if_save_latest13, pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17], info3)
450
+
451
+
452
def whethercrepeornah(radio):
    """Show the crepe hop-length slider only for the mangio-crepe methods."""
    is_mangio_crepe = radio in ('mangio-crepe', 'mangio-crepe-tiny')
    return {"visible": is_mangio_crepe, "__type__": "update"}
455
+
456
+ # ckpt_path2.change(change_info_,[ckpt_path2],[sr__,if_f0__])
457
+
458
+
459
+ #region RVC WebUI App
460
+
461
+
462
def change_choices2():
    """Re-scan ./audios and return updated choices for the audio dropdown."""
    supported = ('.wav', '.mp3', '.ogg', '.flac', '.m4a', '.aac', '.mp4')
    found = [
        os.path.join('./audios', entry).replace('\\', '/')
        for entry in os.listdir("./audios")
        if entry.endswith(supported)
    ]
    return {"choices": sorted(found), "__type__": "update"}, {"__type__": "update"}
468
+
469
+ audio_files=[]
470
+ for filename in os.listdir("./audios"):
471
+ if filename.endswith(('.wav','.mp3','.ogg','.flac','.m4a','.aac','.mp4')):
472
+ audio_files.append(os.path.join('./audios',filename).replace('\\', '/'))
473
+
474
def get_index():
    """Return the first .index file of the default model, or '' if none."""
    if check_for_name() == '':
        return ''
    default_model = sorted(names)[0].split(".")[0]
    logs_path = "./logs/" + default_model
    if not os.path.exists(logs_path):
        return ''
    for entry in os.listdir(logs_path):
        if entry.endswith(".index"):
            return os.path.join(logs_path, entry)
    return ''
485
+
486
def get_indexes():
    """Walk ./logs and collect every .index file; '' when none exist."""
    collected = [
        os.path.join(dirpath, fname)
        for dirpath, _dirnames, filenames in os.walk("./logs/")
        for fname in filenames
        if fname.endswith(".index")
    ]
    # Callers rely on the falsy '' (not an empty list) when nothing was
    # found, so that quirk is preserved.
    return collected if collected else ''
496
+
497
def get_name():
    """Return the alphabetically first discovered audio file, or ''."""
    return sorted(audio_files)[0] if audio_files else ''
502
+
503
def save_to_wav(record_button):
    """Move a recorded file into ./audios under a timestamped name.

    Returns the new path, or None when no recording was supplied.
    """
    if record_button is None:
        return None
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    destination = './audios/' + timestamp + '.wav'
    shutil.move(record_button, destination)
    return destination
512
+
513
def save_to_wav2(dropbox):
    """Move an uploaded file into ./audios and return its new location."""
    source = dropbox.name
    shutil.move(source, './audios')
    return os.path.join('./audios', os.path.basename(source))
517
+
518
def match_index(sid0):
    """Find the .index file that belongs to the model *sid0*.

    Args:
        sid0: model weight filename, e.g. ``"MyModel.pth"``.

    Returns:
        Path to the first ``.index`` file under ``./logs/<model>``, or ''
        when the folder or file does not exist.
    """
    folder = sid0.split(".")[0]
    parent_dir = "./logs/" + folder
    if os.path.exists(parent_dir):
        for filename in os.listdir(parent_dir):
            if filename.endswith(".index"):
                return os.path.join(parent_dir, filename)
    # Bug fix: the original fell off the end (returning None) when the log
    # folder existed but held no .index file; callers expect a string.
    return ''
528
+
529
def check_for_name():
    """Return the first model name alphabetically, or '' when none exist."""
    return sorted(names)[0] if names else ''
534
+
535
def download_from_url(url, model):
    """Download a zipped voice model and install its weight and index files.

    Supports Google Drive (via gdown), mega.nz (via Mega) and plain URLs
    (via wget). The archive's ``.pth`` goes to ``./weights/<model>.pth`` and
    any ``.index`` files to ``./logs/<model>/``.

    Returns:
        A human-readable status string for the UI.
    """
    if url == '':
        return "URL cannot be left empty."
    if model == '':
        return "You need to name your model. For example: My-Model"
    url = url.strip()
    # Start from clean scratch directories for the archive and its contents.
    zip_dirs = ["zips", "unzips"]
    for directory in zip_dirs:
        if os.path.exists(directory):
            shutil.rmtree(directory)
    os.makedirs("zips", exist_ok=True)
    os.makedirs("unzips", exist_ok=True)
    zipfile = model + '.zip'
    zipfile_path = './zips/' + zipfile
    try:
        if "drive.google.com" in url:
            subprocess.run(["gdown", url, "--fuzzy", "-O", zipfile_path])
        elif "mega.nz" in url:
            m = Mega()
            m.download_url(url, './zips')
        else:
            subprocess.run(["wget", url, "-O", zipfile_path])
        for filename in os.listdir("./zips"):
            if filename.endswith(".zip"):
                zipfile_path = os.path.join("./zips/", filename)
                shutil.unpack_archive(zipfile_path, "./unzips", 'zip')
            else:
                return "No zipfile found."
        for root, dirs, files in os.walk('./unzips'):
            for file in files:
                file_path = os.path.join(root, file)
                if file.endswith(".index"):
                    # Bug fix: os.mkdir raised FileExistsError when the logs
                    # folder already existed or the archive held several
                    # .index files, aborting the whole import.
                    os.makedirs(f'./logs/{model}', exist_ok=True)
                    shutil.copy2(file_path, f'./logs/{model}')
                elif "G_" not in file and "D_" not in file and file.endswith(".pth"):
                    shutil.copy(file_path, f'./weights/{model}.pth')
        shutil.rmtree("zips")
        shutil.rmtree("unzips")
        return "Success."
    except Exception:
        # Narrowed from a bare ``except:`` (which also swallowed
        # KeyboardInterrupt/SystemExit) and log the cause for debugging.
        traceback.print_exc()
        return "There's been an error."
576
def success_message(face):
    """Report a successful character upload and reset the preset dropdown."""
    status = f'{face.name} has been uploaded.'
    return status, 'None'
578
def mouth(size, face, voice, faces):
    """Run Wav2Lip lip-sync on a face video using the converted audio.

    Returns the output video path and a status message. Paths assume the
    Colab-style /content/wav2lip-HD checkout.
    """
    resize_factor = 2 if size == 'Half' else 1
    # Pick either the uploaded character or one of the bundled presets.
    if faces == 'None':
        character = face.name
    elif faces == 'Ben Shapiro':
        character = '/content/wav2lip-HD/inputs/ben-shapiro-10.mp4'
    elif faces == 'Andrew Tate':
        character = '/content/wav2lip-HD/inputs/tate-7.mp4'
    command = (
        "python inference.py "
        "--checkpoint_path checkpoints/wav2lip.pth "
        f"--face {character} "
        f"--audio {voice} "
        "--pads 0 20 0 0 "
        "--outfile /content/wav2lip-HD/outputs/result.mp4 "
        "--fps 24 "
        f"--resize_factor {resize_factor}"
    )
    process = subprocess.Popen(command, shell=True, cwd='/content/wav2lip-HD/Wav2Lip-master')
    stdout, stderr = process.communicate()
    return '/content/wav2lip-HD/outputs/result.mp4', 'Animation completed.'
601
# ElevenLabs preset voices offered in the TTS dropdown, and their API voice
# IDs in the same order; zipped into a name -> id lookup below.
eleven_voices = ['Adam','Antoni','Josh','Arnold','Sam','Bella','Rachel','Domi','Elli']
eleven_voices_ids=['pNInz6obpgDQGcFmaJgB','ErXwobaYiN019PkySvjV','TxGEqnHWrfWFTfGW9XjX','VR6AewLTigWG4xSOukaG','yoZ06aMxZJJ28mfd3POQ','EXAVITQu4vr4xnSDxMaL','21m00Tcm4TlvDq8ikWAM','AZnzlk1XvdvUeBnXmlld','MF3mGyEYCl7XYWbV9V6O']
chosen_voice = dict(zip(eleven_voices, eleven_voices_ids))
604
+
605
def stoptraining(mim):
    # Abort a running training job when the hidden flag *mim* equals 1.
    if int(mim) == 1:
        try:
            # Persist the stop request so training loops polling the CSV
            # database can shut down cleanly.
            CSVutil('csvdb/stop.csv', 'w+', 'stop', 'True')
            # NOTE(review): PID is not defined anywhere in this file, so this
            # os.kill likely always raises NameError, which is swallowed and
            # printed below. Confirm where PID was meant to come from.
            os.kill(PID, signal.SIGTERM)
        except Exception as e:
            print(f"Couldn't click due to {e}")
    # Swap the visibility of the stop/start buttons in the UI.
    return (
        {"visible": False, "__type__": "update"},
        {"visible": True, "__type__": "update"},
    )
616
+
617
+
618
def elevenTTS(xiapi, text, id, lang):
    """Synthesize *text* to speech and stage the result as an input file.

    Uses the ElevenLabs API when both an API key (*xiapi*) and a voice
    (*id*) are supplied; otherwise falls back to Google TTS. Returns the
    staged path twice (for the recorder widget and the audio dropdown).
    """
    if xiapi!= '' and id !='':
        choice = chosen_voice[id]
        CHUNK_SIZE = 1024
        url = f"https://api.elevenlabs.io/v1/text-to-speech/{choice}"
        headers = {
            "Accept": "audio/mpeg",
            "Content-Type": "application/json",
            "xi-api-key": xiapi
        }
        # ElevenLabs needs the multilingual model for non-English text.
        if lang == 'en':
            data = {
                "text": text,
                "model_id": "eleven_monolingual_v1",
                "voice_settings": {
                    "stability": 0.5,
                    "similarity_boost": 0.5
                }
            }
        else:
            data = {
                "text": text,
                "model_id": "eleven_multilingual_v1",
                "voice_settings": {
                    "stability": 0.5,
                    "similarity_boost": 0.5
                }
            }

        response = requests.post(url, json=data, headers=headers)
        # Stream the mp3 reply to disk in chunks.
        with open('./temp_eleven.mp3', 'wb') as f:
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                if chunk:
                    f.write(chunk)
        # Stage the file under ./audios with a timestamped name.
        aud_path = save_to_wav('./temp_eleven.mp3')
        return aud_path, aud_path
    else:
        tts = gTTS(text, lang=lang)
        tts.save('./temp_gTTS.mp3')
        aud_path = save_to_wav('./temp_gTTS.mp3')
        return aud_path, aud_path
659
+
660
def upload_to_dataset(files, dir):
    """Copy uploaded files into a training dataset folder.

    An empty *dir* falls back to './dataset'. Returns a status string with
    the number of files copied (leading space kept for UI parity).
    """
    target = dir if dir != '' else './dataset'
    os.makedirs(target, exist_ok=True)
    copied = 0
    for upload in files:
        shutil.copy2(upload.name, target)
        copied += 1
    return f' {copied} files uploaded to {target}.'
671
+
672
def zip_downloader(model):
    """Collect the files needed to export a trained voice.

    Returns (files, status): the .pth weight plus its 'added' .index file
    when one exists, otherwise just the weight with an explanatory status,
    or a Gradio update dict when the weight itself is missing.
    """
    weight_path = f'./weights/{model}.pth'
    if not os.path.exists(weight_path):
        return {"__type__": "update"}, f'Make sure the Voice Name is correct. I could not find {model}.pth'
    logs_dir = f'./logs/{model}'
    log_file = None
    # Bug fix: the original crashed with FileNotFoundError when the weight
    # existed but its logs folder did not (also left log_file unbound).
    if os.path.isdir(logs_dir):
        for file in os.listdir(logs_dir):
            if file.endswith('.index') and 'added' in file:
                log_file = file
    if log_file is not None:
        return [weight_path, f'{logs_dir}/{log_file}'], "Done"
    return weight_path, "Could not find Index file."
684
+
685
+ with gr.Blocks(theme=gr.themes.Base(), title='Mangio-RVC-Web 💻') as app:
686
+ with gr.Tabs():
687
+ with gr.TabItem("Inference"):
688
+ gr.HTML("<h1> RVC V2 Huggingface Version </h1>")
689
+ gr.HTML("<h4> Inference may take time because this space does not use GPU :( </h4>")
690
+ gr.HTML("<h10> Huggingface version made by Rekey </h10>")
691
+ gr.HTML("<h10> Easy GUI coded by Rejekts </h10>")
692
+ gr.HTML("<h4> If you want to use this space privately, I recommend you duplicate the space. </h4>")
693
+
694
+ # Inference Preset Row
695
+ # with gr.Row():
696
+ # mangio_preset = gr.Dropdown(label="Inference Preset", choices=sorted(get_presets()))
697
+ # mangio_preset_name_save = gr.Textbox(
698
+ # label="Your preset name"
699
+ # )
700
+ # mangio_preset_save_btn = gr.Button('Save Preset', variant="primary")
701
+
702
+ # Other RVC stuff
703
+ with gr.Row():
704
+ sid0 = gr.Dropdown(label="1.Choose your Model.", choices=sorted(names), value=check_for_name())
705
+ refresh_button = gr.Button("Refresh", variant="primary")
706
+ if check_for_name() != '':
707
+ get_vc(sorted(names)[0])
708
+ vc_transform0 = gr.Number(label="Optional: You can change the pitch here or leave it at 0.", value=0)
709
+ #clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary")
710
+ spk_item = gr.Slider(
711
+ minimum=0,
712
+ maximum=2333,
713
+ step=1,
714
+ label=i18n("请选择说话人id"),
715
+ value=0,
716
+ visible=False,
717
+ interactive=True,
718
+ )
719
+ #clean_button.click(fn=clean, inputs=[], outputs=[sid0])
720
+ sid0.change(
721
+ fn=get_vc,
722
+ inputs=[sid0],
723
+ outputs=[spk_item],
724
+ )
725
+ but0 = gr.Button("Convert", variant="primary")
726
+ with gr.Row():
727
+ with gr.Column():
728
+ with gr.Row():
729
+ dropbox = gr.File(label="Drop your audio here & hit the Reload button.")
730
+ with gr.Row():
731
+ record_button=gr.Audio(source="microphone", label="OR Record audio.", type="filepath")
732
+ with gr.Row():
733
+ input_audio0 = gr.Dropdown(
734
+ label="2.Choose your audio.",
735
+ value="./audios/someguy.mp3",
736
+ choices=audio_files
737
+ )
738
+ dropbox.upload(fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio0])
739
+ dropbox.upload(fn=change_choices2, inputs=[], outputs=[input_audio0])
740
+ refresh_button2 = gr.Button("Refresh", variant="primary", size='sm')
741
+ record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0])
742
+ record_button.change(fn=change_choices2, inputs=[], outputs=[input_audio0])
743
+ with gr.Row():
744
+ with gr.Accordion('Text To Speech', open=False):
745
+ with gr.Column():
746
+ lang = gr.Radio(label='Chinese & Japanese do not work with ElevenLabs currently.',choices=['en','es','fr','pt','zh-CN','de','hi','ja'], value='en')
747
+ api_box = gr.Textbox(label="Enter your API Key for ElevenLabs, or leave empty to use GoogleTTS", value='')
748
+ elevenid=gr.Dropdown(label="Voice:", choices=eleven_voices)
749
+ with gr.Column():
750
+ tfs = gr.Textbox(label="Input your Text", interactive=True, value="This is a test.")
751
+ tts_button = gr.Button(value="Speak")
752
+ tts_button.click(fn=elevenTTS, inputs=[api_box,tfs, elevenid, lang], outputs=[record_button, input_audio0])
753
+ with gr.Row():
754
+ with gr.Accordion('Wav2Lip', open=False):
755
+ with gr.Row():
756
+ size = gr.Radio(label='Resolution:',choices=['Half','Full'])
757
+ face = gr.UploadButton("Upload A Character",type='file')
758
+ faces = gr.Dropdown(label="OR Choose one:", choices=['None','Ben Shapiro','Andrew Tate'])
759
+ with gr.Row():
760
+ preview = gr.Textbox(label="Status:",interactive=False)
761
+ face.upload(fn=success_message,inputs=[face], outputs=[preview, faces])
762
+ with gr.Row():
763
+ animation = gr.Video(type='filepath')
764
+ refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0, animation])
765
+ with gr.Row():
766
+ animate_button = gr.Button('Animate')
767
+
768
+ with gr.Column():
769
+ with gr.Accordion("Index Settings", open=False):
770
+ file_index1 = gr.Dropdown(
771
+ label="3. Path to your added.index file (if it didn't automatically find it.)",
772
+ choices=get_indexes(),
773
+ value=get_index(),
774
+ interactive=True,
775
+ )
776
+ sid0.change(fn=match_index, inputs=[sid0],outputs=[file_index1])
777
+ refresh_button.click(
778
+ fn=change_choices, inputs=[], outputs=[sid0, file_index1]
779
+ )
780
+
781
+ index_rate1 = gr.Slider(
782
+ minimum=0,
783
+ maximum=1,
784
+ label=i18n("检索特征占比"),
785
+ value=0.66,
786
+ interactive=True,
787
+ )
788
+ vc_output2 = gr.Audio(
789
+ label="Output Audio (Click on the Three Dots in the Right Corner to Download)",
790
+ type='filepath',
791
+ interactive=False,
792
+ )
793
+ animate_button.click(fn=mouth, inputs=[size, face, vc_output2, faces], outputs=[animation, preview])
794
+ with gr.Accordion("Advanced Settings", open=False):
795
+ f0method0 = gr.Radio(
796
+ label="Optional: Change the Pitch Extraction Algorithm.\nExtraction methods are sorted from 'worst quality' to 'best quality'.\nmangio-crepe may or may not be better than rmvpe in cases where 'smoothness' is more important, but rmvpe is the best overall.",
797
+ choices=["pm", "dio", "crepe-tiny", "mangio-crepe-tiny", "crepe", "harvest", "mangio-crepe", "rmvpe"], # Fork Feature. Add Crepe-Tiny
798
+ value="rmvpe",
799
+ interactive=True,
800
+ )
801
+
802
+ crepe_hop_length = gr.Slider(
803
+ minimum=1,
804
+ maximum=512,
805
+ step=1,
806
+ label="Mangio-Crepe Hop Length. Higher numbers will reduce the chance of extreme pitch changes but lower numbers will increase accuracy. 64-192 is a good range to experiment with.",
807
+ value=120,
808
+ interactive=True,
809
+ visible=False,
810
+ )
811
+ f0method0.change(fn=whethercrepeornah, inputs=[f0method0], outputs=[crepe_hop_length])
812
+ filter_radius0 = gr.Slider(
813
+ minimum=0,
814
+ maximum=7,
815
+ label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"),
816
+ value=3,
817
+ step=1,
818
+ interactive=True,
819
+ )
820
+ resample_sr0 = gr.Slider(
821
+ minimum=0,
822
+ maximum=48000,
823
+ label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
824
+ value=0,
825
+ step=1,
826
+ interactive=True,
827
+ visible=False
828
+ )
829
+ rms_mix_rate0 = gr.Slider(
830
+ minimum=0,
831
+ maximum=1,
832
+ label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
833
+ value=0.21,
834
+ interactive=True,
835
+ )
836
+ protect0 = gr.Slider(
837
+ minimum=0,
838
+ maximum=0.5,
839
+ label=i18n("保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"),
840
+ value=0.33,
841
+ step=0.01,
842
+ interactive=True,
843
+ )
844
+ formanting = gr.Checkbox(
845
+ value=bool(DoFormant),
846
+ label="[EXPERIMENTAL] Formant shift inference audio",
847
+ info="Used for male to female and vice-versa conversions",
848
+ interactive=True,
849
+ visible=True,
850
+ )
851
+
852
+
853
+
854
+
855
+
856
+ with gr.Row():
857
+ vc_output1 = gr.Textbox("")
858
+ f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"), visible=False)
859
+
860
+ but0.click(
861
+ vc_single,
862
+ [
863
+ spk_item,
864
+ input_audio0,
865
+ vc_transform0,
866
+ f0_file,
867
+ f0method0,
868
+ file_index1,
869
+ # file_index2,
870
+ # file_big_npy1,
871
+ index_rate1,
872
+ filter_radius0,
873
+ resample_sr0,
874
+ rms_mix_rate0,
875
+ protect0,
876
+ crepe_hop_length
877
+ ],
878
+ [vc_output1, vc_output2],
879
+ )
880
+
881
+ with gr.Accordion("Batch Conversion",open=False):
882
+ with gr.Row():
883
+ with gr.Column():
884
+ vc_transform1 = gr.Number(
885
+ label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
886
+ )
887
+ opt_input = gr.Textbox(label=i18n("指定输出文件夹"), value="opt")
888
+ f0method1 = gr.Radio(
889
+ label=i18n(
890
+ "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"
891
+ ),
892
+ choices=["pm", "harvest", "crepe", "rmvpe"],
893
+ value="rmvpe",
894
+ interactive=True,
895
+ )
896
+ filter_radius1 = gr.Slider(
897
+ minimum=0,
898
+ maximum=7,
899
+ label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"),
900
+ value=3,
901
+ step=1,
902
+ interactive=True,
903
+ )
904
+ with gr.Column():
905
+ file_index3 = gr.Textbox(
906
+ label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
907
+ value="",
908
+ interactive=True,
909
+ )
910
+ file_index4 = gr.Dropdown(
911
+ label=i18n("自动检测index路径,下拉式选择(dropdown)"),
912
+ choices=sorted(index_paths),
913
+ interactive=True,
914
+ )
915
+ refresh_button.click(
916
+ fn=lambda: change_choices()[1],
917
+ inputs=[],
918
+ outputs=file_index4,
919
+ )
920
+ # file_big_npy2 = gr.Textbox(
921
+ # label=i18n("特征文件路径"),
922
+ # value="E:\\codes\\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
923
+ # interactive=True,
924
+ # )
925
+ index_rate2 = gr.Slider(
926
+ minimum=0,
927
+ maximum=1,
928
+ label=i18n("检索特征占比"),
929
+ value=1,
930
+ interactive=True,
931
+ )
932
+ with gr.Column():
933
+ resample_sr1 = gr.Slider(
934
+ minimum=0,
935
+ maximum=48000,
936
+ label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
937
+ value=0,
938
+ step=1,
939
+ interactive=True,
940
+ )
941
+ rms_mix_rate1 = gr.Slider(
942
+ minimum=0,
943
+ maximum=1,
944
+ label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
945
+ value=1,
946
+ interactive=True,
947
+ )
948
+ protect1 = gr.Slider(
949
+ minimum=0,
950
+ maximum=0.5,
951
+ label=i18n(
952
+ "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"
953
+ ),
954
+ value=0.33,
955
+ step=0.01,
956
+ interactive=True,
957
+ )
958
+ with gr.Column():
959
+ dir_input = gr.Textbox(
960
+ label=i18n("输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"),
961
+ value="E:\codes\py39\\test-20230416b\\todo-songs",
962
+ )
963
+ inputs = gr.File(
964
+ file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
965
+ )
966
+ with gr.Row():
967
+ format1 = gr.Radio(
968
+ label=i18n("导出文件格式"),
969
+ choices=["wav", "flac", "mp3", "m4a"],
970
+ value="flac",
971
+ interactive=True,
972
+ )
973
+ but1 = gr.Button(i18n("转换"), variant="primary")
974
+ vc_output3 = gr.Textbox(label=i18n("输出信息"))
975
+ but1.click(
976
+ vc_multi,
977
+ [
978
+ spk_item,
979
+ dir_input,
980
+ opt_input,
981
+ inputs,
982
+ vc_transform1,
983
+ f0method1,
984
+ file_index3,
985
+ file_index4,
986
+ # file_big_npy2,
987
+ index_rate2,
988
+ filter_radius1,
989
+ resample_sr1,
990
+ rms_mix_rate1,
991
+ protect1,
992
+ format1,
993
+ crepe_hop_length,
994
+ ],
995
+ [vc_output3],
996
+ )
997
+ but1.click(fn=lambda: easy_uploader.clear())
998
+ with gr.TabItem("Download Model"):
999
+ with gr.Row():
1000
+ url=gr.Textbox(label="Enter the URL to the Model:")
1001
+ with gr.Row():
1002
+ model = gr.Textbox(label="Name your model:")
1003
+ download_button=gr.Button("Download")
1004
+ with gr.Row():
1005
+ status_bar=gr.Textbox(label="")
1006
+ download_button.click(fn=download_from_url, inputs=[url, model], outputs=[status_bar])
1007
+ with gr.Row():
1008
+ gr.Markdown(
1009
+ """
1010
+ Mangio’s RVC Fork:https://github.com/Mangio621/Mangio-RVC-Fork ❤️ If you like the EasyGUI, help me keep it.❤️ https://paypal.me/lesantillan
1011
+ """
1012
+ )
1013
+
1014
+
1015
+
1016
+ app.queue(concurrency_count=511, max_size=1022).launch(share=False, quiet=True)
1017
+ #endregion