File size: 2,386 Bytes
8da77b5
 
 
0aa6509
bbe2522
8da77b5
30a4455
8da77b5
0aa6509
8da77b5
 
 
 
 
 
 
 
 
a0324df
8da77b5
 
 
 
0aa6509
8da77b5
 
0aa6509
8da77b5
 
 
 
 
 
 
 
 
0aa6509
 
8da77b5
30a4455
8da77b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0aa6509
 
8da77b5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import json
import os
import tempfile
from pathlib import Path

import gradio as gr
from huggingface_hub import duplicate_space, upload_folder, login


def configure_training(this_space_id, csv_data, character, do_extract_vocals=False):
    """Upload the dataset CSV and its config file to a Space repo.

    Writes ``data.csv`` and ``dataset_config.json`` into a temporary
    directory and uploads that directory to the root of the Space repo
    with ``upload_folder``.

    Args:
        this_space_id: Repo ID of the target Space. Surrounding whitespace
            is ignored.
        csv_data: CSV text, written verbatim to ``data.csv``.
        character: Name of the character being cloned. It is stripped,
            has ``-``, ``_`` and spaces removed, and is lowercased before
            being stored under the ``"character"`` config key.
        do_extract_vocals: Stored unchanged under the
            ``"do_extract_vocals"`` config key.

    Returns:
        A status message for the UI.

    Raises:
        ValueError: If the repo ID is blank, or the character name is
            empty once normalized. Raised before anything is uploaded.
    """
    repo_id = (this_space_id or "").strip()
    if not repo_id:
        raise ValueError("Please provide this Space's repo ID.")

    # One pass that deletes the separator characters, then lowercase.
    separators = str.maketrans("", "", "-_ ")
    character = (character or "").strip().translate(separators).lower()
    if not character:
        # Fail before uploading: an empty name would otherwise be written to
        # the config and reported to the user as a success.
        raise ValueError("Please provide a non-empty character name.")

    ds_cfg = {
        "character": character,
        "do_extract_vocals": do_extract_vocals,
    }
    with tempfile.TemporaryDirectory() as tempdir:
        temp_path = Path(tempdir)
        # Explicit UTF-8 so the uploaded files do not depend on the host locale.
        (temp_path / 'data.csv').write_text(csv_data, encoding="utf-8")
        (temp_path / 'dataset_config.json').write_text(
            json.dumps(ds_cfg, indent=2, sort_keys=False),
            encoding="utf-8",
        )
        upload_folder(repo_id=repo_id, folder_path=tempdir, path_in_repo=".", repo_type="space")
    return "OK! Rebooting here in a sec to start training"

# Markdown text passed as the ``description`` of the Gradio interface below.
# The fenced section is a sample of the CSV layout the form asks for.
description = (
    "\n"
    "Configure training session for voice cloning.\n"
    "\n"
    "Please provide a CSV containing YouTube IDs, start times, and end times "
    "that we can use to gather the dataset for you.\n"
    "\n"
    "It should look like this:\n"
    "\n"
    "```\n"
    "ytid,start,end\n"
    "YYiQxHM0L-w,300,660\n"
    "Ga-CcToGiUM,3105,3300\n"
    "```\n"
)

# Read the token once. A missing, empty, or whitespace-only value all count as
# "not configured", so the app shows the setup instructions instead of failing
# at startup while trying to log in with an unusable token.
hf_token = (os.environ.get("HF_TOKEN") or "").strip()

if hf_token:
    # Log in to the Hugging Face Hub before building the configuration form.
    login(hf_token)
    interface = gr.Interface(
        configure_training,
        inputs=[
            gr.Textbox(label="This Space's Repo ID", info="The repo ID of this space (ex. nateraw/voice-cloning-training-ui)."),
            gr.TextArea(value="ytid,start,end\n", label="CSV Data", max_lines=50),
            gr.Textbox(placeholder="Name of character that you're cloning."),
            gr.Checkbox(
                False,
                label="Isolate Vocals",
                info="If checked, we use demucs to isolate vocals from each audio file. You want to use this if the provided clips contain background music"
            )
        ],
        outputs="text",
        title="Configure Training Session",
        description=description,
    )
else:
    # No usable token: show a page telling the user how to set one.
    with gr.Blocks() as interface:
        gr.Markdown("""
## Please Set The HF_TOKEN Environment Variable

Go to the settings tab of this space and add a new environment variable named `HF_TOKEN` with its value being **a token with write access** from [here](https://hf.co/settings/tokens).
""")


# Only start the web server when this file is run as a script.
if __name__ == '__main__':
    interface.launch()