DiamondYin asiffarhankhan committed on
Commit
50dee91
·
0 Parent(s):

Duplicate from asiffarhankhan/custom-gpt-voice-assistant

Browse files

Co-authored-by: Asif Farhan Khan <[email protected]>

.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .venv
2
+ __pycache__
3
+ .chroma
4
+ initialize.sh
5
+ conversations.log
6
+ custom_gpt_voice assistant_demo.mp4
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Custom Gpt Voice Assistant
3
+ emoji: 🌖
4
+ colorFrom: indigo
5
+ colorTo: gray
6
+ sdk: gradio
7
+ sdk_version: 3.27.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: other
11
+ duplicated_from: asiffarhankhan/custom-gpt-voice-assistant
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import nltk
3
+ import openai
4
+ import time
5
+ import gradio as gr
6
+ from threading import Thread
7
+
8
+ from assets.char_poses_base64 import (
9
+ CHAR_IDLE_HTML, CHAR_THINKING_HTML, CHAR_TALKING_HTML)
10
+
11
+ from app_utils import (
12
+ get_chat_history, initialize_knowledge_base,
13
+ text_to_speech_gen, logging, buzz_user)
14
+
15
# Module-level state. `global` declarations are only meaningful inside a
# function body, so the names are simply assigned here.

# Incremented by update_img(); its parity selects the character pose.
FUNC_CALL = 0

# Seconds idle_timer() sleeps between buzzes; get_response() sets it to 80.
BUZZ_TIMEOUT = 60

# Substrings of a document answer that make get_response() fall back to a
# general ChatCompletion reply.
GENERAL_RSPONSE_TRIGGERS = ["I don't understand the question.", "I don't know", "Hello, my name is", "mentioned in the context provided"]
# Running conversation sent to the ChatCompletion fallback.
MESSAGES = [{"role": "system", "content": "You are a helpful assistant.."}]

LOGGER = logging.getLogger('voice_agent')
# Initial (empty) value of the audio HTML output component.
AUDIO_HTML = ''
26
+
27
+ # Needed on the first run (downloads the NLTK tagger data); may be commented out afterwards:
28
+ nltk.download('averaged_perceptron_tagger')
29
+ conv_model, voice_model = initialize_knowledge_base()
30
+
31
+
32
def idle_timer():
    """Buzz the user forever, sleeping BUZZ_TIMEOUT seconds between buzzes.

    After a buzz, if the timeout is currently 80 (the value get_response()
    sets), sleep one more period and reset the timeout to 60.

    NOTE(review): block nesting was reconstructed from a flattened diff;
    confirm against the original file. This function never returns.
    """
    global BUZZ_TIMEOUT

    while True:
        time.sleep(BUZZ_TIMEOUT)
        buzz_user()

        if BUZZ_TIMEOUT != 80:
            continue

        # Extended timeout was in effect: wait once more, then restore 60.
        time.sleep(BUZZ_TIMEOUT)
        BUZZ_TIMEOUT = 60
42
+
43
+
44
def update_img():
    """Bump the call counter and return the character pose HTML for it.

    Returns:
        CHAR_TALKING_HTML when the updated counter is even, otherwise
        CHAR_THINKING_HTML.
    """
    global FUNC_CALL
    FUNC_CALL += 1

    is_odd_call = FUNC_CALL % 2
    return CHAR_THINKING_HTML if is_odd_call else CHAR_TALKING_HTML
52
+
53
+
54
def get_response(history, audio_input):
    """Answer the newest chat turn and synthesize the reply as audio HTML.

    The question is the user text of the last ``history`` entry. When that
    text is empty, the recording at ``audio_input`` is transcribed instead.
    The question is sent to the document QA chain first; if that answer
    contains any of ``GENERAL_RSPONSE_TRIGGERS`` it is replaced by a general
    ChatCompletion reply.

    Args:
        history: List of ``[user_message, bot_message]`` pairs. The bot slot
            of the last pair is filled in place.
        audio_input: Path of a recorded audio file, or a falsy value.

    Returns:
        ``(history, audio_html)``, or ``(None, None)`` when there is nothing
        to answer (no history, or neither text nor audio).
    """
    global BUZZ_TIMEOUT
    BUZZ_TIMEOUT = 80

    # Guard against an empty history instead of raising IndexError.
    if not history:
        return None, None

    query_type = 'text'
    question = history[-1][0]

    if not question:
        if not audio_input:
            return None, None

        query_type = 'audio'
        # The recording is renamed with a .wav suffix before upload
        # (presumably so the API can infer the audio format - confirm).
        wav_path = audio_input + '.wav'
        os.rename(audio_input, wav_path)
        # Context manager closes the file handle even if transcription fails.
        with open(wav_path, "rb") as audio_file:
            transcript = openai.Audio.transcribe("whisper-1", audio_file)
        question = transcript['text']

    LOGGER.info("\nquery_type: %s", query_type)
    LOGGER.info("query_text: %s", question)
    print('\nquery_type:', query_type)
    print('\nquery_text:', question)

    if question.lower().strip() == 'hi':
        question = 'hello'

    answer = conv_model.run(question)
    LOGGER.info("\ndocument_response: %s", answer)
    print('\ndocument_response:', answer)

    # Fall back at most once: test the document answer against all triggers,
    # rather than re-testing the replaced answer and calling the API again.
    if any(trigger in answer for trigger in GENERAL_RSPONSE_TRIGGERS):
        MESSAGES.append({"role": "user", "content": question})
        chat = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=MESSAGES,
            temperature=0.7,
            # `n` is the number of completions to generate; only choices[0]
            # is used, so request one. (A length cap would be `max_tokens`.)
            n=1,
            stop="\n"
        )
        answer = chat.choices[0].message.content
        MESSAGES.append({"role": "assistant", "content": answer})
        LOGGER.info("general_response: %s", answer)
        print('\ngeneral_response:', answer)

    audio_html = text_to_speech_gen(answer)
    history[-1][1] = answer

    return history, audio_html
103
+
104
+ # buzz_usr_proc = Thread(target=idle_timer)
105
+
106
+ with gr.Blocks(title="Your Assistance Pal!") as demo:
107
+
108
+ with gr.Row():
109
+ output_html = gr.HTML(label="Felix's Voice", value=AUDIO_HTML)
110
+ output_html.visible = False
111
+ assistant_character = gr.HTML(label=None, value=CHAR_IDLE_HTML, show_label=False)
112
+ with gr.Column(scale=0.1):
113
+ chatbot = gr.Chatbot(label='Send a text or a voice input').style(height=285)
114
+ with gr.Row():
115
+ msg = gr.Textbox(placeholder='Write a chat & press Enter.', show_label=False).style(container=False)
116
+ with gr.Column(scale=0.5):
117
+ audio_input = gr.Audio(source="microphone", type='filepath', show_label=False).style(container=False)
118
+ button = gr.Button(value="Send")
119
+
120
+ msg.submit(get_chat_history, [msg, chatbot], [msg, chatbot]
121
+ ).then(update_img, outputs=[assistant_character]
122
+ ).then(get_response, [chatbot, audio_input], [chatbot, output_html]
123
+ ).then(update_img, outputs=[assistant_character])
124
+
125
+ button.click(get_chat_history, [msg, chatbot], [msg, chatbot]
126
+ ).then(update_img, outputs=[assistant_character]
127
+ ).then(get_response, [chatbot, audio_input], [chatbot, output_html]
128
+ ).then(update_img, outputs=[assistant_character])
129
+
130
+ # buzz_usr_proc.start()
131
+
132
+ demo.launch(debug=False, favicon_path='assets/favicon.png', show_api=False, share=False)
app_utils.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import whisper
3
+ from io import BytesIO
4
+ import base64
5
+ import boto3
6
+ from pydub import AudioSegment
7
+ from pydub.playback import play
8
+ import logging
9
+
10
+ from langchain import OpenAI
11
+ from langchain.chains import RetrievalQA
12
+ from langchain.vectorstores import Chroma
13
+ from langchain.document_loaders import DirectoryLoader
14
+ from langchain.embeddings.openai import OpenAIEmbeddings
15
+ from langchain.text_splitter import CharacterTextSplitter
16
+
17
+
18
+ OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
19
+ AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
20
+ AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')
21
+ AWS_REGION_NAME = 'ap-south-1'
22
+
23
+
24
+ logging.basicConfig(level="INFO",
25
+ filename='conversations.log',
26
+ filemode='a',
27
+ format='%(asctime)s %(message)s',
28
+ datefmt='%H:%M:%S')
29
+
30
+
31
+ def buzz_user():
32
+ input_prompt = AudioSegment.from_mp3('assets/timeout_audio.mp3')
33
+ play(input_prompt)
34
+
35
+
36
def initialize_knowledge_base():
    """Build the retrieval QA chain over ``profiles/`` and load Whisper.

    Text files under ``profiles`` are loaded, split into 1000-character
    chunks without overlap, embedded with OpenAI embeddings into a Chroma
    store, and wrapped in a "stuff" RetrievalQA chain that retrieves a
    single chunk (``k=1``) per query.

    Returns:
        ``(conv_model, voice_model)``: the RetrievalQA chain and the
        Whisper "tiny" model.
    """
    documents = DirectoryLoader('profiles', glob='**/*.txt').load()

    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(documents)

    vector_store = Chroma.from_documents(chunks, OpenAIEmbeddings())

    llm = OpenAI()
    retriever = vector_store.as_retriever(search_kwargs={"k": 1})
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
    )

    speech_model = whisper.load_model("tiny")
    return qa_chain, speech_model
57
+
58
+
59
def text_to_speech_gen(answer):
    """Synthesize ``answer`` with Amazon Polly and return an HTML audio tag.

    Args:
        answer: Text to speak.

    Returns:
        The ``<audio>`` element produced by ``audio_to_html`` for the
        synthesized MP3 bytes.
    """
    from contextlib import closing

    polly = boto3.client('polly',
                         aws_access_key_id=AWS_ACCESS_KEY_ID,
                         aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                         region_name=AWS_REGION_NAME)

    response = polly.synthesize_speech(
        Text=answer,
        VoiceId='Matthew',
        OutputFormat='mp3',
        Engine="neural")

    # AudioStream is a streaming response body; close it once read so the
    # underlying connection is released.
    with closing(response['AudioStream']) as stream:
        audio_bytes = stream.read()

    return audio_to_html(audio_bytes)
76
+
77
+
78
def audio_to_html(audio_bytes):
    """Wrap raw MP3 bytes in an autoplaying HTML ``<audio>`` element.

    Args:
        audio_bytes: The audio data as ``bytes``.

    Returns:
        An ``<audio>`` tag whose ``src`` is a base64 ``data:audio/mpeg`` URI.
    """
    # Encode the bytes directly; no intermediate BytesIO copy is needed.
    audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
    return f'<audio src="data:audio/mpeg;base64,{audio_base64}" controls autoplay></audio>'
85
+
86
+
87
def get_chat_history(user_message, history):
    """Append the user's message to the chat as a new, unanswered turn.

    Args:
        user_message: Text the user submitted.
        history: Existing ``[user, bot]`` pairs; ``None`` is treated as an
            empty history. The input is not mutated.

    Returns:
        ``("", new_history)``: an empty string plus a new list ending with
        ``[user_message, None]``.
    """
    previous_turns = list(history or [])
    return "", previous_turns + [[user_message, None]]
89
+
90
+
assets/char_idle.gif ADDED
assets/char_poses_base64.py ADDED
The diff for this file is too large to render. See raw diff
 
assets/char_speaking.gif ADDED
assets/char_thinking.gif ADDED
assets/favicon.png ADDED
assets/timeout_audio.mp3 ADDED
Binary file (21.9 kB). View file
 
profiles/john.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Full name is Alfred Brandon
2
+ You were born on 12th March 1958
3
+ John is at the shop
4
+ Brian is on holiday
5
+ Sarah is coming home soon
6
+ Sarah is next door and has left
7
+ You have granddaughters, Jenny and Michelle, they are 7 and 10
8
+ Your cousin Arthur is in Australia, he’s back next July
9
+ Your neighbour Steven passed away a few days ago
10
+ Your wife is in hospital with a broken arm
11
+ John will be back by 8
requirements.txt ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.1.0
2
+ aiohttp==3.8.4
3
+ aiosignal==1.3.1
4
+ altair==4.2.2
5
+ anyio==3.6.2
6
+ argilla==1.6.0
7
+ async-timeout==4.0.2
8
+ attrs==23.1.0
9
+ backoff==2.2.1
10
+ boto3==1.26.115
11
+ botocore==1.29.115
12
+ certifi==2022.12.7
13
+ charset-normalizer==3.1.0
14
+ chromadb==0.3.21
15
+ click==8.1.3
16
+ clickhouse-connect==0.5.20
17
+ cmake==3.26.3
18
+ commonmark==0.9.1
19
+ contourpy==1.0.7
20
+ cycler==0.11.0
21
+ dataclasses-json==0.5.7
22
+ Deprecated==1.2.13
23
+ duckdb==0.7.1
24
+ entrypoints==0.4
25
+ et-xmlfile==1.1.0
26
+ fastapi==0.95.1
27
+ ffmpeg-python==0.2.0
28
+ ffmpy==0.3.0
29
+ filelock==3.12.0
30
+ fonttools==4.39.3
31
+ frozenlist==1.3.3
32
+ fsspec==2023.4.0
33
+ future==0.18.3
34
+ gradio==3.27.0
35
+ gradio_client==0.1.3
36
+ greenlet==2.0.2
37
+ h11==0.14.0
38
+ hnswlib==0.7.0
39
+ httpcore==0.16.3
40
+ httptools==0.5.0
41
+ httpx==0.23.3
42
+ huggingface-hub==0.13.4
43
+ idna==3.4
44
+ Jinja2==3.1.2
45
+ jmespath==1.0.1
46
+ joblib==1.2.0
47
+ jsonschema==4.17.3
48
+ kiwisolver==1.4.4
49
+ langchain==0.0.144
50
+ linkify-it-py==2.0.0
51
+ lit==16.0.1
52
+ llvmlite==0.39.1
53
+ lxml==4.9.2
54
+ lz4==4.3.2
55
+ Markdown==3.4.3
56
+ markdown-it-py==2.2.0
57
+ MarkupSafe==2.1.2
58
+ marshmallow==3.19.0
59
+ marshmallow-enum==1.5.1
60
+ matplotlib==3.7.1
61
+ mdit-py-plugins==0.3.3
62
+ mdurl==0.1.2
63
+ monotonic==1.6
64
+ more-itertools==9.1.0
65
+ mpmath==1.3.0
66
+ msg-parser==1.2.0
67
+ multidict==6.0.4
68
+ mypy-extensions==1.0.0
69
+ networkx==3.1
70
+ nltk==3.8.1
71
+ numba==0.56.4
72
+ numexpr==2.8.4
73
+ numpy==1.23.5
74
+ nvidia-cublas-cu11==11.10.3.66
75
+ nvidia-cuda-cupti-cu11==11.7.101
76
+ nvidia-cuda-nvrtc-cu11==11.7.99
77
+ nvidia-cuda-runtime-cu11==11.7.99
78
+ nvidia-cudnn-cu11==8.5.0.96
79
+ nvidia-cufft-cu11==10.9.0.58
80
+ nvidia-curand-cu11==10.2.10.91
81
+ nvidia-cusolver-cu11==11.4.0.1
82
+ nvidia-cusparse-cu11==11.7.4.91
83
+ nvidia-nccl-cu11==2.14.3
84
+ nvidia-nvtx-cu11==11.7.91
85
+ olefile==0.46
86
+ openai==0.27.4
87
+ openai-whisper @ git+https://github.com/openai/whisper.git@c09a7ae299c4c34c5839a76380ae407e7d785914
88
+ openapi-schema-pydantic==1.2.4
89
+ openpyxl==3.1.2
90
+ orjson==3.8.10
91
+ packaging==23.1
92
+ pandas==1.5.3
93
+ Pillow==9.5.0
94
+ posthog==3.0.0
95
+ pydantic==1.10.7
96
+ pydub==0.25.1
97
+ Pygments==2.15.1
98
+ pypandoc==1.11
99
+ pyparsing==3.0.9
100
+ pyrsistent==0.19.3
101
+ python-dateutil==2.8.2
102
+ python-docx==0.8.11
103
+ python-dotenv==1.0.0
104
+ python-magic==0.4.27
105
+ python-multipart==0.0.6
106
+ python-pptx==0.6.21
107
+ pytz==2023.3
108
+ PyYAML==6.0
109
+ regex==2023.3.23
110
+ requests==2.28.2
111
+ rfc3986==1.5.0
112
+ rich==13.0.1
113
+ s3transfer==0.6.0
114
+ scikit-learn==1.2.2
115
+ scipy==1.10.1
116
+ semantic-version==2.10.0
117
+ sentence-transformers==2.2.2
118
+ sentencepiece==0.1.98
119
+ six==1.16.0
120
+ sniffio==1.3.0
121
+ SQLAlchemy==1.4.47
122
+ starlette==0.26.1
123
+ sympy==1.11.1
124
+ tenacity==8.2.2
125
+ threadpoolctl==3.1.0
126
+ tiktoken==0.3.1
127
+ tokenizers==0.13.3
128
+ toolz==0.12.0
129
+ torch==2.0.0
130
+ torchvision==0.15.1
131
+ tqdm==4.65.0
132
+ transformers==4.28.1
133
+ triton==2.0.0
134
+ typing-inspect==0.8.0
135
+ typing_extensions==4.5.0
136
+ tzdata==2023.3
137
+ uc-micro-py==1.0.1
138
+ unstructured==0.5.13
139
+ urllib3==1.26.15
140
+ uvicorn==0.21.1
141
+ uvloop==0.17.0
142
+ watchfiles==0.19.0
143
+ websockets==11.0.2
144
+ wrapt==1.14.1
145
+ XlsxWriter==3.1.0
146
+ yarl==1.8.2
147
+ zstandard==0.21.0