Achyuth4 committed
Commit
4386cfd
0 Parent(s):

Duplicate from AchyuthGamer/ramba

Files changed (7)
  1. .gitattributes +35 -0
  2. .gitignore +3 -0
  3. Dockerfile +15 -0
  4. README.md +12 -0
  5. achyuthailogo.png +0 -0
  6. app.py +262 -0
  7. requirements.txt +2 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
+ venv
+ .mypy_cache
+ __pycache__
Dockerfile ADDED
@@ -0,0 +1,15 @@
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # you will also find guides on how best to write your Dockerfile
+
+ FROM python:3.10
+
+ COPY app.py .
+ COPY requirements.txt .
+
+ RUN python -m venv venv
+ RUN ./venv/bin/pip install -r requirements.txt
+
+ ENV H2O_WAVE_LISTEN=":7860"
+ ENV H2O_WAVE_ADDRESS="http://127.0.0.1:7860"
+
+ CMD ["./venv/bin/wave", "run", "app.py", "--no-reload"]
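
The two ENV lines do the Spaces wiring: H2O_WAVE_LISTEN makes the bundled Wave server listen on port 7860 (the port Hugging Face Spaces routes traffic to), and H2O_WAVE_ADDRESS tells the `wave run` process where to reach that server. A minimal stand-in app.py to smoke-test this wiring before deploying the full chatbot; this is a sketch, assuming only the h2o-wave package from requirements.txt, and the card name 'smoke' is illustrative:

    from h2o_wave import main, app, Q, ui


    @app('/')
    async def serve(q: Q):
        # If this card renders at the Space URL, the LISTEN/ADDRESS wiring works.
        q.page['smoke'] = ui.markdown_card(
            box='1 1 3 2',
            title='Wave smoke test',
            content='The Wave server is listening on :7860.',
        )
        await q.page.save()
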
README.md ADDED
@@ -0,0 +1,12 @@
+ ---
+ title: h2oGPT - ChatBot
+ emoji: 💻
+ colorFrom: indigo
+ colorTo: indigo
+ sdk: docker
+ pinned: false
+ license: apache-2.0
+ duplicated_from: AchyuthGamer/ramba
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
achyuthailogo.png ADDED
app.py ADDED
@@ -0,0 +1,262 @@
+ from h2o_wave import main, app, Q, ui, data
+ from gradio_client import Client
+ import ast
+
+
+ async def init_ui(q: Q) -> None:
+     q.page['meta'] = ui.meta_card(
+         box='',
+         layouts=[
+             ui.layout(breakpoint='xs', min_height='100vh', zones=[
+                 ui.zone('main', size='1', direction=ui.ZoneDirection.ROW, zones=[
+                     ui.zone('sidebar', size='250px'),
+                     ui.zone('body', direction=ui.ZoneDirection.COLUMN, zones=[
+                         ui.zone('title', size='55px'),
+                         ui.zone('content', size='1'),
+                         ui.zone('footer'),
+                     ]),
+                 ])
+             ])
+         ],
+         title='AchyuthGPT',
+     )
+     q.page['sidebar'] = ui.nav_card(
+         box='sidebar', color='primary', title='AchyuthGPT', subtitle='Programmed by Achyuth',
+         value=f'#{q.args["#"]}' if q.args['#'] else '#page1',
+         image='https://huggingface.co/spaces/AchyuthGamer/AchyuthGPT-v1/resolve/main/achyuthailogo.png', items=[
+             ui.nav_group('', items=[
+                 ui.nav_item(name='dwave-docs', label='Wave docs', path='https://AchyuthGPT.blogspot.com/'),
+                 ui.nav_item(name='Achyuth-GPT', label='Achyuth GPT', path='https://github.com/achyuth4/AchyuthGPT-llmstudio'),
+                 ui.nav_item(name='fine-tune', label='LLM Studio', path='https://github.com/achyuth4/AchyuthGPT-llmstudio'),
+                 ui.nav_item(name='more-models', label='More models', path='https://huggingface.co/achyuthgamer'),
+             ]),
+         ],
+         secondary_items=[
+             ui.toggle(name='dark_mode', label='Dark mode', trigger=True),
+             ui.text('<center>Developer - N.Achyuth Reddy.</center>')
+         ]
+     )
+
+     q.page['chatbot'] = ui.chatbot_card(
+         box=ui.box('content'),
+         data=data('content from_user', t='list'),
+         name='chatbot'
+     )
+     q.page['title'] = ui.section_card(
+         box='title',
+         title='',
+         subtitle='',
+         items=[
+             ui.dropdown(name='model', trigger=True, label='', value='gpt', choices=[
+                 ui.choice(name='gpt', label='AchyuthGPT-1'),
+                 ui.choice(name='falcon', label='AchyuthGPT-2'),
+                 ui.choice(name='llma', label='AchyuthGPT-3'),
+                 ui.choice(name='mpt', label='AchyuthGPT-4'),
+                 ui.choice(name='lmsys', label='AchyuthGPT-5'),
+                 ui.choice(name='gpt-3.5-turbo', label='AchyuthGPT-6'),
+             ]),
+             ui.button(name='clear', label='Clear', icon='Delete'),
+         ],
+     )
+
+ """
+ :param load_8bit: load model in 8-bit using bitsandbytes
+ :param load_4bit: load model in 4-bit using bitsandbytes
+ :param load_half: load model in float16
+ :param infer_devices: whether to control devices with gpu_id. If False, then spread across GPUs
+ :param base_model: model HF-type name. If use --base_model to preload model, cannot unload in gradio in models tab
+ :param tokenizer_base_model: tokenizer HF-type name. Usually not required, inferred from base_model.
+ :param lora_weights: LORA weights path/HF link
+ :param gpu_id: if infer_devices, then use gpu_id for cuda device ID, or auto mode if gpu_id != -1
+ :param compile_model: Whether to compile the model
+ :param use_cache: Whether to use caching in model (some models fail when multiple threads use)
+ :param inference_server: Consume base_model as type of model at this address
+     Address can be text-generation-server hosting that base_model
+     e.g. python generate.py --inference_server="http://192.168.1.46:6112" --base_model=h2oai/h2ogpt-oasst1-512-12b
+     Or address can be "openai_chat" or "openai" for OpenAI API
+     e.g. python generate.py --inference_server="openai_chat" --base_model=gpt-3.5-turbo
+     e.g. python generate.py --inference_server="openai" --base_model=text-davinci-003
+ :param prompt_type: type of prompt, usually matched to fine-tuned model or plain for foundational model
+ :param prompt_dict: If prompt_type=custom, then expects (some) items returned by get_prompt(..., return_dict=True)
+ :param model_lock: Lock models to specific combinations, for ease of use and extending to many models
+     Only used if gradio = True
+     List of dicts, each dict has base_model, tokenizer_base_model, lora_weights, inference_server, prompt_type, and prompt_dict
+     If all models have same prompt_type and prompt_dict, can still specify that once in CLI outside model_lock as default for dict
+     Can specify model_lock instead of those items on CLI
+     As with CLI itself, base_model can infer prompt_type and prompt_dict if in prompter.py.
+     Also, tokenizer_base_model and lora_weights are optional.
+     Also, inference_server is optional if loading model from local system.
+     All models provided will automatically appear in compare model mode
+     Model loading-unloading and related choices will be disabled. Model/lora/server adding will be disabled
+ :param model_lock_columns: How many columns to show if locking models (and so showing all at once)
+     If None, then defaults to up to 3
+     If -1, then all goes into 1 row
+     Maximum value is 4 due to non-dynamic gradio rendering elements
+ :param fail_if_cannot_connect: if doing model locking (e.g. with many models), fail if True. Otherwise ignore.
+     Useful when many endpoints and want to just see what works, but still have to wait for timeout.
+ :param temperature: generation temperature
+ :param top_p: generation top_p
+ :param top_k: generation top_k
+ :param num_beams: generation number of beams
+ :param repetition_penalty: generation repetition penalty
+ :param num_return_sequences: generation number of sequences (1 forced for chat)
+ :param do_sample: generation sample
+ :param max_new_tokens: generation max new tokens
+ :param min_new_tokens: generation min tokens
+ :param early_stopping: generation early stopping
+ :param max_time: maximum time to allow for generation
+ :param memory_restriction_level: 0 = no restriction to tokens or model, 1 = some restrictions on tokens, 2 = HF-like restriction, 3 = very low memory case
+ :param debug: enable debug mode
+ :param save_dir: directory chat data is saved to
+ :param share: whether to share the gradio app with sharable URL
+ :param local_files_only: whether to only use local files instead of going to HF for models
+ :param resume_download: whether to resume downloads from HF for models
+ :param use_auth_token: whether to use HF auth token (requires CLI did huggingface-cli login before)
+ :param trust_remote_code: whether to trust any remote code needed for HF model
+ :param offload_folder: path for spilling model onto disk
+ :param src_lang: source languages to include if doing translation (None = all)
+ :param tgt_lang: target languages to include if doing translation (None = all)
+ :param cli: whether to use CLI (non-gradio) interface.
+ :param cli_loop: whether to loop for CLI (False usually only for testing)
+ :param gradio: whether to enable gradio, or to enable benchmark mode
+ :param gradio_offline_level: if > 0, then change fonts so fully offline
+     == 1 means backend won't need internet for fonts, but front-end UI might if font not cached
+     == 2 means backend and frontend don't need internet to download any fonts.
+     Note: Some things always disabled include HF telemetry, gradio telemetry, chromadb posthog that involve uploading.
+     This option further disables google fonts for downloading, which is less intrusive than uploading,
+     but still required in air-gapped case. The fonts don't look as nice as google fonts, but ensure full offline behavior.
+     Also set --share=False to avoid sharing a gradio live link.
+ :param chat: whether to enable chat mode with chat history
+ :param chat_context: whether to use extra helpful context if human_bot
+ :param stream_output: whether to stream output
+ :param show_examples: whether to show clickable examples in gradio
+ :param verbose: whether to show verbose prints
+ :param h2ocolors: whether to use H2O.ai theme
+ :param height: height of chat window
+ :param show_lora: whether to show LORA options in UI (expert so can be hard to understand)
+ :param login_mode_if_model0: set to True to load --base_model after client logs in, to be able to free GPU memory when model is swapped
+ :param block_gradio_exit: whether to block gradio exit (used for testing)
+ :param concurrency_count: gradio concurrency count (1 is optimal for LLMs)
+ :param api_open: If False, don't let API calls skip gradio queue
+ :param allow_api: whether to allow API calls at all to gradio server
+ :param input_lines: how many input lines to show for chat box (>1 forces shift-enter for submit, else enter is submit)
+ :param gradio_size: Overall size of text and spaces: "xsmall", "small", "medium", "large".
+     Small useful for many chatbots in model_lock mode
+ :param auth: gradio auth for launcher in form [(user1, pass1), (user2, pass2), ...]
+     e.g. --auth=[('jon','password')] with no spaces
+ :param max_max_time: Maximum max_time for gradio slider
+ :param max_max_new_tokens: Maximum max_new_tokens for gradio slider
+ :param sanitize_user_prompt: whether to remove profanity from user input (slows down input processing)
+ :param sanitize_bot_response: whether to remove profanity and repeat lines from bot output (about 2x slower generation for long streaming cases due to better_profanity being slow)
+ :param extra_model_options: extra models to show in list in gradio
+ :param extra_lora_options: extra LORA to show in list in gradio
+ :param extra_server_options: extra servers to show in list in gradio
+ :param score_model: which model to score responses (None means no scoring)
+ :param eval_filename: json file to use for evaluation; if None, uses sharegpt
+ :param eval_prompts_only_num: for no gradio benchmark, if using eval_filename prompts for eval instead of examples
+ :param eval_prompts_only_seed: for no gradio benchmark, seed for eval_filename sampling
+ :param eval_as_output: for no gradio benchmark, whether to test eval_filename output itself
+ :param langchain_mode: Data source to include. Choose "UserData" to only consume files from make_db.py.
+     WARNING: wiki_full requires extra data processing via read_wiki_full.py and requires really good workstation to generate db, unless already present.
+ :param langchain_action: Mode in which langchain operates on documents.
+     Query: Make query of document(s)
+     Summarize or Summarize_map_reduce: Summarize document(s) via map_reduce
+     Summarize_all: Summarize document(s) using entire document at once
+     Summarize_refine: Summarize document(s) using entire document, and try to refine before returning summary
+ :param force_langchain_evaluate: Whether to force langchain LLM use even if not doing langchain, mostly for testing.
+ :param user_path: user path to glob from to generate db for vector search, for 'UserData' langchain mode.
+     If already have db, any new/changed files are added automatically if path set, does not have to be same path used for prior db sources
+ :param detect_user_path_changes_every_query: whether to detect if any files changed or added every similarity search (by file hashes).
+     Expensive for large number of files, so not done by default. By default only detect changes during db loading.
+ :param visible_langchain_modes: dbs to generate at launch to be ready for LLM
+     Can be up to ['wiki', 'wiki_full', 'UserData', 'MyData', 'github h2oGPT', 'DriverlessAI docs']
+     But wiki_full is expensive and requires preparation
+     To allow scratch space only live in session, add 'MyData' to list
+     Default: If only want to consume local files, e.g. prepared by make_db.py, only include ['UserData']
+     FIXME: Avoid 'All' for now, not implemented
+ :param visible_langchain_actions: Which actions to allow
+ :param document_choice: Default document choice when taking subset of collection
+ :param load_db_if_exists: Whether to load chroma db if exists or re-generate db
+ :param keep_sources_in_context: Whether to keep url sources in context, not helpful usually
+ :param db_type: 'faiss' for in-memory or 'chroma' or 'weaviate' for persisted on disk
+ :param use_openai_embedding: Whether to use OpenAI embeddings for vector db
+ :param use_openai_model: Whether to use OpenAI model for use with vector db
+ :param hf_embedding_model: Which HF embedding model to use for vector db
+     Default is instructor-large with 768 parameters per embedding if have GPUs, else all-MiniLM-L6-v1 if no GPUs
+     Can also choose simpler model with 384 parameters per embedding: "sentence-transformers/all-MiniLM-L6-v2"
+     Can also choose even better embedding with 1024 parameters: 'hkunlp/instructor-xl'
+     We support automatic changing of embeddings for chroma, with a backup of db made if this is done
+ :param allow_upload_to_user_data: Whether to allow file uploads to update shared vector db
+ :param allow_upload_to_my_data: Whether to allow file uploads to update scratch vector db
+ :param enable_url_upload: Whether to allow upload from URL
+ :param enable_text_upload: Whether to allow upload of text
+ :param enable_sources_list: Whether to allow list (or download for non-shared db) of list of sources for chosen db
+ :param chunk: Whether to chunk data (True unless know data is already optimally chunked)
+ :param chunk_size: Size of chunks, with typically top-4 passed to LLM, so needs to be in context length
+ :param top_k_docs: number of chunks to give LLM
+ :param reverse_docs: whether to reverse docs order so most relevant is closest to question.
+     Best choice for sufficiently smart model, and truncation occurs for oldest context, so best then too.
+     But smaller 6_9 models fail to use newest context and can get stuck on old information.
+ :param auto_reduce_chunks: Whether to automatically reduce top_k_docs to fit context given prompt
+ :param max_chunks: If top_k_docs=-1, maximum number of chunks to allow
+ :param n_jobs: Number of processors to use when consuming documents (-1 = all, is default)
+ :param enable_captions: Whether to support captions using BLIP for image files as documents, then preloads that model
+ :param captions_model: Which model to use for captions.
+     captions_model: str = "Salesforce/blip-image-captioning-base", # continue capable
+     captions_model: str = "Salesforce/blip2-flan-t5-xl", # question/answer capable, 16GB state
+     captions_model: str = "Salesforce/blip2-flan-t5-xxl", # question/answer capable, 60GB state
+     Note: opt-based blip2 are not permissive license due to opt and Meta license restrictions
+ :param pre_load_caption_model: Whether to preload caption model, or load after forking parallel doc loader
+     parallel loading disabled if preload and have images, to prevent deadlocking on cuda context
+     Recommended if using larger caption model
+ :param caption_gpu: If support caption, then use GPU if exists
+ :param enable_ocr: Whether to support OCR on images
+ :return:
+ """
+
+ @app('/')
+ async def serve(q: Q):
+     if not q.client.initialized:
+         await init_ui(q)
+         q.client.model_client = Client('https://gpt.h2o.ai/')
+         q.client.initialized = True
+
+     # A new message arrived.
+     if q.args.chatbot:
+         # Append user message.
+         q.page['chatbot'].data += [q.args.chatbot, True]
+         # Append bot response.
+         kwargs = dict(instruction_nochat=q.args.chatbot)
+         try:
+             res = q.client.model_client.predict(str(dict(kwargs)), api_name='/submit_nochat_api')
+             bot_res = ast.literal_eval(res)['response']
+             q.page['chatbot'].data += [bot_res, False]
+         except Exception:
+             q.page['meta'] = ui.meta_card(box='', notification_bar=ui.notification_bar(
+                 text='An error occurred during prediction. Please try later or a different model.',
+                 type='error',
+             ))
+     elif q.args.clear:
+         # Recreate the card.
+         q.page['chatbot'] = ui.chatbot_card(
+             box=ui.box('content'),
+             data=data('content from_user', t='list'),
+             name='chatbot'
+         )
+     elif q.args.dark_mode is not None:
+         q.page['meta'].theme = 'achyuthgpt-dark' if q.args.dark_mode else 'light'
+         q.page['sidebar'].color = 'card' if q.args.dark_mode else 'primary'
+     elif q.args.model:
+         try:
+             q.client.model_client = Client(f'https://{q.args.model}.h2o.ai/')
+             q.page['meta'] = ui.meta_card(box='', notification_bar=ui.notification_bar(
+                 text='Model changed successfully.',
+                 type='success',
+             ))
+         except Exception:
+             q.page['meta'] = ui.meta_card(box='', notification_bar=ui.notification_bar(
+                 text='An error occurred while changing the model. Please try a different one.',
+                 type='error',
+             ))
+
+     await q.page.save()
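
The heart of serve() is a single round trip: the remote Space's /submit_nochat_api endpoint takes a stringified Python dict of keyword arguments (only instruction_nochat is supplied here) and returns a stringified dict whose 'response' key carries the bot text, which is why the result is parsed with ast.literal_eval rather than a JSON parser. A standalone sketch of that exchange, assuming the public https://gpt.h2o.ai/ endpoint is reachable and still exposes this API:

    import ast

    from gradio_client import Client

    client = Client('https://gpt.h2o.ai/')
    # Same payload shape app.py builds from the chatbot box.
    kwargs = dict(instruction_nochat='Say hello in one sentence.')
    res = client.predict(str(kwargs), api_name='/submit_nochat_api')
    # The endpoint returns a Python-literal dict as a string, not JSON.
    print(ast.literal_eval(res)['response'])
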
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ h2o-wave
+ gradio-client
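
Both dependencies are left unpinned, so every rebuild of the Space picks up whatever h2o-wave and gradio-client ship at build time; ui.chatbot_card is a relatively recent Wave addition and the gradio-client API surface moves quickly, so pinning known-good versions is safer. A hypothetical example of pinned requirements (the exact versions are illustrative and would need to be verified against the Space's build date):

    h2o-wave==0.26.1
    gradio-client==0.2.7
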