File size: 14,709 Bytes
0f14eb2
 
 
b19e41e
0f14eb2
 
 
aa3252e
 
0f14eb2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa3252e
 
 
0f14eb2
 
 
aa3252e
0f14eb2
a90aeb4
0f14eb2
 
 
 
aa3252e
0f14eb2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa3252e
0f14eb2
aa3252e
0f14eb2
 
 
 
 
 
 
 
aa3252e
0f14eb2
 
 
aa3252e
0f14eb2
 
 
 
 
 
 
 
ecc89ed
 
0f14eb2
 
ecc89ed
0f14eb2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa3252e
0f14eb2
aa3252e
 
0f14eb2
 
 
 
 
 
 
 
 
 
 
 
 
 
aa3252e
 
 
0f14eb2
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
from googlesearch import search
import requests
import trafilatura

from concurrent.futures import ThreadPoolExecutor
import json
import ast
import gradio as gr
from huggingface_hub import InferenceClient
import tiktoken
import time
import os
from PIL import Image
client = InferenceClient(api_key=os.getenv('HF_TOKEN'))


import tiktoken
import requests
import os
import json
import random
from huggingface_hub import InferenceClient



def upload_to_catbox(file_path):
    """
    Upload a local file to the Catbox file host.

    Args:
        file_path (str): Path to the file on disk.

    Returns:
        str: The hosted file's URL on success, or the literal string
        "Failed to upload file." when the upload does not return HTTP 200.
    """
    catbox_endpoint = "https://catbox.moe/user/api.php"
    with open(file_path, "rb") as fh:
        resp = requests.post(
            catbox_endpoint,
            data={"reqtype": "fileupload"},
            files={"fileToUpload": fh},
        )
    # Catbox replies with the bare URL as the response body on success.
    if resp.status_code != 200:
        return "Failed to upload file."
    return resp.text.strip()
    

# Set your Hugging Face API key here


# Sample data stored in a variable


def generate_quickchart_config(user_input, data):
    """
    Use Hugging Face InferenceClient to determine if a chart is needed and generate its configuration.

    Args:
        user_input (str): The user's question or description of the desired analysis.
        data (dict): Sample data containing stock prices and dates.

    Returns:
        dict or None: QuickChart configuration parameters generated by the model, or None if no chart is requested
        (or if the model's response could not be parsed as JSON).
    """
    prompt = f"""
    You are given the following stock price data:

    {data}

    Based on the user question: "{user_input}", determine if a chart is required to answer the question.
    If a chart is required, generate a valid QuickChart configuration in the following JSON format:
    {{
        "type": <chart type>,
        "data": {{
            "labels": <x-axis labels>,
            "datasets": [
                {{
                    "label": <dataset label>,
                    "data": <y-axis data>,
                    "borderColor": <color>,
                    "fill": <boolean>
                }}
            ]
        }},
        "options": {{
            "title": {{
                "display": true,
                "text": <chart title>
            }},
            "scales": {{
                "xAxes": [{{"scaleLabel": {{"display": true, "labelString": "Date"}}}}],
                "yAxes": [{{"scaleLabel": {{"display": true, "labelString": "Price ($)"}}}}]
            }}
        }}
    }}
    If a chart is not needed, respond with ONLY the word "NO". for example if they ask: make a line chart with cisco stock price over the last days, make the chart
    """
    messages = [
        {"role": "user", "content": prompt}
    ]

    # Make the inference request
    completion = client.chat.completions.create(
        model="Qwen/Qwen2.5-Coder-32B-Instruct",  # Replace with your specific model
        messages=messages,
        max_tokens=2000
    )
    # Use attribute access (.content), consistent with the other completions in
    # this file; subscripting the message object fails on current huggingface_hub.
    response_content = completion.choices[0].message.content.strip()
    print(f"Model response: {response_content}")

    if response_content.lower() == "no":
        print("No chart is required for this question.")
        return None

    # Models frequently wrap JSON in markdown code fences; strip them so
    # json.loads sees the bare object.
    if response_content.startswith("```"):
        response_content = response_content.strip("`").lstrip()
        if response_content.lower().startswith("json"):
            response_content = response_content[4:]
    try:
        return json.loads(response_content)  # Convert JSON string to Python dictionary
    except Exception as e:
        print("Error parsing JSON from model response:", e)
        return None

def generate_chart(config, filename="chart.png", output_dir="charts"):
    """
    Generate a chart using QuickChart.io and save it locally.

    Args:
        config (dict): QuickChart configuration.
        filename (str): Name of the output file (e.g., 'chart.png').
        output_dir (str): Directory to save the chart image.

    Returns:
        str or None: Path to the saved chart file, or None if the request failed.
    """
    # QuickChart API URL
    url = "https://quickchart.io/chart"

    # Make the output directory if it doesn't exist (exist_ok avoids the
    # check-then-create race of os.path.exists + makedirs).
    os.makedirs(output_dir, exist_ok=True)

    # Send the request to QuickChart API; bound the wait so a hung request
    # can't stall the chat generator indefinitely.
    response = requests.post(url, json={"c": config}, timeout=30)

    if response.status_code == 200:
        # Save the chart image bytes returned by the API.
        file_path = os.path.join(output_dir, filename)
        with open(file_path, "wb") as file:
            file.write(response.content)
        print(f"Chart saved at {file_path}")
        return file_path
    else:
        print(f"Failed to generate chart: {response.text}")
        return None

# Main execution flow




# Progress-indicator frames yielded by chat() while waiting on model/API calls.
# Each list is a sequence of strings cycled to animate the placeholder message.

# "Working on your response." with 1..3 trailing dots.
dots_animation = [f"Working on your response{'.' * n}" for n in range(1, 4)]

# Arrow shrinking from four dashes down to a bare ">".
arrow_animation = [f"{'-' * n}> Preparing your answer" for n in range(4, -1, -1)]

# Four-slot progress bar filling with "=".
loader_animation = [f"[{'=' * n}{' ' * (4 - n)}] Fetching data..." for n in range(5)]

# "Bot is typing" with 1..3 trailing dots.
typing_animation = [f"Bot is typing{'.' * n}" for n in range(1, 4)]

# Classic spinner glyphs: | / - \
rotating_text_animation = [f"Working {glyph}" for glyph in "|/-\\"]




def tokenize_with_qwen(text):
    """
    Round-trip text through a GPT-style tokenizer, truncating very long inputs.

    The text is tokenized with tiktoken's ``cl100k_base`` encoding (used here as
    a stand-in for Qwen's own tokenizer). If the token count exceeds 23,000,
    only the first 17,000 tokens are kept — this leaves headroom under the
    model's context window for the prompt and response.
    (The previous docstring claimed a 32,500-token limit; the code has always
    used the 23,000/17,000 values below.)

    Parameters:
    text (list or str): The text (or list of strings) to be processed.

    Returns:
    str: The decoded text, truncated as described above.

    Raises:
    ValueError: If the input is neither a string nor a list of strings.
    """
    # Truncation policy: trigger when over TRUNCATE_THRESHOLD tokens, keep
    # only the first TRUNCATE_KEEP tokens.
    TRUNCATE_THRESHOLD = 23000
    TRUNCATE_KEEP = 17000

    # Ensure input is a single string (concatenate if it's a list).
    if isinstance(text, list):
        text = ''.join(text)
    elif not isinstance(text, str):
        raise ValueError("Input must be a string or a list of strings.")

    # Use a base encoding like cl100k_base for GPT-style tokenization.
    encoding = tiktoken.get_encoding("cl100k_base")

    # Tokenize, then decode each token id individually; errors='replace'
    # keeps tokens whose bytes are not valid standalone UTF-8 from raising.
    token_ids = encoding.encode(text)
    token_strings = [
        encoding.decode_single_token_bytes(token_id).decode('utf-8', errors='replace')
        for token_id in token_ids
    ]

    # Apply the truncation policy.
    if len(token_strings) > TRUNCATE_THRESHOLD:
        token_strings = token_strings[:TRUNCATE_KEEP]

    # Join tokens back into a single string.
    return ''.join(token_strings)





def fetch_and_process_url(link):
    """
    Download a URL and extract its main readable content.

    Args:
        link (str): URL to fetch.

    Returns:
        str or None: Extracted main text, or None when the fetch or the
        extraction fails. Returning None (instead of an error string, as
        before) lets perform_search's ``filter(None, ...)`` drop failures so
        error text is never fed to the language model as if it were data.
    """
    try:
        # Fetch URL content with a browser-like UA; bounded wait.
        req = requests.get(link, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
        # Extract main content from the raw HTML using trafilatura
        # (itself returns None when nothing useful is found).
        return trafilatura.extract(req.text)
    except Exception as e:
        print(f"Error fetching or processing {link}: {e}")
        return None

def perform_search(query, num_results=5):
    """
    Google-search a query, scrape the result pages in parallel, and combine
    their extracted text.

    Args:
        query (str): Search query.
        num_results (int): Number of search results to fetch.

    Returns:
        str: Extracted page texts joined by blank lines; an empty string when
        the search itself fails. (The previous bare ``return`` produced None,
        which crashed downstream ``''.join``/tokenization.)
    """
    try:
        # Perform Google search
        urls = list(search(query, num_results=num_results))
        print("URLs Found:")
        print(urls)
    except Exception as e:
        print(f"An error occurred during search: {e}")
        return ''

    # Fetch and process URLs in parallel (network-bound, so threads overlap the waits)
    with ThreadPoolExecutor(max_workers=30) as executor:
        results = list(executor.map(fetch_and_process_url, urls))

    # Combine results into a single formatted output, skipping None/empty results
    return '\n\n'.join(filter(None, results))

def chat(user_input,history):
    """
    Generator chat handler for gr.ChatInterface.

    Pipeline: (1) ask the model which Google searches would answer the user,
    (2) scrape and summarize the search results, (3) optionally render a
    QuickChart image and upload it, (4) stream the final answer. Intermediate
    ``yield``s emit animation frames so the UI shows progress; the last yields
    stream the growing response text.

    Args:
        user_input: The user's latest message.
        history: Prior chat messages from Gradio (falsy on the first turn).

    Yields:
        str: Progress-animation frames, then successive prefixes of the answer.
    """
    # Few-shot examples showing the exact dict format the search-planning
    # model must return ({"user_input": ..., "searches": [...]}).
    format_template = examples = """

    {"user_input": "cisco systems stock price for the last 4 days", "searches": ["cisco stock price last 4 days", "cisco systems stock historical data", "current price of Cisco Systems", "cisco stock price chart"]},
    {"user_input": "Apple stock price yesterday", "searches": ["Apple stock price yesterday", "historical price of Apple stock"]},
    {"user_input": "Tesla quarterly revenue", "searches": ["Tesla latest quarterly revenue", "Tesla revenue report Q3 2024"]},
    {"user_input": "CAPM model for Tesla", "searches": ["Tesla stock beta value", "current risk-free rate", "expected market return for CAPM model"]},
    {"user_input": "Hi", "searches": []},
    {"user_input": "Who are you?", "searches": []},
    {"user_input": "Google earnings per share last quarter", "searches": ["Google EPS last quarter", "Google quarterly earnings report"]},
    {"user_input": "Calculate WACC for Microsoft", "searches": ["Microsoft cost of equity", "Microsoft cost of debt", "Microsoft capital structure", "current risk-free rate", "Microsoft beta"]},
    {"user_input": "Show Amazon stock chart for last 5 years", "searches": ["Amazon stock chart last 5 years", "Amazon historical price data"]},
    {"user_input": "GDP of China in 2023", "searches": ["China GDP 2023", "latest GDP figures for China"]},
    {"user_input": "Portfolio optimization model", "searches": ["efficient frontier portfolio theory", "input data for portfolio optimization model", "expected returns and covariances"]},
    {"user_input": "Find current inflation rate in the US", "searches": ["current US inflation rate", "US CPI data"]},
    {"user_input": "What is NPV and how do you calculate it?", "searches": ["definition of NPV", "how to calculate NPV"]},
    {"user_input": "Dividend yield for Coca-Cola", "searches": ["Coca-Cola dividend yield", "latest Coca-Cola dividend data"]},
    {"user_input": "Sharpe ratio formula example", "searches": ["Sharpe ratio formula", "example calculation of Sharpe ratio"]},
    {"user_input": "What is the current Fed interest rate?", "searches": ["current Federal Reserve interest rate", "latest Fed interest rate decision"]},
    {"user_input": "Generate DCF model for Tesla", "searches": ["Tesla free cash flow data", "Tesla growth rate projections", "current discount rate for Tesla", "steps to build a DCF model"]},
    {"user_input": "Tell me a joke", "searches": []},
    {"user_input": "Explain the concept of opportunity cost", "searches": ["definition of opportunity cost", "examples of opportunity cost in economics"]}

"""

    # Seed from Gradio's history, or start a fresh conversation.
    # NOTE(review): this mutates the history list in place via append() below.
    search_messages = history or [{'role': 'system', 'content': 'you are IM.FIN'}]

    print(f'here is the search messages: \n\n\n\n {search_messages} \n\n\n')
    # Step 1: ask the model to plan up to 3 Google searches in the dict format above.
    search_messages.append({'role':'user','content':f'based on {user_input} and {search_messages}, respond with a list of google searches that will give the correct data to respond, respond in this format: {format_template} with up to 3 searches but try and limit it to the minimum needed. RETURN 1 DICTIONARY IN THE SPECIFIED FORMAT BASED ON THE USER INPUT {user_input}. RETURN ABSOLUTELY NO OTHER TEXT OTHER THAN THE DICTIONARY WITH THE SEARCHES. here is the history use it {search_messages}. MAKE SURE YOU ALWAYS HAVE A , BETWEEN THE user_input and searches. only return one dictionary'})
    for value in dots_animation:
        yield value
    response_for_searches = client.chat.completions.create(
        model='Qwen/Qwen2.5-72B-Instruct',

        messages=search_messages
    )
    searches_resp = response_for_searches.choices[0].message.content
    yield dots_animation[1]
    print(f'search model response: {searches_resp}')
    # Parse the model's dict literal. NOTE(review): raises (ValueError/SyntaxError)
    # if the model returns anything other than a bare Python/JSON literal.
    searches = ast.literal_eval(searches_resp)
    # NOTE(review): this appends the raw searches dict to the chat history,
    # not a {'role': ..., 'content': ...} message — confirm downstream
    # completions tolerate it.
    search_messages.append(searches)
    
    print(searches)
    yield arrow_animation[0]
    summary_messages = [
        {'role':'system','content':'you are IM.FIN'}
    ]
    
    # Step 2: run every planned search and scrape the results (one string each).
    var = [perform_search(search) for search in searches['searches']]
    yield arrow_animation[1]
    
    # Truncate the combined scraped text to fit the model's context window.
    var = tokenize_with_qwen(var)
    yield arrow_animation[2]
    print(f'the type of var is {type(var)}')
    var = ''.join(var)
    print(f'the data: {var}')

    
    yield arrow_animation[3]
    # Step 2b: summarize the scraped data with respect to the user's question.
    summary_messages.append({'role':'user','content':f'use {user_input} to summarize {var}, return nothing other than the summarized response. MAKE SURE TO PICK OUT THE NUMERICAL DATA BASED ON THE USER RESPONSE'})
    for value in arrow_animation:
        time.sleep(1)
        yield value
    response_for_chat = client.chat.completions.create(
        model='Qwen/Qwen2.5-72B-Instruct',

        messages=summary_messages,
        max_tokens=2000
    )

    summary = response_for_chat.choices[0].message.content
    # Placeholder URL used when no chart ends up being generated.
    chart_url = 'nonethereyet'
    ### possible chart generation
    
    # Step 3: let the model decide whether a chart is needed; if so, render it
    # via QuickChart and upload the PNG to Catbox so it can be linked in chat.
    name_of_file = f"dynamic_chart{random.randint(0,1000)}.png"
    config = generate_quickchart_config(user_input, summary)
    if config:
        generate_chart(config, filename=name_of_file)
        image_path = f'{name_of_file}'
        chart_url = upload_to_catbox(f'charts/{image_path}')
    else:
        print("No chart was generated.")
        pass

    for value in arrow_animation:
        
        yield value
    # Step 4: produce the final user-facing answer from the summary.
    final_messages = [
        {'role':'system','content':'you are IM.FIN, you are a virtual stock analyst built to automate investing tasks and simulate the intelligence of stock analysts, you can form opinions based on data and form conclusions like stock analysts, you were created by quantineuron.com. KEEP RESPONSES CONCISE, ANSWERING THE USERS INPUT '}
    ]
   
    print(f'here is the summary: {summary}')
    final_messages.append({'role':'user','content': f'based on this data {summary}, answer {user_input}, here is the history {final_messages}. ONLY USE THE DATA THAT IS NEEDED AND ACT AS THOUGH THAT DATA IS YOURS AND CORRECT. KEEP RESPONSES CONCISE. IF THE DATA PROVIDED IS NOT RELEVANT TO THE USERS REQUEST, IGNORE IT AND ANSWER NORMALLY. IF THE USER ASKS FOR ANY TYPE OF CHART, DO NOT ATTEMPT TO MAKE IT YOURSELF, SIMPLY MAKE A TABLE WITH THE DATA, THE CHART WILL BE FOUND AT THIS URL: {chart_url} SO SAY TO THE USER IT IS THERE AND LINK IT TO THEM, IF THEY HAVE NOT ASKED FOR A CHART, DO NOT INCLUDE THE URL IN YOUR RESPONSE '})
    yield typing_animation[0]
    final_response = client.chat.completions.create(
        model='Qwen/Qwen2.5-72B-Instruct',
   
        messages=final_messages,
        max_tokens=2000,
        stream=True
    )
    yield typing_animation[1]
    response = ""
    # Stream the answer: yield the accumulated text after each chunk so the
    # UI updates progressively.
    for chunk in final_response:
        content = chunk.choices[0].delta.content or ''
        response += content
    
        yield response
    

    # NOTE(review): appends the bare response string (not a role/content dict)
    # to both histories — confirm later turns handle this shape.
    final_messages.append(response)
    search_messages.append(response)
    
    print(f'\n\n here is the chat history for the final response \n\n\n {response}')
    
    


# Logo URL — NOTE(review): `avatar` is not referenced anywhere in this file;
# presumably intended for the chatbot's avatar_images — confirm or remove.
avatar = 'https://quantineuron.com/wp-content/uploads/2024/08/cropped-final-logo-with-background-removed.png'


# Soft Gradio theme with sky/zinc palette for the chat UI.
theme = gr.themes.Soft(
    primary_hue="sky",
    neutral_hue="zinc",
)



# Launch the chat UI; `chat` is a generator, so Gradio streams its yields.
gr.ChatInterface(
    
    
    fn=chat,
    theme=theme
).launch()