import streamlit as st
import pathlib

from huggingface_hub import hf_hub_download
from langchain_community.llms import LlamaCpp
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


@st.cache_resource()
def load_llm(repo_id, filename):
    # Create a directory for models if it doesn't exist
    models_folder = pathlib.Path("models")
    models_folder.mkdir(exist_ok=True)

    # Download the model
    model_path = hf_hub_download(
        repo_id=repo_id, filename=filename, local_dir=models_folder
    )

    llm = LlamaCpp(
        model_path=model_path,
        verbose=False,
        use_mmap=True,   # memory-map the model file instead of loading it all up front
        use_mlock=True,  # lock model pages in RAM to avoid swapping
        n_threads=4,
        n_ctx=8000,      # context window size in tokens
        # n_threads_batch is not a declared LlamaCpp field; pass it through
        # to llama-cpp-python via model_kwargs.
        model_kwargs={"n_threads_batch": 4},
    )
    print(f"{repo_id} loaded successfully. ✅")
    return llm


# Build a RAG chain and return the full response for the given question.
# Note: `messages` (the chat history) is accepted but not used by this chain.
def response_generator(llm, messages, question, retriever):
    system_prompt = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know. Use three sentences maximum and keep the "
        "answer concise."
        "\n\n"
        "{context}"
    )

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("user", "{input}"),
        ]
    )

    question_answer_chain = create_stuff_documents_chain(llm, prompt)
    rag_chain = create_retrieval_chain(retriever, question_answer_chain)

    # The chain returns a dict with "input", "context" (the retrieved
    # documents), and "answer" keys.
    results = rag_chain.invoke({"input": question})

    return results
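

# Minimal usage sketch (assumed wiring, not part of this file; the repo_id,
# filename, `question`, and `retriever` below are placeholders):
#
#     llm = load_llm(
#         repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
#         filename="Llama-3.2-1B-Instruct-Q4_K_M.gguf",
#     )
#     results = response_generator(llm, st.session_state.messages, question, retriever)
#     st.markdown(results["answer"])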