How to use local dir model?

#4
by JLouisBiz - opened

I would like to have a script that loads the model from a local directory, but local_path doesn't work. Do you have a solution?

#!/usr/bin/env python3

import os
import sys

def activate_virtualenv(venv_path):
    """
    Activate a virtual environment by adjusting sys.path and environment variables.

    Args:
        venv_path: Root directory of the virtual environment.

    Raises:
        FileNotFoundError: If the environment or its site-packages directory is missing.
    """
    # isdir (not exists) so a stray regular file at this path is rejected too.
    if not os.path.isdir(venv_path):
        raise FileNotFoundError(f"Virtual environment not found at: {venv_path}")

    python_version = f"python{sys.version_info.major}.{sys.version_info.minor}"
    # POSIX layout: <venv>/lib/pythonX.Y/site-packages and <venv>/bin;
    # Windows layout: <venv>\Lib\site-packages and <venv>\Scripts.
    if os.name == "nt":
        site_packages = os.path.join(venv_path, "Lib", "site-packages")
        venv_bin = os.path.join(venv_path, "Scripts")
    else:
        site_packages = os.path.join(venv_path, "lib", python_version, "site-packages")
        venv_bin = os.path.join(venv_path, "bin")

    if not os.path.isdir(site_packages):
        raise FileNotFoundError(f"Site-packages directory not found: {site_packages}")

    # Guard against duplicates when the function is called more than once.
    if site_packages not in sys.path:
        sys.path.insert(0, site_packages)

    # Set the VIRTUAL_ENV environment variable
    os.environ["VIRTUAL_ENV"] = venv_path

    # Prepend the environment's executables directory to PATH only once.
    current_path = os.environ.get("PATH", "")
    if venv_bin not in current_path.split(os.pathsep):
        os.environ["PATH"] = venv_bin + os.pathsep + current_path

def main(file_path, prompt=None):
    """Load the Namo VL model and run it on *file_path*, optionally with *prompt*."""
    # Make the dedicated virtual environment importable before touching namo.
    venv_path = '/home/data1/protected/venv'
    activate_virtualenv(venv_path)

    # Import lazily: the package only resolves once the venv is on sys.path.
    try:
        from namo.api.vl import VLInfer
    except ImportError as e:
        print(f"Error importing required modules: {e}")
        print("Please ensure the virtual environment is set up correctly and all dependencies are installed.")
        sys.exit(1)

    # Define the full path to your model
    model_path = "/mnt/data/LLM/lucasjin/Namo-500M-V1"

    # Load the model from the specified path
    try:
        model = VLInfer(model_path=model_path)
    except Exception as e:
        print(f"Error loading the model from {model_path}: {e}")
        sys.exit(1)

    # With a prompt the model gets (prompt, file); without, just the file.
    call_args = (prompt, file_path) if prompt else (file_path,)
    response = model.generate(*call_args)
    print(response)

if __name__ == "__main__":
    # The image path is mandatory; the prompt is optional.
    argv = sys.argv
    if len(argv) < 2:
        print("Usage: python script.py <file_path> [prompt]")
        sys.exit(1)

    # Hand the command-line arguments to the entry point.
    main(argv[1], argv[2] if len(argv) > 2 else None)
#!/usr/bin/env python3

import os
import sys

def activate_virtualenv(venv_path):
    """
    Activates a virtual environment by modifying sys.path and environment variables.
    """
    # Fail early if the environment directory is absent.
    if not os.path.exists(venv_path):
        raise FileNotFoundError(f"Virtual environment not found at: {venv_path}")

    # Locate the version-specific site-packages directory of the venv.
    py_dir = f"python{sys.version_info.major}.{sys.version_info.minor}"
    site_packages = os.path.join(venv_path, "lib", py_dir, "site-packages")
    if not os.path.exists(site_packages):
        raise FileNotFoundError(f"Site-packages directory not found: {site_packages}")

    # Give the venv's packages priority over everything else.
    sys.path.insert(0, site_packages)

    # Mirror what `source bin/activate` exports for child processes.
    os.environ["VIRTUAL_ENV"] = venv_path
    bin_dir = os.path.join(venv_path, "bin")
    current_path = os.environ.get("PATH", "")
    os.environ["PATH"] = bin_dir + os.pathsep + current_path

def main(file_path, prompt=None):
    """Describe or OCR the image at *file_path* using the Namo VL model."""
    # The namo package lives inside this dedicated virtual environment.
    activate_virtualenv('/home/data1/protected/namo')

    # Import lazily: the package only resolves once the venv is on sys.path.
    try:
        from namo.api.vl import VLInfer
    except ImportError as e:
        print(f"Error importing required modules: {e}")
        print("Please ensure the virtual environment is set up correctly and all dependencies are installed.")
        sys.exit(1)

    try:
        # Use the correct argument expected by VLInfer (e.g., model_type)
        model = VLInfer(model_type='namo')  # Replace 'namo' with the correct model type if needed
    except Exception as e:
        print(f"Error loading the model: {e}")
        sys.exit(1)

    # Fall back to a generic OCR/description instruction when no prompt is given.
    default_prompt = (
        "If the image contains text, perform OCR to extract the text. "
        "If the image is a photo, describe it in detail, including objects, colors, and any relevant context."
    )
    final_prompt = prompt or default_prompt

    try:
        response = model.generate(final_prompt, images=[file_path])
    except Exception as e:
        print(f"Error generating response: {e}")
        sys.exit(1)

    print(response)

if __name__ == "__main__":
    # At minimum the image path must be supplied.
    if len(sys.argv) < 2:
        print("Usage: python script.py <file_path> [prompt]")
        sys.exit(1)

    # Remaining arguments: <file_path> and an optional [prompt].
    cli_args = sys.argv[1:]
    main(cli_args[0], cli_args[1] if len(cli_args) > 1 else None)
    

The above script works, but I wish I knew how to remove all the logging so that I get only the output.

rcd-llm-namo-describe-image.sh electric-bill.jpg  2> /dev/null | grep -v -e "==>"

That is the way I am using it. However, with the standard prompt I am now getting "None" at the end.

Is there special function or settings for OCR?

If I give the above prompt, I get:

The image is a screenshot of a financial document, specifically a statement from Elmhurst Mutual Power & Light Company. The document is structured with various sections, including a summary of the account, a breakdown of the monthly energy use, and a comparison of the total energy use over different time periods. The text is in English and is presented in a mix of bold and regular fonts, with some sections highlighted in blue to draw attention to specific figures or data.

The document includes contact information for the company, including a phone number, fax number, and email address. There are also instructions for the user to fill out a form and to provide their name, address, and phone number. The document is dated 03/23/2025, and the account number is 82135. The document is printed on white paper and has a watermark that reads "DOOM 5312 4000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
None

then if I do following on same document:
rcd-llm-namo-describe-image.sh electric-bill.jpg "OCR this text" 2> /dev/null | grep -v -e "==>"

$231.35
None

So it doesn't really do OCR well on an electric bill full of text and numbers.

You need to confirm your account before you can post a new comment.

Sign up or log in to comment