How to use a model from a local directory?
I would like to have a script that loads the model from a local directory, but local_path doesn't work. Do you have a solution?
#!/usr/bin/env python3
import os
import sys


def activate_virtualenv(venv_path):
    """
    Activates a virtual environment by modifying sys.path and environment variables.
    """
    # Check if the virtual environment exists
    if not os.path.exists(venv_path):
        raise FileNotFoundError(f"Virtual environment not found at: {venv_path}")

    # Add the virtual environment's site-packages to sys.path
    python_version = f"python{sys.version_info.major}.{sys.version_info.minor}"
    site_packages = os.path.join(venv_path, "lib", python_version, "site-packages")
    if not os.path.exists(site_packages):
        raise FileNotFoundError(f"Site-packages directory not found: {site_packages}")
    sys.path.insert(0, site_packages)

    # Set the VIRTUAL_ENV environment variable
    os.environ["VIRTUAL_ENV"] = venv_path

    # Add the virtual environment's bin directory to PATH
    venv_bin = os.path.join(venv_path, "bin")
    os.environ["PATH"] = venv_bin + os.pathsep + os.environ.get("PATH", "")


def main(file_path, prompt=None):
    # Define the path to your virtual environment
    venv_path = '/home/data1/protected/venv'

    # Activate the virtual environment
    activate_virtualenv(venv_path)

    try:
        # Import the required module
        from namo.api.vl import VLInfer
    except ImportError as e:
        print(f"Error importing required modules: {e}")
        print("Please ensure the virtual environment is set up correctly and all dependencies are installed.")
        sys.exit(1)

    # Define the full path to your model
    model_path = "/mnt/data/LLM/lucasjin/Namo-500M-V1"

    # Load the model from the specified path
    try:
        model = VLInfer(model_path=model_path)
    except Exception as e:
        print(f"Error loading the model from {model_path}: {e}")
        sys.exit(1)

    # Generate response
    if prompt:
        response = model.generate(prompt, file_path)
    else:
        response = model.generate(file_path)
    print(response)


if __name__ == "__main__":
    # Check if the required arguments are provided
    if len(sys.argv) < 2:
        print("Usage: python script.py <file_path> [prompt]")
        sys.exit(1)

    # Get arguments from the command line
    file_path = sys.argv[1]
    prompt = sys.argv[2] if len(sys.argv) > 2 else None

    # Execute the main function
    main(file_path, prompt)
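Since passing a path directly doesn't seem to be accepted, one way to check which constructor arguments VLInfer actually takes (and whether any of them points at a local directory) is to inspect its signature. This is a minimal check, assuming only that the namo package is importable from the activated environment:

import inspect
from namo.api.vl import VLInfer

# Print the parameters that VLInfer's constructor actually accepts;
# this shows whether a local-directory argument exists and what it is called.
print(inspect.signature(VLInfer.__init__))

The version below works by initializing with model_type instead of a path: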
#!/usr/bin/env python3
import os
import sys


def activate_virtualenv(venv_path):
    """
    Activates a virtual environment by modifying sys.path and environment variables.
    """
    # Check if the virtual environment exists
    if not os.path.exists(venv_path):
        raise FileNotFoundError(f"Virtual environment not found at: {venv_path}")

    # Add the virtual environment's site-packages to sys.path
    python_version = f"python{sys.version_info.major}.{sys.version_info.minor}"
    site_packages = os.path.join(venv_path, "lib", python_version, "site-packages")
    if not os.path.exists(site_packages):
        raise FileNotFoundError(f"Site-packages directory not found: {site_packages}")
    sys.path.insert(0, site_packages)

    # Set the VIRTUAL_ENV environment variable
    os.environ["VIRTUAL_ENV"] = venv_path

    # Add the virtual environment's bin directory to PATH
    venv_bin = os.path.join(venv_path, "bin")
    os.environ["PATH"] = venv_bin + os.pathsep + os.environ.get("PATH", "")


def main(file_path, prompt=None):
    # Define the path to your virtual environment
    venv_path = '/home/data1/protected/namo'

    # Activate the virtual environment
    activate_virtualenv(venv_path)

    try:
        # Import the required module
        from namo.api.vl import VLInfer
    except ImportError as e:
        print(f"Error importing required modules: {e}")
        print("Please ensure the virtual environment is set up correctly and all dependencies are installed.")
        sys.exit(1)

    # Load the model using the correct initialization method
    try:
        # Use the correct argument expected by VLInfer (e.g., model_type)
        model = VLInfer(model_type='namo')  # Replace 'namo' with the correct model type if needed
    except Exception as e:
        print(f"Error loading the model: {e}")
        sys.exit(1)

    # Define the default prompt
    default_prompt = (
        "If the image contains text, perform OCR to extract the text. "
        "If the image is a photo, describe it in detail, including objects, colors, and any relevant context."
    )

    # Use the provided prompt or the default prompt
    final_prompt = prompt if prompt else default_prompt

    # Generate response
    try:
        response = model.generate(final_prompt, images=[file_path])
    except Exception as e:
        print(f"Error generating response: {e}")
        sys.exit(1)

    print(response)


if __name__ == "__main__":
    # Check if the required arguments are provided
    if len(sys.argv) < 2:
        print("Usage: python script.py <file_path> [prompt]")
        sys.exit(1)

    # Get arguments from the command line
    file_path = sys.argv[1]
    prompt = sys.argv[2] if len(sys.argv) > 2 else None

    # Execute the main function
    main(file_path, prompt)
The above script works, but I would like to know how to remove all the logging so that I get only the output.
rcd-llm-namo-describe-image.sh electric-bill.jpg 2> /dev/null | grep -v -e "==>"
That is the way I am using it. However, with the standard prompt I am now getting "None" at the end.
Is there a special function or setting for OCR?
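As a possible alternative to filtering with grep, the noise can often be silenced inside the script itself. This is only a sketch, assuming the messages go through Python's logging module and/or stderr, and that generate() streams its answer to stdout and returns None (which would explain the trailing "None"); anything the library prints straight to stdout would still need the grep filter:

import contextlib
import logging
import os

# Silence everything emitted through Python's logging module.
logging.disable(logging.CRITICAL)

# Suppress anything written directly to stderr while generating.
with open(os.devnull, "w") as devnull, contextlib.redirect_stderr(devnull):
    response = model.generate(final_prompt, images=[file_path])

# Only print if generate() actually returned text; if it streams the answer
# itself and returns None, this avoids the stray "None" line.
if response is not None:
    print(response)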
If I use the default prompt from the script above, I get:
The image is a screenshot of a financial document, specifically a statement from Elmhurst Mutual Power & Light Company. The document is structured with various sections, including a summary of the account, a breakdown of the monthly energy use, and a comparison of the total energy use over different time periods. The text is in English and is presented in a mix of bold and regular fonts, with some sections highlighted in blue to draw attention to specific figures or data.
The document includes contact information for the company, including a phone number, fax number, and email address. There are also instructions for the user to fill out a form and to provide their name, address, and phone number. The document is dated 03/23/2025, and the account number is 82135. The document is printed on white paper and has a watermark that reads "DOOM 5312 4000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
None
Then if I run the following on the same document:
rcd-llm-namo-describe-image.sh electric-bill.jpg "OCR this text" 2> /dev/null | grep -v -e "==>"
I get:
$231.35
None
So it doesn't really do OCR well on an electric bill full of text and numbers.
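I'm not aware of a dedicated OCR function in namo; one thing that sometimes helps small vision-language models with dense documents is upscaling the scan before inference. This is only an assumption about the model, not a documented feature; the sketch below uses Pillow, and the 2x factor and output filename are arbitrary:

from PIL import Image

# Upscale the scanned bill so small text is rendered at a higher resolution
# before it is handed to the model.
img = Image.open("electric-bill.jpg")
img = img.resize((img.width * 2, img.height * 2), Image.LANCZOS)
img.save("electric-bill-2x.png")

# Then pass the upscaled file to the script as usual:
#   rcd-llm-namo-describe-image.sh electric-bill-2x.png "OCR this text"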