How to use a model from a local directory?
I would like to have a script that loads the model from a local directory, but local_path doesn't work. Do you have a solution?
#!/usr/bin/env python3
import os
import sys


def activate_virtualenv(venv_path):
    """
    Activates a virtual environment by modifying sys.path and environment variables.
    """
    # Check if the virtual environment exists
    if not os.path.exists(venv_path):
        raise FileNotFoundError(f"Virtual environment not found at: {venv_path}")

    # Add the virtual environment's site-packages to sys.path
    python_version = f"python{sys.version_info.major}.{sys.version_info.minor}"
    site_packages = os.path.join(venv_path, "lib", python_version, "site-packages")
    if not os.path.exists(site_packages):
        raise FileNotFoundError(f"Site-packages directory not found: {site_packages}")
    sys.path.insert(0, site_packages)

    # Set the VIRTUAL_ENV environment variable
    os.environ["VIRTUAL_ENV"] = venv_path

    # Add the virtual environment's bin directory to PATH
    venv_bin = os.path.join(venv_path, "bin")
    os.environ["PATH"] = venv_bin + os.pathsep + os.environ.get("PATH", "")


def main(file_path, prompt=None):
    # Define the path to your virtual environment
    venv_path = '/home/data1/protected/venv'

    # Activate the virtual environment
    activate_virtualenv(venv_path)

    try:
        # Import the required module
        from namo.api.vl import VLInfer
    except ImportError as e:
        print(f"Error importing required modules: {e}")
        print("Please ensure the virtual environment is set up correctly and all dependencies are installed.")
        sys.exit(1)

    # Define the full path to your model
    model_path = "/mnt/data/LLM/lucasjin/Namo-500M-V1"

    # Load the model from the specified path
    try:
        model = VLInfer(model_path=model_path)
    except Exception as e:
        print(f"Error loading the model from {model_path}: {e}")
        sys.exit(1)

    # Generate response
    if prompt:
        response = model.generate(prompt, file_path)
    else:
        response = model.generate(file_path)
    print(response)


if __name__ == "__main__":
    # Check if the required arguments are provided
    if len(sys.argv) < 2:
        print("Usage: python script.py <file_path> [prompt]")
        sys.exit(1)

    # Get arguments from the command line
    file_path = sys.argv[1]
    prompt = sys.argv[2] if len(sys.argv) > 2 else None

    # Execute the main function
    main(file_path, prompt)
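Since passing a path directly doesn't seem to be accepted, one way to check which constructor arguments VLInfer actually takes (and whether any of them points at a local directory) is to inspect its signature. This is a minimal check, assuming only that the namo package is importable from the activated environment:

import inspect
from namo.api.vl import VLInfer

# Print the parameters that VLInfer's constructor actually accepts;
# this shows whether a local-directory argument exists and what it is called.
print(inspect.signature(VLInfer.__init__))

The version below works by initializing with model_type instead of a path: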
#!/usr/bin/env python3
import os
import sys


def activate_virtualenv(venv_path):
    """
    Activates a virtual environment by modifying sys.path and environment variables.
    """
    # Check if the virtual environment exists
    if not os.path.exists(venv_path):
        raise FileNotFoundError(f"Virtual environment not found at: {venv_path}")

    # Add the virtual environment's site-packages to sys.path
    python_version = f"python{sys.version_info.major}.{sys.version_info.minor}"
    site_packages = os.path.join(venv_path, "lib", python_version, "site-packages")
    if not os.path.exists(site_packages):
        raise FileNotFoundError(f"Site-packages directory not found: {site_packages}")
    sys.path.insert(0, site_packages)

    # Set the VIRTUAL_ENV environment variable
    os.environ["VIRTUAL_ENV"] = venv_path

    # Add the virtual environment's bin directory to PATH
    venv_bin = os.path.join(venv_path, "bin")
    os.environ["PATH"] = venv_bin + os.pathsep + os.environ.get("PATH", "")


def main(file_path, prompt=None):
    # Define the path to your virtual environment
    venv_path = '/home/data1/protected/namo'

    # Activate the virtual environment
    activate_virtualenv(venv_path)

    try:
        # Import the required module
        from namo.api.vl import VLInfer
    except ImportError as e:
        print(f"Error importing required modules: {e}")
        print("Please ensure the virtual environment is set up correctly and all dependencies are installed.")
        sys.exit(1)

    # Load the model using the correct initialization method
    try:
        # Use the correct argument expected by VLInfer (e.g., model_type)
        model = VLInfer(model_type='namo')  # Replace 'namo' with the correct model type if needed
    except Exception as e:
        print(f"Error loading the model: {e}")
        sys.exit(1)

    # Define the default prompt
    default_prompt = (
        "If the image contains text, perform OCR to extract the text. "
        "If the image is a photo, describe it in detail, including objects, colors, and any relevant context."
    )

    # Use the provided prompt or the default prompt
    final_prompt = prompt if prompt else default_prompt

    # Generate response
    try:
        response = model.generate(final_prompt, images=[file_path])
    except Exception as e:
        print(f"Error generating response: {e}")
        sys.exit(1)

    print(response)


if __name__ == "__main__":
    # Check if the required arguments are provided
    if len(sys.argv) < 2:
        print("Usage: python script.py <file_path> [prompt]")
        sys.exit(1)

    # Get arguments from the command line
    file_path = sys.argv[1]
    prompt = sys.argv[2] if len(sys.argv) > 2 else None

    # Execute the main function
    main(file_path, prompt)
The above script works, but I would like to know how to remove all the logging so that I get only the output.
rcd-llm-namo-describe-image.sh electric-bill.jpg 2> /dev/null | grep -v -e "==>"
That is the way I am using it. However, with the standard prompt I am now getting "None" at the end.
Is there a special function or setting for OCR?
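As a possible alternative to filtering with grep, the noise can often be silenced inside the script itself. This is only a sketch, assuming the messages go through Python's logging module and/or stderr, and that generate() streams its answer to stdout and returns None (which would explain the trailing "None"); anything the library prints straight to stdout would still need the grep filter:

import contextlib
import logging
import os

# Silence everything emitted through Python's logging module.
logging.disable(logging.CRITICAL)

# Suppress anything written directly to stderr while generating.
with open(os.devnull, "w") as devnull, contextlib.redirect_stderr(devnull):
    response = model.generate(final_prompt, images=[file_path])

# Only print if generate() actually returned text; if it streams the answer
# itself and returns None, this avoids the stray "None" line.
if response is not None:
    print(response)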
If I use the default prompt from the script above, I get:
The image is a screenshot of a financial document, specifically a statement from Elmhurst Mutual Power & Light Company. The document is structured with various sections, including a summary of the account, a breakdown of the monthly energy use, and a comparison of the total energy use over different time periods. The text is in English and is presented in a mix of bold and regular fonts, with some sections highlighted in blue to draw attention to specific figures or data.
The document includes contact information for the company, including a phone number, fax number, and email address. There are also instructions for the user to fill out a form and to provide their name, address, and phone number. The document is dated 03/23/2025, and the account number is 82135. The document is printed on white paper and has a watermark that reads "DOOM 5312 4000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
None
Then if I run the following on the same document:
rcd-llm-namo-describe-image.sh electric-bill.jpg "OCR this text" 2> /dev/null | grep -v -e "==>"
I get:
$231.35
None
So it doesn't really do OCR well on an electric bill full of text and numbers.
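I'm not aware of a dedicated OCR function in namo; one thing that sometimes helps small vision-language models with dense documents is upscaling the scan before inference. This is only an assumption about the model, not a documented feature; the sketch below uses Pillow, and the 2x factor and output filename are arbitrary:

from PIL import Image

# Upscale the scanned bill so small text is rendered at a higher resolution
# before it is handed to the model.
img = Image.open("electric-bill.jpg")
img = img.resize((img.width * 2, img.height * 2), Image.LANCZOS)
img.save("electric-bill-2x.png")

# Then pass the upscaled file to the script as usual:
#   rcd-llm-namo-describe-image.sh electric-bill-2x.png "OCR this text"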