svjack commited on
Commit
957334c
·
verified ·
1 Parent(s): 565c0d8

Update caption_generator.py

Browse files
Files changed (1) hide show
  1. caption_generator.py +30 -16
caption_generator.py CHANGED
@@ -1,5 +1,5 @@
1
  '''
2
- python caption_generator.py /path/to/input/image.jpg /path/to/output/directory --caption_type "Descriptive" --caption_length "long" --extra_options 0 2 5 --name_input "John"
3
  '''
4
 
5
  import argparse
@@ -9,6 +9,7 @@ from torch import nn
9
  from transformers import AutoModel, AutoProcessor, AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast, AutoModelForCausalLM
10
  from PIL import Image
11
  import torchvision.transforms.functional as TVF
 
12
 
13
  # Constants
14
  CLIP_PATH = "google/siglip-so400m-patch14-384"
@@ -174,8 +175,8 @@ def generate_caption(input_image: Image.Image, caption_type: str, caption_length
174
  # Main function
175
  def main():
176
  parser = argparse.ArgumentParser(description="Generate a caption for an image.")
177
- parser.add_argument("input_image", type=str, help="Path to the input image")
178
- parser.add_argument("output_path", type=str, help="Path to save the output caption and image")
179
  parser.add_argument("--caption_type", type=str, default="Descriptive", choices=CAPTION_TYPE_MAP.keys(), help="Type of caption to generate")
180
  parser.add_argument("--caption_length", type=str, default="long", help="Length of the caption")
181
  parser.add_argument("--extra_options", nargs="*", type=int, default=[], help="Extra options for caption generation (provide IDs separated by spaces)")
@@ -189,24 +190,37 @@ def main():
189
  # Load models
190
  clip_processor, clip_model, tokenizer, text_model, image_adapter = load_models()
191
 
192
- # Open the input image
193
- input_image = Image.open(args.input_image)
 
 
 
 
194
 
195
- # Generate caption
196
- prompt_str, caption = generate_caption(input_image, args.caption_type, args.caption_length, selected_extra_options, args.name_input, args.custom_prompt, clip_processor, clip_model, tokenizer, text_model, image_adapter)
197
-
198
- # Save caption and image
199
  output_path = Path(args.output_path)
200
  output_path.mkdir(parents=True, exist_ok=True)
201
- image_name = Path(args.input_image).name.replace(" ", "_")
202
- output_image_path = output_path / image_name
203
- input_image.save(output_image_path)
204
 
205
- txt_file_path = output_path / f"{output_image_path.stem}.txt"
206
- with open(txt_file_path, "w") as f:
207
- f.write(f"Prompt: {prompt_str}\n\nCaption: {caption}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
209
- print(f"Caption saved to {txt_file_path}")
 
210
 
211
  if __name__ == "__main__":
212
  # Print extra options with IDs for reference
 
1
  '''
2
+ python caption_generator.py /path/to/input /path/to/output/directory --caption_type "Descriptive" --caption_length "long" --extra_options 0 2 5 --name_input "John"
3
  '''
4
 
5
  import argparse
 
9
  from transformers import AutoModel, AutoProcessor, AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast, AutoModelForCausalLM
10
  from PIL import Image
11
  import torchvision.transforms.functional as TVF
12
+ from tqdm import tqdm # import tqdm to display a progress bar
13
 
14
  # Constants
15
  CLIP_PATH = "google/siglip-so400m-patch14-384"
 
175
  # Main function
176
  def main():
177
  parser = argparse.ArgumentParser(description="Generate a caption for an image.")
178
+ parser.add_argument("input_path", type=str, help="Path to the input image or directory containing images")
179
+ parser.add_argument("output_path", type=str, help="Path to save the output captions and images")
180
  parser.add_argument("--caption_type", type=str, default="Descriptive", choices=CAPTION_TYPE_MAP.keys(), help="Type of caption to generate")
181
  parser.add_argument("--caption_length", type=str, default="long", help="Length of the caption")
182
  parser.add_argument("--extra_options", nargs="*", type=int, default=[], help="Extra options for caption generation (provide IDs separated by spaces)")
 
190
  # Load models
191
  clip_processor, clip_model, tokenizer, text_model, image_adapter = load_models()
192
 
193
+ # Determine if input is a directory or a single file
194
+ input_path = Path(args.input_path)
195
+ if input_path.is_dir():
196
+ image_paths = list(input_path.glob("*.[pjP][npP][gG]")) + list(input_path.glob("*.[jJ][pP][eE][gG]")) # supports PNG and JPEG formats — NOTE(review): the first pattern also matches unintended extensions such as ".ppg"/".jng" and misses ".PNG"/".JPG" (no uppercase N/J in the classes); a case-insensitive suffix check would be safer
197
+ else:
198
+ image_paths = [input_path]
199
 
200
+ # Create output directory if it doesn't exist
 
 
 
201
  output_path = Path(args.output_path)
202
  output_path.mkdir(parents=True, exist_ok=True)
 
 
 
203
 
204
+ # Process each image
205
+ for image_path in tqdm(image_paths, desc="Processing images"):
206
+ try:
207
+ # Open the input image
208
+ input_image = Image.open(image_path)
209
+
210
+ # Generate caption
211
+ prompt_str, caption = generate_caption(input_image, args.caption_type, args.caption_length, selected_extra_options, args.name_input, args.custom_prompt, clip_processor, clip_model, tokenizer, text_model, image_adapter)
212
+
213
+ # Save caption and image
214
+ image_name = image_path.name.replace(" ", "_")
215
+ output_image_path = output_path / image_name
216
+ input_image.save(output_image_path)
217
+
218
+ txt_file_path = output_path / f"{output_image_path.stem}.txt"
219
+ with open(txt_file_path, "w") as f:
220
+ f.write(f"Prompt: {prompt_str}\n\nCaption: {caption}")
221
 
222
+ except Exception as e:
223
+ print(f"Error processing {image_path}: {e}")
224
 
225
  if __name__ == "__main__":
226
  # Print extra options with IDs for reference