RajatMalviya committed on
Commit f673b98 · verified · 1 Parent(s): bea810c

Update app.py

Files changed (1)
  1. app.py +103 -400
app.py CHANGED
@@ -1,416 +1,119 @@
- import gradio as gr
- import ffmpeg
  import os
- import uuid
- import requests
- import tempfile
- import shutil
- import re
  import time
- import concurrent.futures
  import torch
- from pathlib import Path
- from dotenv import load_dotenv
- from transformers import AutoProcessor, AutoModelForCausalLM, AutoTokenizer
- from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
  from PIL import Image
- import tempfile
-
-
- # os.makedirs("./hf_cache", exist_ok=True)
- # os.environ["HF_HOME"] = "./hf_cache"
- # os.environ["TRANSFORMERS_CACHE"] = "./hf_cache/transformers"
- # os.environ["HUGGINGFACE_HUB_CACHE"] = "./hf_cache/hub"
-
- # Use system temp directories which should be writable
- TMP_DIR = tempfile.gettempdir()
- MODEL_DIR = os.path.join(TMP_DIR, "hf_models")
-
- # Set environment variables to use these directories
- os.environ["TRANSFORMERS_CACHE"] = os.path.join(TMP_DIR, "transformers_cache")
- os.environ["HF_HOME"] = os.path.join(TMP_DIR, "hf_home")
- os.environ["HUGGINGFACE_HUB_CACHE"] = os.path.join(TMP_DIR, "hf_hub_cache")
-
- # Helper function to safely create directories
- def safe_makedirs(directory):
      try:
-         os.makedirs(directory, exist_ok=True)
-         return True
-     except (PermissionError, OSError) as e:
-         print(f"Warning: Could not create directory {directory}: {e}")
-         return False
-
- # Create necessary directories
- for directory in [MODEL_DIR, os.environ["TRANSFORMERS_CACHE"],
-                   os.environ["HF_HOME"], os.environ["HUGGINGFACE_HUB_CACHE"]]:
-     safe_makedirs(directory)
-
-
-
-
-
- # Add GPU decorator for Hugging Face Spaces
- try:
-     from spaces import GPU
-     use_gpu = True
-     @GPU
-     def get_gpu():
-         return True
-     # Call the function to trigger GPU allocation
-     get_gpu()
- except ImportError:
-     use_gpu = False
-     print("Running without GPU acceleration")
-
- # Load environment variables from .env file if it exists
- load_dotenv()
-
- # Global variables to hold models (lazy loading)
- llava_model = None
- llava_processor = None
- stable_diffusion_pipeline = None
-
- # Set up the model directory
- MODEL_DIR = "./model"
- os.makedirs(MODEL_DIR, exist_ok=True)
-
- def load_llava_model():
-     """Load LLaVA model for image captioning"""
-     global llava_model, llava_processor
-
-     if llava_model is None or llava_processor is None:
-         print("Loading LLaVA model for image analysis...")
-         model_id = "llava-hf/llava-1.5-7b-hf"

          try:
-             # Load processor and model with system temp directory
-             llava_processor = AutoProcessor.from_pretrained(
-                 model_id,
-                 local_files_only=False
-             )
-             llava_model = AutoModelForCausalLM.from_pretrained(
-                 model_id,
-                 torch_dtype=torch.float16,
-                 device_map="auto",
-                 local_files_only=False
-             )
          except Exception as e:
-             print(f"Error loading LLaVA model: {e}")
-             raise
-
-     return llava_model, llava_processor
-
-
- # In the stylize_video function, replace:
- os.makedirs("outputs", exist_ok=True)
- persistent_output = os.path.join("outputs", f"stylized_{uuid.uuid4()}.mp4")
-
- # With:
- outputs_dir = os.path.join(TMP_DIR, "outputs")
- safe_makedirs(outputs_dir)
- persistent_output = os.path.join(outputs_dir, f"stylized_{uuid.uuid4()}.mp4")
-
-
- def load_stable_diffusion_model():
-     """Load Stable Diffusion model for Ghibli-style image generation"""
-     global stable_diffusion_pipeline
-
-     if stable_diffusion_pipeline is None:
-         print("Loading Stable Diffusion model for image generation...")
-         model_id = "nitrosocke/Ghibli-Diffusion"
-
-         # Load the pipeline with precision to balance memory usage and quality
-         stable_diffusion_pipeline = StableDiffusionPipeline.from_pretrained(
-             model_id,
-             torch_dtype=torch.float16,
-             safety_checker=None,  # Disable safety checker for performance
-             cache_dir=os.path.join(MODEL_DIR, "stable_diffusion")
-         )
-
-
-         # Move to GPU if available
-         if torch.cuda.is_available():
-             stable_diffusion_pipeline = stable_diffusion_pipeline.to("cuda")
-
-         # Use the DPM-Solver++ scheduler for better quality at lower steps
-         stable_diffusion_pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
-             stable_diffusion_pipeline.scheduler.config,
-             algorithm_type="dpmsolver++",
-             use_karras_sigmas=True
-         )
-
-     return stable_diffusion_pipeline
-
- def analyze_image_with_llava(image_path):
-     """Process a single frame with LLaVA to generate a description"""
-     try:
-         # Load the model if not already loaded
-         model, processor = load_llava_model()
-
-         # Load the image
-         image = Image.open(image_path)
-
-         # Prompt for Ghibli-specific description
-         prompt = "Describe this image in detail, focusing on elements that would be important to recreate it in Studio Ghibli animation style."
-
-         # Process the image and generate text
-         inputs = processor(prompt, image, return_tensors="pt").to(model.device)
-
-         # Generate with appropriate parameters
-         with torch.no_grad():
-             output = model.generate(
-                 **inputs,
-                 max_new_tokens=300,
-                 do_sample=True,
-                 temperature=0.7,
-                 top_p=0.9,
-             )
-
-         # Decode the output
-         generated_text = processor.decode(output[0], skip_special_tokens=True)
-
-         # Extract just the response part (remove the prompt)
-         response = generated_text.split(prompt)[-1].strip()
-         print(f"LLaVA analysis for frame {os.path.basename(image_path)}: {response[:150]}...")
-
-         return response
-
-     except Exception as e:
-         import traceback
-         print(f"Error analyzing image {os.path.basename(image_path)}: {str(e)}")
-         print(traceback.format_exc())
-         return f"Error analyzing image: {str(e)}"
-
- def generate_ghibli_image(image_description, style_prompt, output_path):
-     """Generate a Ghibli-style image based on the description using Stable Diffusion"""
-     try:
-         # Load the model if not already loaded
-         pipeline = load_stable_diffusion_model()
-
-         # Combine the image description with the style prompt
-         full_prompt = f"{image_description}. {style_prompt}. Hand-drawn animation style, soft colors, attention to detail, Miyazaki aesthetic."
-
-         # Ensure prompt isn't too long
-         if len(full_prompt) > 500:
-             full_prompt = full_prompt[:497] + "..."
-
-         # Generate the image
-         with torch.autocast("cuda" if torch.cuda.is_available() else "cpu"):
-             image = pipeline(
-                 prompt=full_prompt,
-                 negative_prompt="3d, cgi, low quality, blurry, distorted, deformed",
-                 num_inference_steps=30,
-                 guidance_scale=7.5,
-                 width=768,
-                 height=768,
              ).images[0]

-         # Save the generated image
-         image.save(output_path)
-         print(f"Successfully saved stylized frame: {os.path.basename(output_path)}")
-         return True
-
-     except Exception as e:
-         import traceback
-         print(f"Error generating image: {str(e)}")
-         print(traceback.format_exc())
-         return False
-
- def process_frame(frame_path, style_prompt):
-     """Process a single frame with LLaVA analysis and Stable Diffusion generation"""
-     try:
-         # First use LLaVA to analyze the image
-         image_description = analyze_image_with_llava(frame_path)
-
-         if image_description.startswith("Error"):
-             return False

-         # Now use Stable Diffusion to generate a stylized version
-         result = generate_ghibli_image(image_description, style_prompt, frame_path)

-         return result
-
-     except Exception as e:
-         import traceback
-         print(f"Error processing frame {os.path.basename(frame_path)}: {str(e)}")
-         print(traceback.format_exc())
-         return False
-
- def stylize_video(video_path, style_prompt, num_frames=15):
-     try:
-         # Create temp directories
-         temp_dir = tempfile.mkdtemp()
-         input_filename = os.path.join(temp_dir, "input.mp4")
-         frames_dir = os.path.join(temp_dir, "frames")
-         os.makedirs(frames_dir, exist_ok=True)
-
-         # Save the input video to a temporary file
-         if isinstance(video_path, str):
-             if video_path.startswith('http'):
-                 # It's a URL, download it
-                 response = requests.get(video_path, stream=True)
-                 with open(input_filename, 'wb') as f:
-                     for chunk in response.iter_content(chunk_size=8192):
-                         f.write(chunk)
-             elif os.path.exists(video_path):
-                 # It's a file path, copy it
-                 shutil.copy(video_path, input_filename)
-             else:
-                 return None, f"Video file not found: {video_path}"
-         else:
-             # Assume it's binary data
-             with open(input_filename, "wb") as f:
-                 f.write(video_path)
-
-         # Make sure the video file exists
-         if not os.path.exists(input_filename):
-             return None, "Failed to save input video"
-
-         # Extract frames - using lower fps for longer videos (1 frame per second)
-         ffmpeg.input(input_filename).output(f"{frames_dir}/%04d.png", vf="fps=1").run(quiet=True)
-
-         # Check if frames were extracted
-         frames = sorted([os.path.join(frames_dir, f) for f in os.listdir(frames_dir) if f.endswith('.png')])
-         if not frames:
-             return None, "No frames were extracted from the video"
-
-         # Limit to a maximum number of frames for reasonable processing times
-         if len(frames) > num_frames:
-             # Take evenly distributed frames
-             indices = [int(i * (len(frames) - 1) / (num_frames - 1)) for i in range(num_frames)]
-             frames = [frames[i] for i in indices]
-
-         print(f"Processing {len(frames)} frames")
-
-         # Process frames sequentially if we're using a GPU (to avoid CUDA OOM errors)
-         # Otherwise, use a modest level of parallelism
-         if torch.cuda.is_available():
-             # Sequential processing to avoid CUDA OOM errors
-             processed_frames = []
-             for i, frame in enumerate(frames):
-                 success = process_frame(frame, style_prompt)
-                 if success:
-                     processed_frames.append(frame)
-                     print(f"Completed frame {os.path.basename(frame)} ({i+1}/{len(frames)})")
-                 else:
-                     print(f"Failed to process frame {os.path.basename(frame)}")
-
-                 # Free up CUDA cache between frames
-                 torch.cuda.empty_cache()
-         else:
-             # Process frames in parallel with limited workers if no GPU
-             with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
-                 futures = {executor.submit(process_frame, frame, style_prompt): frame for frame in frames}
-
-                 # Collect results
-                 processed_frames = []
-                 for future in concurrent.futures.as_completed(futures):
-                     frame = futures[future]
-                     if future.result():
-                         processed_frames.append(frame)
-                         print(f"Completed frame {os.path.basename(frame)} ({len(processed_frames)}/{len(frames)})")
-
-         if not processed_frames:
-             return None, "Failed to process any frames. Please check the logs for more information."
-
-         # Even if not all frames were processed, try to create a video with what we have
-         print(f"Successfully processed {len(processed_frames)}/{len(frames)} frames")
-
-         # Ensure frames are in the correct order (important for video continuity)
-         processed_frames.sort()
-
-         # Reassemble frames into video
-         output_filename = os.path.join(temp_dir, "stylized.mp4")
-
-         # Use a higher bitrate and better codec for higher quality
-         ffmpeg.input(f"{frames_dir}/%04d.png", framerate=1) \
-             .output(output_filename, vcodec='libx264', pix_fmt='yuv420p', crf=18) \
-             .run(quiet=True)
-
-         # Check if the output file exists and has content
-         if not os.path.exists(output_filename) or os.path.getsize(output_filename) == 0:
-             return None, "Failed to create output video"
-
-         # Copy to a persistent location for Gradio to serve
-         os.makedirs("outputs", exist_ok=True)
-         persistent_output = os.path.join("outputs", f"stylized_{uuid.uuid4()}.mp4")
-         shutil.copy(output_filename, persistent_output)
-
-         # Return the relative path (Gradio can handle this)
-         print(f"Output video created at: {persistent_output}")
-
-         # Cleanup temp files
-         shutil.rmtree(temp_dir)
-
-         return persistent_output, f"Video stylized successfully with {len(processed_frames)} frames!"

      except Exception as e:
-         import traceback
-         traceback_str = traceback.format_exc()
-         print(f"Error: {str(e)}\n{traceback_str}")
-         return None, f"Error: {str(e)}"
-
- # Use Gradio examples feature with local files
- example_videos = [
-     ["sample_video.mp4", "Studio Ghibli animation with Hayao Miyazaki's distinctive hand-drawn art style"]
- ]
-
- with gr.Blocks(title="Video-to-Ghibli Style Converter (Open Source)") as iface:
-     gr.Markdown("# Video-to-Ghibli Style Converter (Open Source)")
-     gr.Markdown("Upload a video and convert it to Studio Ghibli animation style using LLaVA and Stable Diffusion.")
-
-     with gr.Row():
-         with gr.Column(scale=2):
-             # Main input column
-             video_input = gr.Video(label="Upload Video (up to 15 seconds)")
-
-             style_prompt = gr.Textbox(
-                 label="Style Prompt",
-                 value="Studio Ghibli animation with Hayao Miyazaki's distinctive hand-drawn art style"
-             )
-
-             num_frames_slider = gr.Slider(
-                 minimum=5,
-                 maximum=15,
-                 value=10,
-                 step=1,
-                 label="Number of frames to process"
-             )
-
-             submit_btn = gr.Button("Stylize Video", variant="primary")
-
-         with gr.Column(scale=2):
-             # Output column
-             video_output = gr.Video(label="Stylized Video")
-             status_output = gr.Textbox(label="Status", value="Ready. Upload a video to start.")
-
-     submit_btn.click(
-         fn=stylize_video,
-         inputs=[video_input, style_prompt, num_frames_slider],
-         outputs=[video_output, status_output]
-     )
-
-     gr.Markdown("""
-     ## Instructions
-     1. Upload a video up to 15 seconds long
-     2. Customize the style prompt if desired
-     3. Adjust the number of frames to process (fewer = faster)
-     4. Click "Stylize Video" and wait for processing
-
-     ## Example Style Prompts
-     - "Studio Ghibli animation with Hayao Miyazaki's distinctive hand-drawn art style"
-     - "Studio Ghibli style with magical and dreamy atmosphere"
-     - "Nostalgic Studio Ghibli animation style with watercolor backgrounds and clean linework"
-     - "Ghibli-inspired animation with vibrant colors and fantasy elements"
-
-     Note: Each frame is analyzed by LLaVA-1.5-7B and then transformed by Stable Diffusion (Ghibli-Diffusion model).
-     Videos are processed at 1 frame per second to keep processing time reasonable.
-
-     ## Technical Details
-     - Image Analysis: Using LLaVA-1.5-7B for frame understanding and description
-     - Image Generation: Using Stable Diffusion (nitrosocke/Ghibli-Diffusion) for style transfer
-     - All processing happens locally - no API keys needed!
-     """)

  if __name__ == "__main__":
-     iface.launch()
  import os
+ import io
+ import json
+ import base64
  import time
  import torch
  from PIL import Image
+ from typing import Optional
+ from fastapi import FastAPI, File, UploadFile, Form, HTTPException
+ from fastapi.responses import Response
+ from fastapi.middleware.cors import CORSMiddleware
+
+ from safetensors.torch import save_file
+ from src.pipeline import FluxPipeline
+ from src.transformer_flux import FluxTransformer2DModel
+ from src.lora_helper import set_single_lora, set_multi_lora, unset_lora
+
+ # Define paths
+ base_path = "black-forest-labs/FLUX.1-dev"
+ lora_base_path = "./models"
+
+ # Initialize the model
+ print("Loading model...")
+ pipe = FluxPipeline.from_pretrained(base_path, torch_dtype=torch.bfloat16)
+ transformer = FluxTransformer2DModel.from_pretrained(base_path, subfolder="transformer", torch_dtype=torch.bfloat16)
+ pipe.transformer = transformer
+ pipe.to("cuda")
+ print("Model loaded successfully!")
+
+ # Function to clear cache
+ def clear_cache(transformer):
+     for name, attn_processor in transformer.attn_processors.items():
+         attn_processor.bank_kv.clear()
+
+ # Create FastAPI app
+ app = FastAPI(title="Ghibli Image Generator API",
+               description="Convert images to Ghibli Studio style using EasyControl")
+
+ # Add CORS middleware
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Health check endpoint
+ @app.get("/health")
+ async def health_check():
+     return {"status": "healthy", "model": "loaded"}
+
+ # Main image conversion endpoint
+ @app.post("/generate-ghibli")
+ async def generate_ghibli(
+     file: UploadFile = File(...),
+     prompt: str = Form("Ghibli Studio style, Charming hand-drawn anime-style illustration"),
+     height: int = Form(768),
+     width: int = Form(768),
+     seed: int = Form(42)
+ ):
      try:
+         # Validate input image
+         if not file.content_type.startswith("image/"):
+             raise HTTPException(status_code=400, detail="File must be an image")

+         # Read and validate image
+         image_data = await file.read()
          try:
+             spatial_img = Image.open(io.BytesIO(image_data))
          except Exception as e:
+             raise HTTPException(status_code=400, detail=f"Invalid image: {str(e)}")
+
+         # Validate dimensions
+         if height < 256 or height > 1024 or width < 256 or width > 1024:
+             raise HTTPException(status_code=400, detail="Dimensions must be between 256 and 1024")
+
+         # Configure LoRA
+         lora_path = os.path.join(lora_base_path, "Ghibli.safetensors")
+         set_single_lora(pipe.transformer, lora_path, lora_weights=[1], cond_size=512)
+
+         # Generate image
+         with torch.cuda.amp.autocast():
+             output = pipe(
+                 prompt,
+                 height=height,
+                 width=width,
+                 guidance_scale=3.5,
+                 num_inference_steps=25,
+                 max_sequence_length=512,
+                 generator=torch.Generator("cpu").manual_seed(seed),
+                 subject_images=[],
+                 spatial_images=[spatial_img],
+                 cond_size=512,
              ).images[0]

+         # Clear cache
+         clear_cache(pipe.transformer)

+         # Convert output to bytes
+         img_byte_arr = io.BytesIO()
+         output.save(img_byte_arr, format='PNG')
+         img_byte_arr.seek(0)

+         # Return the image directly
+         return Response(
+             content=img_byte_arr.getvalue(),
+             media_type="image/png"
+         )

+     except HTTPException as e:
+         raise e
      except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}")

+ # Run the API with uvicorn
  if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run("app:app", host="0.0.0.0", port=7860)
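The new app.py exposes the conversion as an HTTP endpoint rather than a Gradio UI. Below is a minimal client sketch, assuming the server is running with the uvicorn defaults above (port 7860 on the local machine); the requests dependency and the photo.jpg / ghibli.png filenames are illustrative placeholders and are not part of the commit.

# Hypothetical client example: POST an image to /generate-ghibli and save the PNG reply.
import requests

with open("photo.jpg", "rb") as f:  # placeholder input image
    resp = requests.post(
        "http://localhost:7860/generate-ghibli",
        files={"file": ("photo.jpg", f, "image/jpeg")},
        data={
            "prompt": "Ghibli Studio style, Charming hand-drawn anime-style illustration",
            "height": 768,
            "width": 768,
            "seed": 42,
        },
        timeout=600,  # generation can take a while on first call
    )
resp.raise_for_status()

with open("ghibli.png", "wb") as out:  # placeholder output path
    out.write(resp.content)  # the endpoint returns the PNG bytes directly

Because the endpoint returns the generated image as the response body with media_type "image/png", the client can write resp.content straight to disk; no JSON decoding is needed.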