"""Video frame similarity search demo: Pixeltable ingestion backend."""

import gradio as gr
import pixeltable as pxt
from pixeltable.functions.huggingface import clip_image, clip_text
from pixeltable.iterators import FrameIterator
import PIL.Image
import os  # NOTE(review): appears unused in this file — confirm before removing

# Single source of truth for the CLIP checkpoint used for both text and image
# embeddings (previously duplicated inline in two keyword arguments).
CLIP_MODEL_ID = 'openai/clip-vit-base-patch32'


# Process video and create index
def process_video(video_file, progress=gr.Progress()):
    """Ingest an uploaded video into Pixeltable and build a CLIP embedding index.

    Drops and recreates the 'video_search' Pixeltable directory on every call,
    so each upload starts from a clean slate (any previously indexed video is
    discarded).

    Args:
        video_file: Uploaded file object from ``gr.File``; ``.name`` is the
            local filesystem path of the video.
        progress: Gradio progress tracker (injected by Gradio; the default
            instance is the documented Gradio idiom, not shared mutable state).

    Returns:
        A human-readable status message for display in the UI.
    """
    progress(0, desc="Initializing...")

    # Pixeltable setup: wipe and recreate the working directory.
    pxt.drop_dir('video_search', force=True)
    pxt.create_dir('video_search')

    # Simple type-declaration syntax: one column holding the source video.
    video_table = pxt.create_table('video_search.videos', {'video': pxt.Video})

    # Materialize one frame per second of video as a queryable view.
    frames_view = pxt.create_view(
        'video_search.frames',
        video_table,
        iterator=FrameIterator.create(video=video_table.video, fps=1)
    )

    progress(0.2, desc="Inserting video...")
    video_table.insert([{'video': video_file.name}])

    progress(0.4, desc="Creating embedding index...")
    # Embedding pattern using .using(): index frames for BOTH text and image
    # queries with the same CLIP model so either query kind can be served.
    frames_view.add_embedding_index(
        'frame',
        string_embed=clip_text.using(model_id=CLIP_MODEL_ID),
        image_embed=clip_image.using(model_id=CLIP_MODEL_ID)
    )

    progress(1.0, desc="Processing complete")
    return "Good news! Your video has been processed. Easily find the moments you need by searching with text or images."
# Perform similarity search
def similarity_search(query, search_type, num_results, progress=gr.Progress()):
    """Return the top-N indexed video frames most similar to a query.

    Args:
        query: A text string (when ``search_type == "Text"``) or a PIL image.
        search_type: ``"Text"`` or ``"Image"``. Kept for interface
            compatibility; the original code executed the identical
            ``similarity(query)`` call in both branches, so the dead
            conditional is collapsed here.
        num_results: Number of frames to return.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        A list of frame images (one per result row) for a Gradio gallery.
    """
    frames_view = pxt.get_table('video_search.frames')
    progress(0.5, desc="Performing search...")

    # The index was built with both string_embed and image_embed, so a single
    # similarity() call serves text and image queries alike.
    sim = frames_view.frame.similarity(query)

    results = (
        frames_view.order_by(sim, asc=False)
        .limit(num_results)
        .select(frames_view.frame, sim=sim)
        .collect()
    )
    progress(1.0, desc="Search complete")
    return [row['frame'] for row in results]


# Gradio interface
with gr.Blocks(theme=gr.themes.Base()) as demo:
    # Header. Fixed broken HTML: the div's style attribute was missing its
    # opening quote (`style= margin-bottom: 20px;"`).
    gr.Markdown(
        """
        <div style="margin-bottom: 20px;">
        <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/resources/pixeltable-logo-large.png" alt="Pixeltable" style="max-width: 150px;" />
        <h2>Text and Image similarity search on video frames with embedding indexes</h2>
        </div>
        """
    )
    gr.HTML(
        """
        <p>
        <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a> is a declarative interface for working with text, images, embeddings, and even video, enabling you to store, transform, index, and iterate on data.
        </p>
        """
    )

    with gr.Row():
        # Left column: ingestion and query controls.
        with gr.Column(scale=1):
            gr.Markdown(
                """
                <h3>1. Insert video</h3>
                """)
            video_file = gr.File(label="Upload Video")
            process_button = gr.Button("Process Video")
            process_output = gr.Textbox(label="Status", lines=2)

            gr.Markdown(
                """
                <h3>2. Search video frames</h3>
                """)
            search_type = gr.Radio(["Text", "Image"], label="Search Type", value="Text")
            text_input = gr.Textbox(label="Text Query")
            # Hidden until the user switches the radio to "Image".
            image_input = gr.Image(label="Image Query", type="pil", visible=False)
            num_results = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of Results")
            search_button = gr.Button("Search")

        # Right column: results gallery and ready-made example videos.
        with gr.Column(scale=2):
            gr.Markdown(
                """
                <h3>3. Visualize results</h3>
                """)
            results_gallery = gr.Gallery(label="Search Results", columns=3)
            # NOTE(review): examples pass bare path strings to a fn that reads
            # `video_file.name` — confirm Gradio wraps these in file objects.
            gr.Examples(
                examples=[
                    ["bangkok.mp4"],
                    ["lotr.mp4"],
                    ["mi.mp4"],
                ],
                label="Click one of the examples below to get started",
                inputs=[video_file],
                fn=process_video
            )

    def update_search_input(choice):
        """Show exactly one query widget, matching the selected search type."""
        return gr.update(visible=choice == "Text"), gr.update(visible=choice == "Image")

    search_type.change(update_search_input, search_type, [text_input, image_input])

    process_button.click(
        process_video,
        inputs=[video_file],
        outputs=[process_output]
    )

    def perform_search(search_type, text_query, image_query, num_results):
        """Route the active query widget's value into the search helper."""
        query = text_query if search_type == "Text" else image_query
        return similarity_search(query, search_type, num_results)

    search_button.click(
        perform_search,
        inputs=[search_type, text_input, image_input, num_results],
        outputs=[results_gallery]
    )

if __name__ == "__main__":
    demo.launch()