Spaces:

gizemsarsinlar
/

Art_Analysis_with_Phi-4

Running on Zero

App Files Files Community

Art_Analysis_with_Phi-4 / app.py

gizemsarsinlar

Update app.py

2cf5592 verified about 2 months ago

raw

history blame contribute delete

3.96 kB

	import gradio as gr
	from PIL import Image
	import torch
	import soundfile as sf # Ses işleme için
	from transformers import AutoModelForCausalLM, AutoProcessor
	import spaces

	# Modeli yükle
	model_path = "microsoft/Phi-4-multimodal-instruct"
	processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
	model = AutoModelForCausalLM.from_pretrained(
	model_path,
	device_map="auto",
	torch_dtype="auto",
	trust_remote_code=True,
	_attn_implementation="eager",
	)

	user_prompt = "<\|user\|>"
	assistant_prompt = "<\|assistant\|>"
	prompt_suffix = "<\|end\|>"

	@spaces.GPU
	def process_input(input_type, file):
	if not file:
	return "Please upload an image of an artwork."

	if input_type == "Image":
	# Prompt for Artworks
	prompt = (
	f"{user_prompt} You are an expert art historian and critic. Analyze the given artwork with these aspects:\n\n"
	f"1. General Description: Describe the colors, composition, and subject.\n"
	f"2. Artistic Style: Identify the artistic movement.\n"
	f"3. Historical Context: Discuss the period and influences.\n"
	f"4. Symbolism & Meaning: Interpret the messages conveyed.\n"
	f"5. Technical Analysis: Examine brushwork, lighting, and composition.\n"
	f"6. Impact & Significance: Explain its relevance in art history.\n\n"
	f"Here is the artwork for analysis:\n"
	f"<\|image_1\|>\n"
	f"{prompt_suffix}{assistant_prompt}"
	)
	image = Image.open(file)
	inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)

	elif input_type == "Audio":
	prompt = (
	f"{user_prompt} Please transcribe the given audio into text accurately.\n\n"
	f"<\|audio_1\|>\n"
	f"{prompt_suffix}{assistant_prompt}"
	)
	audio, samplerate = sf.read(file)
	inputs = processor(text=prompt, audios=[(audio, samplerate)], return_tensors="pt").to(model.device)

	else:
	return "Geçersiz giriş türü seçildi."

	with torch.no_grad():
	generate_ids = model.generate(
	**inputs,
	max_new_tokens=1000,
	num_logits_to_keep=0,
	temperature=0.7,
	top_k=50,
	)
	generate_ids = generate_ids[:, inputs["input_ids"].shape[1]:]
	response = processor.batch_decode(
	generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
	)[0]

	return response

	with gr.Blocks(title="Art & Audio Analysis with Phi-4") as demo:
	gr.Markdown(
	"""
	# 🎨🗣️ Multimodal Art Analysis with Phi-4
	- Art Analysis: Upload a piece of art, AI will perform a detailed analysis.
	- Audio Transcription: Upload your audio file, AI will convert it to text.

	🚀 Powered by Microsoft's `Phi-4-multimodal-instruct` model.

	With this project, you can both analyze the uploaded image and convert the guide's verbal explanation into text while visiting any exhibition or museum.

	"""
	)

	with gr.Row():
	with gr.Column(scale=1):
	input_type = gr.Radio(
	choices=["Image", "Audio"],
	label="Select Input Type",
	value="Image",
	)
	file_input = gr.File(
	label="Upload File",
	file_types=["image", "audio"],
	)
	submit_btn = gr.Button("Analyze", variant="primary")

	with gr.Column(scale=2):
	output_text = gr.Textbox(
	label="AI Response",
	placeholder="The AI's response will appear here...",
	lines=12,
	interactive=False,
	)

	submit_btn.click(
	fn=process_input,
	inputs=[input_type, file_input],
	outputs=output_text,
	)

	demo.launch()