import os
import time
import gradio as gr
import torch
from PIL import Image
from gtts import gTTS
from gtts.tts import gTTSError
import numpy as np
import cv2
from transformers import BlipProcessor, BlipForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
from huggingface_hub import login
# Hugging Face authentication token (read from the environment)
hf_token = os.getenv("HUGGINGFACE_TOKEN2")
if hf_token:
    login(token=hf_token)
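# login() stores the token so later from_pretrained() downloads can access
# gated or private repositories; public models work without it.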
# Load the pretrained YOLOv5s model (small variant) for object detection
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')
# Compute the gray-level co-occurrence matrix (GLCM) and its contrast
def calculate_glcm_contrast(image):
    # PIL images are RGB, so convert with RGB2GRAY (not BGR2GRAY)
    gray_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
    # Cast to int so 255 + 1 cannot wrap around in uint8 arithmetic
    max_value = int(gray_image.max()) + 1
    glcm = np.zeros((max_value, max_value), dtype=np.float64)
    # Count co-occurrences of gray levels at offset (1, 1)
    for i in range(gray_image.shape[0] - 1):
        for j in range(gray_image.shape[1] - 1):
            x = gray_image[i, j]
            y = gray_image[i + 1, j + 1]
            glcm[x, y] += 1
    glcm = glcm / glcm.sum()
    # Contrast: sum of (i - j)^2 * p(i, j) over all gray-level pairs
    contrast = 0.0
    for i in range(max_value):
        for j in range(max_value):
            contrast += (i - j) ** 2 * glcm[i, j]
    return contrast
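# The nested Python loops above are slow for large images. A vectorized
# NumPy sketch of the same computation (hypothetical alternative, same
# (1, 1) offset; not called anywhere in this script):
def calculate_glcm_contrast_vectorized(image):
    gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
    n = int(gray.max()) + 1
    # Pair each pixel with its lower-right neighbor
    x = gray[:-1, :-1].ravel().astype(np.intp)
    y = gray[1:, 1:].ravel().astype(np.intp)
    # Histogram the (x, y) pairs into an n x n co-occurrence matrix
    glcm = np.bincount(x * n + y, minlength=n * n).reshape(n, n) / x.size
    i, j = np.indices((n, n))
    return float(((i - j) ** 2 * glcm).sum())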
# Analyze texture and color temperature
def analyze_image_properties(image):
    # Color analysis (average RGB); np.array(image) is already RGB for
    # PIL input, so no BGR-to-RGB conversion is needed
    image_rgb = np.array(image)
    avg_color_per_row = np.average(image_rgb, axis=0)
    avg_color = np.average(avg_color_per_row, axis=0)
    # Determine color temperature
    if avg_color[0] > avg_color[2]:  # more red than blue
        temperature = 'quente'  # warm
    else:
        temperature = 'fria'  # cold
    # Texture analysis
    texture_contrast = calculate_glcm_contrast(image)
    texture = 'lisa' if texture_contrast < 100 else 'texturizada'  # smooth / textured
    return temperature, texture
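# Interpretation note: GLCM contrast is low when neighboring pixels have
# similar intensities (a smooth surface) and high when intensity changes
# abruptly (a textured one); the threshold of 100 above is a heuristic.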
# Describe the image using BLIP
def describe_image(image):
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    inputs = processor(image, return_tensors="pt")
    out = model.generate(**inputs)
    description = processor.decode(out[0], skip_special_tokens=True)
    return description
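# Note: describe_image() reloads the BLIP weights on every request. A minimal
# caching sketch (hypothetical _get_blip helper, same checkpoint) that
# describe_image() could call in place of its two from_pretrained() lines:
_BLIP_CACHE = {}

def _get_blip():
    # Load the processor and model once, then reuse them across requests
    if not _BLIP_CACHE:
        _BLIP_CACHE["processor"] = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
        _BLIP_CACHE["model"] = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    return _BLIP_CACHE["processor"], _BLIP_CACHE["model"]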
# Translate the description from English to Portuguese
def translate_description(description):
    model_name = 'Helsinki-NLP/opus-mt-tc-big-en-pt'
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    # This multilingual OPUS-MT model expects a sentence-initial
    # target-language token (e.g. >>por<<) on the source text
    inputs = tokenizer(">>por<< " + description, return_tensors="pt", padding=True)
    translated = model.generate(**inputs)
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return translated_text
# Process the image and generate the spoken output
def process_image(image):
    # Object detection with YOLOv5
    results = model(image)
    detected_image = results.render()[0]
    # Texture and color-temperature analysis
    temperature, texture = analyze_image_properties(image)
    # Image description (BLIP caption, then English -> Portuguese)
    description = describe_image(image)
    translated_description = translate_description(description)
    # Build the final description (in Portuguese, for the TTS step)
    final_description = f"{translated_description}. A textura é {texture} e a temperatura de cor é {temperature}."
    # Text to speech, retrying when rate-limited (HTTP 429)
    tts = gTTS(text=final_description, lang='pt')
    attempts = 0
    while attempts < 5:
        try:
            tts.save("output.mp3")
            break
        except gTTSError as e:
            # gTTSError may carry the HTTP response in its `rsp` attribute
            rsp = getattr(e, "rsp", None)
            if rsp is not None and rsp.status_code == 429:
                print("Too many requests. Waiting before retrying...")
                time.sleep(5)
                attempts += 1
            else:
                raise
    else:
        raise RuntimeError("Text-to-speech failed after 5 attempts")
    # Output: annotated image, final description, and audio file path
    return Image.fromarray(detected_image), final_description, "output.mp3"
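# Note: the fixed "output.mp3" path is shared across requests; if several
# Gradio users hit the app concurrently, a per-request temporary file (e.g.
# via the tempfile module) would avoid the outputs overwriting each other.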
# Example image for the Gradio interface
example_image_path = "example1.JPG"
# Gradio interface: image in; annotated image, description, and audio out
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Image(type="pil"), gr.Textbox(), gr.Audio(type="filepath")],
    examples=[example_image_path]
)
iface.launch()