Spaces:

pierrefdz
/

gen-attrib

Running

App Files Files Community

gen-attrib / app.py

pierrefdz

updts

959dbd8 5 days ago

raw

history blame contribute delete

12.3 kB

	import streamlit as st
	from PIL import Image
	import os
	import faiss
	import torch
	import numpy as np
	from request import get_ft, get_topk

	@st.cache_resource
	def load_model():
	    """Load the `dinov2_vits14` DINOv2 model from torch.hub, once per process.

	    Streamlit re-executes the whole script on every widget interaction, so
	    the loader is wrapped in ``st.cache_resource``: the first call fetches
	    the model, later reruns reuse the same object.

	    Returns:
	        The model in eval mode, moved to CUDA when available, otherwise CPU.
	    """
	    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	    model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')
	    model.eval()
	    model.to(device)
	    return model

	@st.cache_resource
	def load_index(index_path):
	    """Read the FAISS index stored at `index_path`, once per distinct path.

	    ``st.cache_resource`` keeps the loaded index alive across Streamlit
	    reruns so the file is not re-read on every widget interaction.

	    Args:
	        index_path: Path of the serialized FAISS index file.

	    Returns:
	        The index object returned by ``faiss.read_index``.
	    """
	    return faiss.read_index(index_path)

	def distance_to_similarity(distances, temp=1e-4):
	    """Turn each row of distances into softmax weights that sum to 1.

	    Within a row, smaller distances receive larger weights; the result is
	    ``softmax(-row / temp)``.

	    Args:
	        distances: Indexable collection of 1-D NumPy arrays (for example a
	            2-D array), one row per query. Rows are overwritten in place.
	        temp: Softmax temperature. Smaller values concentrate the weight on
	            the closest entries.

	    Returns:
	        The same `distances` object, each row replaced by its weights.
	    """
	    for row_idx, row in enumerate(distances):
	        if len(row) == 0:
	            # Nothing to normalise; also avoids min() of an empty array.
	            continue
	        # Shift by the row minimum so every exponent is <= 0. Softmax is
	        # shift-invariant, so this equals shifting by the maximum, but it
	        # cannot overflow to inf (and then NaN) when temp is tiny.
	        logits = (row.min() - row) / temp
	        weights = np.exp(logits)
	        distances[row_idx] = weights / weights.sum()
	    return distances

	def calculate_rewards(subscription, num_generations, author_share, ro_share, num_users_k, similarities):
	    """Compute the formatted reward breakdown for each retrieved image.

	    Args:
	        subscription: Monthly subscription price per user, in euros.
	        num_generations: Generations per user per month; must be non-zero
	            (a zero value raises ZeroDivisionError).
	        author_share: Percentage of revenue allocated to authors.
	        ro_share: Percentage of revenue allocated to right owners.
	        num_users_k: Number of paying users, in thousands.
	        similarities: Rows of attribution weights; only the first row
	            (``similarities[0]``) is used.

	    Returns:
	        A list with one dict per weight in ``similarities[0]``, holding the
	        display strings 'paid_per_month', 'paid_per_gen', 'aro_share',
	        'attribution', 'training_data_reward', 'author_month_reward' and
	        'ro_month_reward'.
	    """
	    num_users = num_users_k * 1000
	    paid_per_gen = subscription / num_generations
	    total_share = author_share + ro_share
	    aro_share = paid_per_gen * total_share / 100 * 100  # Convert to cents

	    rewards = []
	    for sim in similarities[0]:
	        training_data_reward = aro_share * sim
	        if total_share:
	            # Euros per month across all users, before the author/RO split.
	            monthly_pool = (training_data_reward / 100) * num_users / (total_share / 100)
	        else:
	            # Both shares are 0%: nothing is distributed. Without this
	            # guard the division above raises ZeroDivisionError.
	            monthly_pool = 0.0
	        author_month_reward = monthly_pool * (author_share / 100)
	        ro_month_reward = monthly_pool * (ro_share / 100)

	        rewards.append({
	            'paid_per_month': f"{subscription:.0f}€",
	            'paid_per_gen': f"{paid_per_gen:.2f}€",
	            'aro_share': f"{aro_share:.2f}c€",
	            'attribution': f"{sim*100:.0f}%",
	            'training_data_reward': f"{training_data_reward:.2f}c€",
	            'author_month_reward': f"{author_month_reward:.0f}€",
	            'ro_month_reward': f"{ro_month_reward:.0f}€",
	        })
	    return rewards

	def _metric_row(title, label, value):
	    """Return the HTML for one hoverable label/value row; `title` is the tooltip."""
	    return (
	        f'<div class="metric-row" title="{title}">'
	        f'<span class="metric-label">{label}</span>'
	        f'<span class="metric-value">{value}</span>'
	        '</div>'
	    )


	def _metric_group(rows):
	    """Wrap a sequence of (title, label, value) rows in the per-column container."""
	    body = "".join(_metric_row(*row) for row in rows)
	    return f'<div style="margin-bottom: 8px;">{body}</div>'


	@st.cache_resource
	def _load_urls(urls_path):
	    """Read the image URL list (one URL per line) once and reuse it across reruns."""
	    with open(urls_path, "r", encoding="utf-8") as f:
	        # Strip line endings so each entry is a clean URL; line order is kept
	        # because entries are looked up by the index returned from the search.
	        return [line.strip() for line in f]


	def main():
	    """Render the Reward Simulator page.

	    The page shows an introduction, reads the simulation parameters from
	    the sidebar, lets the user pick one of three bundled images or upload
	    one, retrieves the most similar indexed images for it, and displays a
	    per-image reward breakdown computed by `calculate_rewards`.
	    """
	    st.title("Reward Simulator")

	    # Add introduction text and expandable details
	    st.markdown("""
	    This simulator helps estimate potential rewards for authors and rights owners when their images are used
	    to train AI image generation models.
	    """)

	    with st.expander("Learn more about how it works"):
	        st.markdown("""
	        ### How it works
	        1. Select or upload an image that represents AI-generated content
	        2. The system finds similar images that might have influenced the generation in a database of 10M images (OpenImages)
	        3. Based on your parameters, it calculates potential rewards for:
	        - Original image authors
	        - Rights owners (e.g., stock photo companies, galleries)

	        ### Key assumptions
	        - Attribution scores indicate the level of influence of training images
	        - Rewards are distributed based on subscription revenue
	        - Calculations use simplified models and are for demonstration purposes

	        ### Use cases
	        - Explore fair compensation models for AI training data
	        - Simulate different revenue sharing scenarios
	        - Understand the relationship between model training and attribution
	        """)

	    # Load model, index and URL list
	    model = load_model()
	    index = load_index("data/openimages_index.bin")
	    urls = _load_urls("data/openimages_urls.txt")

	    # Sidebar controls for reward simulation
	    with st.sidebar:
	        st.subheader("Reward Simulation Parameters")

	        subscription = st.number_input(
	            "User Monthly Subscription (€/month)",
	            min_value=1.0,
	            max_value=100.0,
	            value=12.0,
	            help="Monthly subscription fee for users. Examples:\n"
	            "- Adobe Firefly: starts at $4.99\n"
	            "- Midjourney: starts at $10\n"
	            "- DALL·E 3: included with ChatGPT Plus ($20)\n"
	            "- Getty Edify: €45 for 25 generations (includes legal protection)"
	        )

	        num_generations = st.number_input(
	            "Number of Image Generations per Month",
	            min_value=1,
	            max_value=1000,
	            value=60,
	            help="Number of generations done by one user on average per month.\n"
	            "Examples:\n"
	            "- Adobe Firefly basic plan: 100 generations/month\n"
	            "- Midjourney basic plan: 200 generations/month"
	        )

	        author_share = st.number_input(
	            "Authors share (%)",
	            min_value=0.0,
	            max_value=100.0,
	            value=5.0,
	            help="Percentage of subscription allocated to authors. Typical examples:\n"
	            "- Printed books: 5-15%\n"
	            "- Music (performers + songwriters): 15-30% of net revenues\n"
	            "- Stock photography: 15-45% of revenues"
	        )

	        ro_share = st.number_input(
	            "Right Owners Share (%)",
	            min_value=0.0,
	            max_value=100.0,
	            value=10.0,
	            help="Percentage of subscription allocated to right owners. Examples:\n"
	            "- Music CMOs (ASCAP, BMI, SACEM): 2-8% of net revenues\n"
	            "- Image stocks: retain 55-85% of revenues"
	        )

	        num_users_k = st.number_input(
	            "Subscribers (in thousands)",
	            min_value=1,
	            max_value=10000,
	            value=500,
	            help="Number of paid users in thousands.\n"
	            "Note: Exact figures aren't public, but Midjourney is estimated\n"
	            "to have 2 to 5 million paying subscribers"
	        )

	        num_neighbors = st.slider(
	            "Number of similar images",
	            min_value=1,
	            max_value=20,
	            value=8,
	            help="Number of most similar images to display and calculate rewards for"
	        )

	    # Display default images in a row
	    st.subheader("Select an image to attribute rewards to similar images")
	    cols = st.columns(3)
	    default_images = {
	        "Image 1": "assets/1.jpg",
	        "Image 2": "assets/2.jpg",
	        "Image 3": "assets/3.jpg"
	    }

	    # Create session state for storing the selected image if it doesn't exist
	    if 'query_image' not in st.session_state:
	        st.session_state.query_image = Image.open("assets/1.jpg").convert('RGB')

	    # Display the three default images as buttons
	    for col, (name, path) in zip(cols, default_images.items()):
	        img = Image.open(path).convert('RGB')
	        col.image(img, caption=name, width=200, use_container_width=True)
	        # Center the button using a 1:2:1 split and the middle sub-column
	        button_cols = col.columns([1, 2, 1])
	        if button_cols[1].button(f"Select {name}", use_container_width=True):
	            st.session_state.query_image = img

	    # Upload option below
	    st.subheader("Or upload your own image:")
	    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
	    if uploaded_file is not None:
	        # An uploaded file takes precedence over the default-image buttons
	        # for as long as it stays in the uploader.
	        st.session_state.query_image = Image.open(uploaded_file).convert('RGB')

	    # Display selected image
	    st.subheader("Your query image:")
	    st.image(st.session_state.query_image, caption="Selected Image", width=300)

	    # Get features and search
	    features = get_ft(model, st.session_state.query_image)
	    distances, indices = get_topk(index, features, topk=num_neighbors)
	    similarities = distance_to_similarity(distances, temp=1e-5)

	    # Calculate rewards
	    rewards = calculate_rewards(subscription, num_generations,
	                                author_share, ro_share, num_users_k, similarities)

	    # Display results in a table: one four-column row per retrieved image
	    st.subheader("Similar Images and Rewards")
	    for rank, (img_idx, reward) in enumerate(zip(indices[0], rewards), start=1):
	        if img_idx < 0:
	            # NOTE(review): FAISS pads missing neighbours with -1; assuming
	            # get_topk passes raw search ids through, skip them rather than
	            # silently showing urls[-1].
	            continue

	        cols = st.columns(4)
	        # Column 1: Similar image
	        cols[0].image(urls[img_idx], caption=f"Similar Image {rank}", width=150)

	        # Column 2: Basic metrics
	        cols[1].markdown(
	            _metric_group([
	                ("Average payment per month and per paid user",
	                 "Monthly Subscription:",
	                 reward['paid_per_month']),
	                ("Payment per generated image = Monthly Subscription / Number of Generations",
	                 "Paid per Generation:",
	                 reward['paid_per_gen']),
	                ("Share for Authors and Right Owners = Paid per Gen × (Authors + Right Owners share%)",
	                 "Authors & RO Share:",
	                 reward['aro_share']),
	            ]),
	            unsafe_allow_html=True
	        )

	        # Column 3: Attribution
	        cols[2].markdown(
	            _metric_group([
	                ("Percentage indicating how much this training data contributed to the generated image",
	                 "Attribution:",
	                 reward['attribution']),
	                ("Reward for training data usage = Authors & RO Share × Attribution%",
	                 "Training Data Reward:",
	                 reward['training_data_reward']),
	            ]),
	            unsafe_allow_html=True
	        )

	        # Column 4: Monthly rewards
	        cols[3].markdown(
	            _metric_group([
	                ("Monthly reward = Training Data Reward × Number of Subscribers / (Authors + RO share%) × Author Share%",
	                 "Author Monthly Reward:",
	                 reward['author_month_reward']),
	                ("Monthly reward = Training Data Reward × Number of Subscribers / (Authors + RO share%) × RO Share%",
	                 "Right Owner Monthly Reward:",
	                 reward['ro_month_reward']),
	            ]),
	            unsafe_allow_html=True
	        )

	    # Add CSS for styling
	    st.markdown(
	        """
	        <style>
	        .metric-row {
	        display: flex;
	        justify-content: space-between;
	        align-items: flex-start;
	        padding: 4px;
	        margin: 2px 0;
	        border-radius: 2px;
	        cursor: pointer;
	        }
	        .metric-row:hover {
	        background-color: #f0f2f6;
	        }
	        .metric-label {
	        font-family: monospace;
	        margin-right: 4px;
	        font-size: 12px;
	        }
	        .metric-value {
	        font-family: monospace;
	        font-size: 12px;
	        }
	        </style>
	        """,
	        unsafe_allow_html=True
	    )

	    st.markdown("---")

	# Entry point: build the page when this file is executed as a script.
	if __name__ == "__main__":
	    main()