from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from transformers import pipeline

app = FastAPI()

# Build the captioning pipeline once at import time so the request handlers
# share a single loaded pipeline object instead of creating one per call.
image_to_text = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")

# Example of a captioning result (a list with one dict per caption):
# [{'generated_text': 'a soccer game with a player jumping to catch the ball '}]

@app.get("/ocr")
def ocr(input: str):
    """Run the image-to-text pipeline on ``input`` and return its output.

    Args:
        input: Value of the ``input`` query parameter, forwarded unchanged to
            ``image_to_text``. The name shadows the ``input`` builtin but is
            kept because it is the public query-parameter name.
            NOTE(review): presumably an image URL or path -- confirm with
            the front-end caller.

    Returns:
        The value produced by ``image_to_text(input)``, which FastAPI
        serializes as the response body.
    """
    result = image_to_text(input)
    print(result)
    # Hand the result back to the client; without this the endpoint
    # always responds with ``null``.
    return result


# The static mount must come after the API routes above: routes are matched
# in registration order and a mount at "/" matches every path, so any route
# registered after it is never reached.
app.mount("/", StaticFiles(directory="static", html=True), name="static")


@app.get("/")
def index() -> FileResponse:
    """Return ``/app/static/index.html`` as an HTML file response.

    NOTE(review): this handler is registered after the "/" static mount, so
    requests for "/" appear to be answered by the mount (``html=True``)
    rather than by this function -- confirm before relying on it.
    """
    return FileResponse(path="/app/static/index.html", media_type="text/html")