Spaces:
Running
Running
Amélioration du support Poppler pour la conversion PDF et la prévisualisation
Browse files- Dockerfile +15 -3
- app.py +79 -4
Dockerfile
CHANGED
@@ -2,12 +2,24 @@ FROM python:3.9
|
|
2 |
|
3 |
WORKDIR /code
|
4 |
|
5 |
-
# Installer poppler-utils pour pdf2image
|
6 |
-
RUN apt-get update && apt-get install -y
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
COPY requirements.txt /code/requirements.txt
|
9 |
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
10 |
|
|
|
|
|
|
|
|
|
11 |
COPY . /code
|
12 |
|
13 |
-
|
|
|
|
2 |
|
3 |
WORKDIR /code
|
4 |
|
5 |
+
# Install the Poppler command-line tools and development headers used by
# pdf2image. The chain ends with an explicit `exit 1` so that a failed install
# or a missing `pdftoppm` binary aborts the build here; a bare `|| echo ...`
# would return status 0 and let a broken image build successfully.
RUN apt-get update && apt-get install -y \
    poppler-utils \
    libpoppler-cpp-dev \
    libpoppler-private-dev \
    pkg-config \
    && rm -rf /var/lib/apt/lists/* \
    && which pdftoppm \
    && echo "Poppler correctement installé!" \
    || { echo "ERREUR: Poppler non installé correctement!"; exit 1; }

COPY requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Check that pdf2image can be imported; fail the build if it cannot, instead of
# only printing a message and continuing.
RUN python -c "from pdf2image import convert_from_bytes; print('pdf2image est correctement installé!')" \
    || { echo "ERREUR: pdf2image n'est pas installé correctement!"; exit 1; }

COPY . /code

# Start command for the Gradio application.
CMD ["python", "app.py"]
|
app.py
CHANGED
@@ -4,9 +4,48 @@ import os
|
|
4 |
import json
|
5 |
import time
|
6 |
import base64
|
|
|
7 |
from PIL import Image
|
8 |
from pdf2image import convert_from_path
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
# Configuration
|
11 |
GOOGLE_API_KEY = "AIzaSyA4ma5pE1pPCzHHn-i9tDWuKqQEgSltMtI"
|
12 |
genai.configure(api_key=GOOGLE_API_KEY)
|
@@ -919,7 +958,21 @@ def process_document(file, progress=gr.Progress()):
|
|
919 |
|
920 |
try:
|
921 |
if file.name.lower().endswith('.pdf'):
|
922 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
923 |
|
924 |
if len(images) > 10:
|
925 |
return {"error": TEXT["error"]["too_many_pages"]}
|
@@ -954,8 +1007,27 @@ def update_preview(file):
|
|
954 |
|
955 |
if file.name.lower().endswith('.pdf'):
|
956 |
try:
|
957 |
-
#
|
958 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
959 |
image_paths = []
|
960 |
|
961 |
for i, img in enumerate(images):
|
@@ -963,8 +1035,11 @@ def update_preview(file):
|
|
963 |
img.save(temp_filename)
|
964 |
image_paths.append(temp_filename)
|
965 |
|
|
|
966 |
return image_paths
|
967 |
-
except:
|
|
|
|
|
968 |
return []
|
969 |
elif file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
|
970 |
return [file.name]
|
|
|
4 |
import json
|
5 |
import time
|
6 |
import base64
|
7 |
+
import subprocess
|
8 |
from PIL import Image
|
9 |
from pdf2image import convert_from_path
|
10 |
|
11 |
+
# Vérification de la disponibilité de Poppler
|
12 |
+
def check_poppler(search_dirs=None):
    """Return the directory containing Poppler's ``pdftoppm`` executable.

    The directories in ``search_dirs`` are probed first, in order. If none of
    them holds an executable ``pdftoppm`` file, the ``PATH`` is searched with
    ``shutil.which``. A status message is printed in every case.

    Args:
        search_dirs: Optional iterable of directory paths to probe before
            falling back to ``PATH``. When ``None``, a fixed list of common
            install locations is used.

    Returns:
        The directory path (str) where ``pdftoppm`` was found, or ``None``
        when it could not be located.
    """
    # Function-scope import so the block does not depend on a file-level
    # import that may not exist.
    import shutil

    if search_dirs is None:
        search_dirs = (
            '/usr/bin',
            '/usr/local/bin',
            '/opt/homebrew/bin',
            '/app/bin',
        )

    for path in search_dirs:
        candidate = os.path.join(path, 'pdftoppm')
        # Require a regular, executable file: a directory or a non-executable
        # file with that name cannot be run by pdf2image.
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            print(f"✅ Poppler trouvé dans: {path}")
            return path

    # Portable PATH lookup; replaces spawning the external `which` command
    # and the bare `except` that used to hide any failure of that call.
    poppler_bin = shutil.which('pdftoppm')
    if poppler_bin:
        poppler_path = os.path.dirname(poppler_bin)
        print(f"✅ Poppler trouvé via 'which' dans: {poppler_path}")
        return poppler_path

    print("⚠️ AVERTISSEMENT: Poppler non trouvé dans les chemins standards!")
    print("⚠️ Les fonctionnalités de prévisualisation et de traitement PDF pourraient ne pas fonctionner.")
    print("⚠️ Veuillez installer Poppler:")
    print(" - Linux: apt-get install poppler-utils")
    print(" - macOS: brew install poppler")
    print(" - Windows: Téléchargez depuis https://github.com/oschwartz10612/poppler-windows/")
    return None
|
45 |
+
|
46 |
+
# Vérifier Poppler au démarrage
|
47 |
+
POPPLER_PATH = check_poppler()
|
48 |
+
|
49 |
# Configuration
# The API key is a secret and must not be committed to the repository: it is
# read from the GOOGLE_API_KEY environment variable instead of being
# hard-coded. Start-up fails with an explicit message when the variable is
# missing, rather than failing later with an opaque authentication error.
# NOTE(review): the key that was previously committed here should be treated
# as compromised and revoked.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError(
        "GOOGLE_API_KEY environment variable is not set; "
        "define it as a secret in the deployment environment."
    )
genai.configure(api_key=GOOGLE_API_KEY)
|
|
|
958 |
|
959 |
try:
|
960 |
if file.name.lower().endswith('.pdf'):
|
961 |
+
# Utiliser le chemin Poppler détecté au démarrage
|
962 |
+
if POPPLER_PATH:
|
963 |
+
images = convert_from_path(
|
964 |
+
file.name,
|
965 |
+
poppler_path=POPPLER_PATH,
|
966 |
+
use_pdftocairo=True,
|
967 |
+
dpi=150
|
968 |
+
)
|
969 |
+
else:
|
970 |
+
print("Trying without poppler_path")
|
971 |
+
images = convert_from_path(
|
972 |
+
file.name,
|
973 |
+
use_pdftocairo=True,
|
974 |
+
dpi=150
|
975 |
+
)
|
976 |
|
977 |
if len(images) > 10:
|
978 |
return {"error": TEXT["error"]["too_many_pages"]}
|
|
|
1007 |
|
1008 |
if file.name.lower().endswith('.pdf'):
|
1009 |
try:
|
1010 |
+
# Utiliser le chemin Poppler détecté au démarrage
|
1011 |
+
if POPPLER_PATH:
|
1012 |
+
images = convert_from_path(
|
1013 |
+
file.name,
|
1014 |
+
first_page=1,
|
1015 |
+
last_page=3,
|
1016 |
+
poppler_path=POPPLER_PATH,
|
1017 |
+
use_pdftocairo=True,
|
1018 |
+
dpi=150
|
1019 |
+
)
|
1020 |
+
else:
|
1021 |
+
# Essayer sans spécifier le chemin, en utilisant des options simplifiées
|
1022 |
+
print("Trying without poppler_path")
|
1023 |
+
images = convert_from_path(
|
1024 |
+
file.name,
|
1025 |
+
first_page=1,
|
1026 |
+
last_page=3,
|
1027 |
+
use_pdftocairo=True,
|
1028 |
+
dpi=150
|
1029 |
+
)
|
1030 |
+
|
1031 |
image_paths = []
|
1032 |
|
1033 |
for i, img in enumerate(images):
|
|
|
1035 |
img.save(temp_filename)
|
1036 |
image_paths.append(temp_filename)
|
1037 |
|
1038 |
+
print(f"Successfully created {len(image_paths)} preview images")
|
1039 |
return image_paths
|
1040 |
+
except Exception as e:
|
1041 |
+
print(f"Error converting PDF to images: {str(e)}")
|
1042 |
+
# En cas d'erreur, retourner une image d'erreur qui sera affichée
|
1043 |
return []
|
1044 |
elif file.name.lower().endswith(('.png', '.jpg', '.jpeg')):
|
1045 |
return [file.name]
|