Add explicit, configurable conversion timeouts and timeout error handling

This commit is contained in:
2026-03-26 18:56:28 -07:00
parent aa5dcddaa3
commit 8e93f75257
7 changed files with 65 additions and 16 deletions
@@ -57,13 +57,19 @@ def convert_pptx_to_pdf(pptx_path: Path, pdf_path: Path, *, timeout_s: int = 120
str(output_dir),
str(pptx_path.resolve()),
]
completed = subprocess.run(
command,
check=False,
capture_output=True,
text=True,
timeout=timeout_s,
)
try:
completed = subprocess.run(
command,
check=False,
capture_output=True,
text=True,
timeout=timeout_s,
)
except subprocess.TimeoutExpired as exc:
raise RuntimeError(
"LibreOffice conversion timed out after "
f"{timeout_s} seconds while rendering {pptx_path.name}"
) from exc
if completed.returncode != 0:
raise RuntimeError(
f"LibreOffice conversion failed: {completed.stderr.strip() or completed.stdout.strip()}"
@@ -116,13 +122,20 @@ def render_pdf_to_images(
str(pdf_path.resolve()),
str(prefix_path),
]
completed = subprocess.run(
command,
check=False,
capture_output=True,
text=True,
timeout=timeout_s,
)
try:
completed = subprocess.run(
command,
check=False,
capture_output=True,
text=True,
timeout=timeout_s,
)
except subprocess.TimeoutExpired as exc:
raise RuntimeError(
"Poppler rasterization timed out after "
f"{timeout_s} seconds while rendering {pdf_path.name}; "
"increase conversion PDF render timeout or lower image DPI"
) from exc
if completed.returncode != 0:
raise RuntimeError(
f"Poppler rasterization failed: {completed.stderr.strip() or completed.stdout.strip()}"
@@ -165,6 +178,8 @@ def convert_pptx_to_slidedeck(
*,
dpi: int = 180,
image_format: str = "png",
pptx_to_pdf_timeout_s: int = 180,
pdf_to_images_timeout_s: int = 600,
) -> SlideDeckResult:
"""Convert a PPTX into rendered images and extracted notes.
@@ -177,6 +192,8 @@ def convert_pptx_to_slidedeck(
work_dir: Scratch directory for generated outputs.
dpi: Rasterization DPI for output slide images.
image_format: Output image format accepted by `pdftoppm`.
pptx_to_pdf_timeout_s: Timeout in seconds for the LibreOffice subprocess.
pdf_to_images_timeout_s: Timeout in seconds for the Poppler subprocess.
Returns:
Fully materialized `SlideDeckResult` with local image paths.
@@ -189,12 +206,13 @@ def convert_pptx_to_slidedeck(
pdf_path = work_dir / f"{pptx_path.stem}.pdf"
image_dir = work_dir / "slides"
convert_pptx_to_pdf(pptx_path, pdf_path)
convert_pptx_to_pdf(pptx_path, pdf_path, timeout_s=pptx_to_pdf_timeout_s)
image_paths = render_pdf_to_images(
pdf_path,
image_dir,
dpi=dpi,
image_format=image_format,
timeout_s=pdf_to_images_timeout_s,
)
notes = extract_slide_notes(pptx_path)
@@ -16,6 +16,9 @@ class ServerConfig:
s3_secure: bool
s3_public_endpoint: str
s3_session_ttl_seconds: int
conversion_image_dpi: int
conversion_pptx_to_pdf_timeout_seconds: int
conversion_pdf_to_images_timeout_seconds: int
conversion_cleanup_delay_seconds: int
@@ -28,6 +31,13 @@ def load_server_config() -> ServerConfig:
s3_secure=os.getenv("S3_USE_SSL", "false").lower() == "true",
s3_public_endpoint=os.getenv("S3_PUBLIC_ENDPOINT", "localhost:8333"),
s3_session_ttl_seconds=int(os.getenv("S3_SESSION_TTL_SECONDS", "3600")),
conversion_image_dpi=int(os.getenv("CONVERSION_IMAGE_DPI", "150")),
conversion_pptx_to_pdf_timeout_seconds=int(
os.getenv("CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS", "180")
),
conversion_pdf_to_images_timeout_seconds=int(
os.getenv("CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS", "600")
),
conversion_cleanup_delay_seconds=int(
os.getenv("CONVERSION_CLEANUP_DELAY_SECONDS", "3600")
),
@@ -182,6 +182,9 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
convert_pptx_to_slidedeck,
source_path,
work_dir,
dpi=self._config.conversion_image_dpi,
pptx_to_pdf_timeout_s=self._config.conversion_pptx_to_pdf_timeout_seconds,
pdf_to_images_timeout_s=self._config.conversion_pdf_to_images_timeout_seconds,
)
session.slide_deck = await asyncio.to_thread(
self._upload_and_build_slide_deck,