From 8e93f752571cafb3e68582a838b03449b9c22b4c Mon Sep 17 00:00:00 2001 From: Elijah Duffy Date: Thu, 26 Mar 2026 18:56:28 -0700 Subject: [PATCH] explicit conversion timeout and handling --- .env.example | 3 ++ Makefile | 5 +- README.md | 9 ++++ deploy/docker-compose.yml | 3 ++ .../src/officeconvert/conversion.py | 48 +++++++++++++------ .../server/src/officeconvert_server/config.py | 10 ++++ .../src/officeconvert_server/service.py | 3 ++ 7 files changed, 65 insertions(+), 16 deletions(-) diff --git a/.env.example b/.env.example index eec05c5..4b62825 100644 --- a/.env.example +++ b/.env.example @@ -4,4 +4,7 @@ S3_USE_SSL=false S3_ACCESS_KEY=minioadmin S3_SECRET_KEY=minioadmin S3_SESSION_TTL_SECONDS=3600 +CONVERSION_IMAGE_DPI=150 +CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS=180 +CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS=600 CONVERSION_CLEANUP_DELAY_SECONDS=3600 diff --git a/Makefile b/Makefile index 4cebf50..f2c9fc7 100644 --- a/Makefile +++ b/Makefile @@ -35,5 +35,8 @@ run-server: export S3_ACCESS_KEY="$${S3_ACCESS_KEY:-minioadmin}"; \ export S3_SECRET_KEY="$${S3_SECRET_KEY:-minioadmin}"; \ export S3_SESSION_TTL_SECONDS="$${S3_SESSION_TTL_SECONDS:-3600}"; \ + export CONVERSION_IMAGE_DPI="$${CONVERSION_IMAGE_DPI:-150}"; \ + export CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS="$${CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS:-180}"; \ + export CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS="$${CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS:-600}"; \ export CONVERSION_CLEANUP_DELAY_SECONDS="$${CONVERSION_CLEANUP_DELAY_SECONDS:-3600}"; \ - uv run --project python uvicorn officeconvert_server.app:app --host "$${UVICORN_HOST:-0.0.0.0}" --port "$${UVICORN_PORT:-8080}" + uv run --project python --package officeconvert-server python -m uvicorn officeconvert_server.app:app --host "$${UVICORN_HOST:-0.0.0.0}" --port "$${UVICORN_PORT:-8080}" diff --git a/README.md b/README.md index 8489f78..0b41205 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,7 @@ make run-server - defaults S3 endpoint to `localhost:8333` for host-based development - auto-normalizes `seaweedfs:8333` to `localhost:8333` for host runs - supports optional `UVICORN_HOST` and `UVICORN_PORT` overrides +- exposes conversion tuning vars (`CONVERSION_IMAGE_DPI`, `CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS`, `CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS`) Server endpoint base URL: @@ -133,3 +134,11 @@ Use `.env.example` as your baseline env configuration. - This project defaults to **SeaweedFS S3 API** for object transit in development and compose deployments. - The Python server uses the `minio` Python SDK, which is intentional because SeaweedFS is S3-compatible. - Runtime configuration uses `S3_*` environment variables. + +## Conversion Tuning Notes + +If conversion fails on larger decks, tune these environment variables: + +- `CONVERSION_IMAGE_DPI` (default `150`): lower values reduce image generation time. +- `CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS` (default `180`): timeout for LibreOffice export. +- `CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS` (default `600`): timeout for Poppler rasterization. diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml index 519176f..d8a1ae4 100644 --- a/deploy/docker-compose.yml +++ b/deploy/docker-compose.yml @@ -27,6 +27,9 @@ services: S3_ACCESS_KEY: ${S3_ACCESS_KEY:-minioadmin} S3_SECRET_KEY: ${S3_SECRET_KEY:-minioadmin} S3_SESSION_TTL_SECONDS: ${S3_SESSION_TTL_SECONDS:-3600} + CONVERSION_IMAGE_DPI: ${CONVERSION_IMAGE_DPI:-150} + CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS: ${CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS:-180} + CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS: ${CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS:-600} CONVERSION_CLEANUP_DELAY_SECONDS: ${CONVERSION_CLEANUP_DELAY_SECONDS:-3600} ports: - "8080:8080" diff --git a/python/packages/officeconvert/src/officeconvert/conversion.py b/python/packages/officeconvert/src/officeconvert/conversion.py index a107f32..b9ea7b0 100644 --- a/python/packages/officeconvert/src/officeconvert/conversion.py +++ b/python/packages/officeconvert/src/officeconvert/conversion.py @@ -57,13 +57,19 @@ def convert_pptx_to_pdf(pptx_path: Path, pdf_path: Path, *, timeout_s: int = 120 str(output_dir), str(pptx_path.resolve()), ] - completed = subprocess.run( - command, - check=False, - capture_output=True, - text=True, - timeout=timeout_s, - ) + try: + completed = subprocess.run( + command, + check=False, + capture_output=True, + text=True, + timeout=timeout_s, + ) + except subprocess.TimeoutExpired as exc: + raise RuntimeError( + "LibreOffice conversion timed out after " + f"{timeout_s} seconds while rendering {pptx_path.name}" + ) from exc if completed.returncode != 0: raise RuntimeError( f"LibreOffice conversion failed: {completed.stderr.strip() or completed.stdout.strip()}" @@ -116,13 +122,20 @@ def render_pdf_to_images( str(pdf_path.resolve()), str(prefix_path), ] - completed = subprocess.run( - command, - check=False, - capture_output=True, - text=True, - timeout=timeout_s, - ) + try: + completed = subprocess.run( + command, + check=False, + capture_output=True, + text=True, + timeout=timeout_s, + ) + except subprocess.TimeoutExpired as exc: + raise RuntimeError( + "Poppler rasterization timed out after " + f"{timeout_s} seconds while rendering {pdf_path.name}; " + "increase conversion PDF render timeout or lower image DPI" + ) from exc if completed.returncode != 0: raise RuntimeError( f"Poppler rasterization failed: {completed.stderr.strip() or completed.stdout.strip()}" @@ -165,6 +178,8 @@ def convert_pptx_to_slidedeck( *, dpi: int = 180, image_format: str = "png", + pptx_to_pdf_timeout_s: int = 180, + pdf_to_images_timeout_s: int = 600, ) -> SlideDeckResult: """Convert a PPTX into rendered images and extracted notes. @@ -177,6 +192,8 @@ def convert_pptx_to_slidedeck( work_dir: Scratch directory for generated outputs. dpi: Rasterization DPI for output slide images. image_format: Output image format accepted by `pdftoppm`. + pptx_to_pdf_timeout_s: Timeout in seconds for the LibreOffice subprocess. + pdf_to_images_timeout_s: Timeout in seconds for the Poppler subprocess. Returns: Fully materialized `SlideDeckResult` with local image paths. @@ -189,12 +206,13 @@ def convert_pptx_to_slidedeck( pdf_path = work_dir / f"{pptx_path.stem}.pdf" image_dir = work_dir / "slides" - convert_pptx_to_pdf(pptx_path, pdf_path) + convert_pptx_to_pdf(pptx_path, pdf_path, timeout_s=pptx_to_pdf_timeout_s) image_paths = render_pdf_to_images( pdf_path, image_dir, dpi=dpi, image_format=image_format, + timeout_s=pdf_to_images_timeout_s, ) notes = extract_slide_notes(pptx_path) diff --git a/python/packages/server/src/officeconvert_server/config.py b/python/packages/server/src/officeconvert_server/config.py index 67f4548..6cc3cc1 100644 --- a/python/packages/server/src/officeconvert_server/config.py +++ b/python/packages/server/src/officeconvert_server/config.py @@ -16,6 +16,9 @@ class ServerConfig: s3_secure: bool s3_public_endpoint: str s3_session_ttl_seconds: int + conversion_image_dpi: int + conversion_pptx_to_pdf_timeout_seconds: int + conversion_pdf_to_images_timeout_seconds: int conversion_cleanup_delay_seconds: int @@ -28,6 +31,13 @@ def load_server_config() -> ServerConfig: s3_secure=os.getenv("S3_USE_SSL", "false").lower() == "true", s3_public_endpoint=os.getenv("S3_PUBLIC_ENDPOINT", "localhost:8333"), s3_session_ttl_seconds=int(os.getenv("S3_SESSION_TTL_SECONDS", "3600")), + conversion_image_dpi=int(os.getenv("CONVERSION_IMAGE_DPI", "150")), + conversion_pptx_to_pdf_timeout_seconds=int( + os.getenv("CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS", "180") + ), + conversion_pdf_to_images_timeout_seconds=int( + os.getenv("CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS", "600") + ), conversion_cleanup_delay_seconds=int( os.getenv("CONVERSION_CLEANUP_DELAY_SECONDS", "3600") ), diff --git a/python/packages/server/src/officeconvert_server/service.py b/python/packages/server/src/officeconvert_server/service.py index 2f24b88..ded2403 100644 --- a/python/packages/server/src/officeconvert_server/service.py +++ b/python/packages/server/src/officeconvert_server/service.py @@ -182,6 +182,9 @@ class ConversionServiceImpl(conversion_connect.ConversionService): convert_pptx_to_slidedeck, source_path, work_dir, + dpi=self._config.conversion_image_dpi, + pptx_to_pdf_timeout_s=self._config.conversion_pptx_to_pdf_timeout_seconds, + pdf_to_images_timeout_s=self._config.conversion_pdf_to_images_timeout_seconds, ) session.slide_deck = await asyncio.to_thread( self._upload_and_build_slide_deck,