Add explicit conversion timeouts and timeout error handling
This commit is contained in:
@@ -4,4 +4,7 @@ S3_USE_SSL=false
|
|||||||
S3_ACCESS_KEY=minioadmin
|
S3_ACCESS_KEY=minioadmin
|
||||||
S3_SECRET_KEY=minioadmin
|
S3_SECRET_KEY=minioadmin
|
||||||
S3_SESSION_TTL_SECONDS=3600
|
S3_SESSION_TTL_SECONDS=3600
|
||||||
|
CONVERSION_IMAGE_DPI=150
|
||||||
|
CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS=180
|
||||||
|
CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS=600
|
||||||
CONVERSION_CLEANUP_DELAY_SECONDS=3600
|
CONVERSION_CLEANUP_DELAY_SECONDS=3600
|
||||||
|
|||||||
@@ -35,5 +35,8 @@ run-server:
|
|||||||
export S3_ACCESS_KEY="$${S3_ACCESS_KEY:-minioadmin}"; \
|
export S3_ACCESS_KEY="$${S3_ACCESS_KEY:-minioadmin}"; \
|
||||||
export S3_SECRET_KEY="$${S3_SECRET_KEY:-minioadmin}"; \
|
export S3_SECRET_KEY="$${S3_SECRET_KEY:-minioadmin}"; \
|
||||||
export S3_SESSION_TTL_SECONDS="$${S3_SESSION_TTL_SECONDS:-3600}"; \
|
export S3_SESSION_TTL_SECONDS="$${S3_SESSION_TTL_SECONDS:-3600}"; \
|
||||||
|
export CONVERSION_IMAGE_DPI="$${CONVERSION_IMAGE_DPI:-150}"; \
|
||||||
|
export CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS="$${CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS:-180}"; \
|
||||||
|
export CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS="$${CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS:-600}"; \
|
||||||
export CONVERSION_CLEANUP_DELAY_SECONDS="$${CONVERSION_CLEANUP_DELAY_SECONDS:-3600}"; \
|
export CONVERSION_CLEANUP_DELAY_SECONDS="$${CONVERSION_CLEANUP_DELAY_SECONDS:-3600}"; \
|
||||||
uv run --project python uvicorn officeconvert_server.app:app --host "$${UVICORN_HOST:-0.0.0.0}" --port "$${UVICORN_PORT:-8080}"
|
uv run --project python --package officeconvert-server python -m uvicorn officeconvert_server.app:app --host "$${UVICORN_HOST:-0.0.0.0}" --port "$${UVICORN_PORT:-8080}"
|
||||||
|
|||||||
@@ -94,6 +94,7 @@ make run-server
|
|||||||
- defaults S3 endpoint to `localhost:8333` for host-based development
|
- defaults S3 endpoint to `localhost:8333` for host-based development
|
||||||
- auto-normalizes `seaweedfs:8333` to `localhost:8333` for host runs
|
- auto-normalizes `seaweedfs:8333` to `localhost:8333` for host runs
|
||||||
- supports optional `UVICORN_HOST` and `UVICORN_PORT` overrides
|
- supports optional `UVICORN_HOST` and `UVICORN_PORT` overrides
|
||||||
|
- exposes conversion tuning vars (`CONVERSION_IMAGE_DPI`, `CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS`, `CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS`)
|
||||||
|
|
||||||
Server endpoint base URL:
|
Server endpoint base URL:
|
||||||
|
|
||||||
@@ -133,3 +134,11 @@ Use `.env.example` as your baseline env configuration.
|
|||||||
- This project defaults to **SeaweedFS S3 API** for object transit in development and compose deployments.
|
- This project defaults to **SeaweedFS S3 API** for object transit in development and compose deployments.
|
||||||
- The Python server uses the `minio` Python SDK, which is intentional because SeaweedFS is S3-compatible.
|
- The Python server uses the `minio` Python SDK, which is intentional because SeaweedFS is S3-compatible.
|
||||||
- Runtime configuration uses `S3_*` environment variables.
|
- Runtime configuration uses `S3_*` environment variables.
|
||||||
|
|
||||||
|
## Conversion Tuning Notes
|
||||||
|
|
||||||
|
If conversion times out or fails on larger decks, tune these environment variables:
|
||||||
|
|
||||||
|
- `CONVERSION_IMAGE_DPI` (default `150`): rasterization DPI for the output slide images; lower values reduce image generation time.
|
||||||
|
- `CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS` (default `180`): timeout, in seconds, for the LibreOffice PPTX-to-PDF export.
|
||||||
|
- `CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS` (default `600`): timeout, in seconds, for the Poppler PDF-to-image rasterization.
|
||||||
|
|||||||
@@ -27,6 +27,9 @@ services:
|
|||||||
S3_ACCESS_KEY: ${S3_ACCESS_KEY:-minioadmin}
|
S3_ACCESS_KEY: ${S3_ACCESS_KEY:-minioadmin}
|
||||||
S3_SECRET_KEY: ${S3_SECRET_KEY:-minioadmin}
|
S3_SECRET_KEY: ${S3_SECRET_KEY:-minioadmin}
|
||||||
S3_SESSION_TTL_SECONDS: ${S3_SESSION_TTL_SECONDS:-3600}
|
S3_SESSION_TTL_SECONDS: ${S3_SESSION_TTL_SECONDS:-3600}
|
||||||
|
CONVERSION_IMAGE_DPI: ${CONVERSION_IMAGE_DPI:-150}
|
||||||
|
CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS: ${CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS:-180}
|
||||||
|
CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS: ${CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS:-600}
|
||||||
CONVERSION_CLEANUP_DELAY_SECONDS: ${CONVERSION_CLEANUP_DELAY_SECONDS:-3600}
|
CONVERSION_CLEANUP_DELAY_SECONDS: ${CONVERSION_CLEANUP_DELAY_SECONDS:-3600}
|
||||||
ports:
|
ports:
|
||||||
- "8080:8080"
|
- "8080:8080"
|
||||||
|
|||||||
@@ -57,13 +57,19 @@ def convert_pptx_to_pdf(pptx_path: Path, pdf_path: Path, *, timeout_s: int = 120
|
|||||||
str(output_dir),
|
str(output_dir),
|
||||||
str(pptx_path.resolve()),
|
str(pptx_path.resolve()),
|
||||||
]
|
]
|
||||||
completed = subprocess.run(
|
try:
|
||||||
command,
|
completed = subprocess.run(
|
||||||
check=False,
|
command,
|
||||||
capture_output=True,
|
check=False,
|
||||||
text=True,
|
capture_output=True,
|
||||||
timeout=timeout_s,
|
text=True,
|
||||||
)
|
timeout=timeout_s,
|
||||||
|
)
|
||||||
|
except subprocess.TimeoutExpired as exc:
|
||||||
|
raise RuntimeError(
|
||||||
|
"LibreOffice conversion timed out after "
|
||||||
|
f"{timeout_s} seconds while rendering {pptx_path.name}"
|
||||||
|
) from exc
|
||||||
if completed.returncode != 0:
|
if completed.returncode != 0:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f"LibreOffice conversion failed: {completed.stderr.strip() or completed.stdout.strip()}"
|
f"LibreOffice conversion failed: {completed.stderr.strip() or completed.stdout.strip()}"
|
||||||
@@ -116,13 +122,20 @@ def render_pdf_to_images(
|
|||||||
str(pdf_path.resolve()),
|
str(pdf_path.resolve()),
|
||||||
str(prefix_path),
|
str(prefix_path),
|
||||||
]
|
]
|
||||||
completed = subprocess.run(
|
try:
|
||||||
command,
|
completed = subprocess.run(
|
||||||
check=False,
|
command,
|
||||||
capture_output=True,
|
check=False,
|
||||||
text=True,
|
capture_output=True,
|
||||||
timeout=timeout_s,
|
text=True,
|
||||||
)
|
timeout=timeout_s,
|
||||||
|
)
|
||||||
|
except subprocess.TimeoutExpired as exc:
|
||||||
|
raise RuntimeError(
|
||||||
|
"Poppler rasterization timed out after "
|
||||||
|
f"{timeout_s} seconds while rendering {pdf_path.name}; "
|
||||||
|
"increase conversion PDF render timeout or lower image DPI"
|
||||||
|
) from exc
|
||||||
if completed.returncode != 0:
|
if completed.returncode != 0:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f"Poppler rasterization failed: {completed.stderr.strip() or completed.stdout.strip()}"
|
f"Poppler rasterization failed: {completed.stderr.strip() or completed.stdout.strip()}"
|
||||||
@@ -165,6 +178,8 @@ def convert_pptx_to_slidedeck(
|
|||||||
*,
|
*,
|
||||||
dpi: int = 180,
|
dpi: int = 180,
|
||||||
image_format: str = "png",
|
image_format: str = "png",
|
||||||
|
pptx_to_pdf_timeout_s: int = 180,
|
||||||
|
pdf_to_images_timeout_s: int = 600,
|
||||||
) -> SlideDeckResult:
|
) -> SlideDeckResult:
|
||||||
"""Convert a PPTX into rendered images and extracted notes.
|
"""Convert a PPTX into rendered images and extracted notes.
|
||||||
|
|
||||||
@@ -177,6 +192,8 @@ def convert_pptx_to_slidedeck(
|
|||||||
work_dir: Scratch directory for generated outputs.
|
work_dir: Scratch directory for generated outputs.
|
||||||
dpi: Rasterization DPI for output slide images.
|
dpi: Rasterization DPI for output slide images.
|
||||||
image_format: Output image format accepted by `pdftoppm`.
|
image_format: Output image format accepted by `pdftoppm`.
|
||||||
|
pptx_to_pdf_timeout_s: Timeout in seconds for the LibreOffice subprocess.
|
||||||
|
pdf_to_images_timeout_s: Timeout in seconds for the Poppler subprocess.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Fully materialized `SlideDeckResult` with local image paths.
|
Fully materialized `SlideDeckResult` with local image paths.
|
||||||
@@ -189,12 +206,13 @@ def convert_pptx_to_slidedeck(
|
|||||||
pdf_path = work_dir / f"{pptx_path.stem}.pdf"
|
pdf_path = work_dir / f"{pptx_path.stem}.pdf"
|
||||||
image_dir = work_dir / "slides"
|
image_dir = work_dir / "slides"
|
||||||
|
|
||||||
convert_pptx_to_pdf(pptx_path, pdf_path)
|
convert_pptx_to_pdf(pptx_path, pdf_path, timeout_s=pptx_to_pdf_timeout_s)
|
||||||
image_paths = render_pdf_to_images(
|
image_paths = render_pdf_to_images(
|
||||||
pdf_path,
|
pdf_path,
|
||||||
image_dir,
|
image_dir,
|
||||||
dpi=dpi,
|
dpi=dpi,
|
||||||
image_format=image_format,
|
image_format=image_format,
|
||||||
|
timeout_s=pdf_to_images_timeout_s,
|
||||||
)
|
)
|
||||||
notes = extract_slide_notes(pptx_path)
|
notes = extract_slide_notes(pptx_path)
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,9 @@ class ServerConfig:
|
|||||||
s3_secure: bool
|
s3_secure: bool
|
||||||
s3_public_endpoint: str
|
s3_public_endpoint: str
|
||||||
s3_session_ttl_seconds: int
|
s3_session_ttl_seconds: int
|
||||||
|
conversion_image_dpi: int
|
||||||
|
conversion_pptx_to_pdf_timeout_seconds: int
|
||||||
|
conversion_pdf_to_images_timeout_seconds: int
|
||||||
conversion_cleanup_delay_seconds: int
|
conversion_cleanup_delay_seconds: int
|
||||||
|
|
||||||
|
|
||||||
@@ -28,6 +31,13 @@ def load_server_config() -> ServerConfig:
|
|||||||
s3_secure=os.getenv("S3_USE_SSL", "false").lower() == "true",
|
s3_secure=os.getenv("S3_USE_SSL", "false").lower() == "true",
|
||||||
s3_public_endpoint=os.getenv("S3_PUBLIC_ENDPOINT", "localhost:8333"),
|
s3_public_endpoint=os.getenv("S3_PUBLIC_ENDPOINT", "localhost:8333"),
|
||||||
s3_session_ttl_seconds=int(os.getenv("S3_SESSION_TTL_SECONDS", "3600")),
|
s3_session_ttl_seconds=int(os.getenv("S3_SESSION_TTL_SECONDS", "3600")),
|
||||||
|
conversion_image_dpi=int(os.getenv("CONVERSION_IMAGE_DPI", "150")),
|
||||||
|
conversion_pptx_to_pdf_timeout_seconds=int(
|
||||||
|
os.getenv("CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS", "180")
|
||||||
|
),
|
||||||
|
conversion_pdf_to_images_timeout_seconds=int(
|
||||||
|
os.getenv("CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS", "600")
|
||||||
|
),
|
||||||
conversion_cleanup_delay_seconds=int(
|
conversion_cleanup_delay_seconds=int(
|
||||||
os.getenv("CONVERSION_CLEANUP_DELAY_SECONDS", "3600")
|
os.getenv("CONVERSION_CLEANUP_DELAY_SECONDS", "3600")
|
||||||
),
|
),
|
||||||
|
|||||||
@@ -182,6 +182,9 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
|||||||
convert_pptx_to_slidedeck,
|
convert_pptx_to_slidedeck,
|
||||||
source_path,
|
source_path,
|
||||||
work_dir,
|
work_dir,
|
||||||
|
dpi=self._config.conversion_image_dpi,
|
||||||
|
pptx_to_pdf_timeout_s=self._config.conversion_pptx_to_pdf_timeout_seconds,
|
||||||
|
pdf_to_images_timeout_s=self._config.conversion_pdf_to_images_timeout_seconds,
|
||||||
)
|
)
|
||||||
session.slide_deck = await asyncio.to_thread(
|
session.slide_deck = await asyncio.to_thread(
|
||||||
self._upload_and_build_slide_deck,
|
self._upload_and_build_slide_deck,
|
||||||
|
|||||||
Reference in New Issue
Block a user