Add logs at start, end, and on errors

This commit is contained in:
2026-03-26 23:56:53 -07:00
parent fa261c90ee
commit f18e9da2dd
3 changed files with 116 additions and 11 deletions
@@ -4,6 +4,7 @@ from __future__ import annotations
from collections.abc import Callable
from dataclasses import dataclass
import logging
from pathlib import Path
import subprocess
from typing import Iterable
@@ -35,6 +36,12 @@ PHASE_EXTRACTING_NOTES = "extracting_notes"
# Phase identifiers handed to _emit_progress to label which conversion stage is running.
PHASE_PPTX_TO_PDF = "pptx_to_pdf"
PHASE_PDF_TO_IMAGES = "pdf_to_images"
# NOTE(review): this uses the "uvicorn.error" logger rather than a module-named
# logger; presumably so messages appear in the server's own log output - confirm.
logger = logging.getLogger("uvicorn.error")
class ConversionTimeoutError(RuntimeError):
    """Raised when a conversion subprocess exceeds the configured timeout.

    Subclasses ``RuntimeError`` so that existing handlers written against
    ``RuntimeError`` keep catching timeout failures, while new callers can
    catch this type to tell timeouts apart from other conversion errors.
    """
def convert_pptx_to_pdf(pptx_path: Path, pdf_path: Path, *, timeout_s: int = 120) -> Path:
"""Convert a PPTX file to PDF using headless LibreOffice.
@@ -75,10 +82,12 @@ def convert_pptx_to_pdf(pptx_path: Path, pdf_path: Path, *, timeout_s: int = 120
timeout=timeout_s,
)
except subprocess.TimeoutExpired as exc:
raise RuntimeError(
message = (
"LibreOffice conversion timed out after "
f"{timeout_s} seconds while rendering {pptx_path.name}"
) from exc
)
logger.error(message, exc_info=True)
raise ConversionTimeoutError(message) from exc
if completed.returncode != 0:
raise RuntimeError(
f"LibreOffice conversion failed: {completed.stderr.strip() or completed.stdout.strip()}"
@@ -144,11 +153,13 @@ def render_pdf_to_images(
timeout=timeout_s,
)
except subprocess.TimeoutExpired as exc:
raise RuntimeError(
message = (
"Poppler rasterization timed out after "
f"{timeout_s} seconds while rendering {pdf_path.name}; "
"increase conversion PDF render timeout cap or lower image DPI"
) from exc
)
logger.error(message, exc_info=True)
raise ConversionTimeoutError(message) from exc
if completed.returncode != 0:
raise RuntimeError(
f"Poppler rasterization failed: {completed.stderr.strip() or completed.stdout.strip()}"
@@ -188,11 +199,13 @@ def render_pdf_to_images(
if operation_timeout_s is not None
else f"per-page timeout {timeout_s}s"
)
raise RuntimeError(
message = (
"Poppler rasterization timed out while rendering page "
f"{page_index}/{total_pages} of {pdf_path.name}; "
f"{timeout_context}. Increase timeout settings or lower image DPI."
) from exc
)
logger.error(message, exc_info=True)
raise ConversionTimeoutError(message) from exc
if completed.returncode != 0:
raise RuntimeError(
"Poppler rasterization failed on page "
@@ -298,16 +311,28 @@ def convert_pptx_to_slidedeck(
_emit_progress(progress_callback, PHASE_PPTX_TO_PDF, 1, 1)
_emit_progress(progress_callback, PHASE_PDF_TO_IMAGES, 0, slide_count)
pdf_to_images_page_timeout = _compute_page_timeout(
total_timeout_s=pdf_to_images_timeout,
page_count=slide_count,
base_timeout_s=pdf_to_images_base_timeout_s,
)
logger.info(
"Conversion plan source=%s slides=%d dpi=%d image_format=%s "
"computed_timeouts_s[pptx_to_pdf_total=%d,pdf_to_images_total=%d,pdf_to_images_per_page=%d]",
pptx_path.name,
slide_count,
dpi,
image_format,
pptx_to_pdf_timeout,
pdf_to_images_timeout,
pdf_to_images_page_timeout,
)
image_paths = render_pdf_to_images(
pdf_path,
image_dir,
dpi=dpi,
image_format=image_format,
timeout_s=_compute_page_timeout(
total_timeout_s=pdf_to_images_timeout,
page_count=slide_count,
base_timeout_s=pdf_to_images_base_timeout_s,
),
timeout_s=pdf_to_images_page_timeout,
total_pages=slide_count,
operation_timeout_s=pdf_to_images_timeout,
page_progress_callback=lambda current, max_pages: _emit_progress(