Add explicit conversion timeouts and handle subprocess timeout errors
This commit is contained in:
@@ -57,13 +57,19 @@ def convert_pptx_to_pdf(pptx_path: Path, pdf_path: Path, *, timeout_s: int = 120
|
||||
str(output_dir),
|
||||
str(pptx_path.resolve()),
|
||||
]
|
||||
completed = subprocess.run(
|
||||
command,
|
||||
check=False,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=timeout_s,
|
||||
)
|
||||
try:
|
||||
completed = subprocess.run(
|
||||
command,
|
||||
check=False,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=timeout_s,
|
||||
)
|
||||
except subprocess.TimeoutExpired as exc:
|
||||
raise RuntimeError(
|
||||
"LibreOffice conversion timed out after "
|
||||
f"{timeout_s} seconds while rendering {pptx_path.name}"
|
||||
) from exc
|
||||
if completed.returncode != 0:
|
||||
raise RuntimeError(
|
||||
f"LibreOffice conversion failed: {completed.stderr.strip() or completed.stdout.strip()}"
|
||||
@@ -116,13 +122,20 @@ def render_pdf_to_images(
|
||||
str(pdf_path.resolve()),
|
||||
str(prefix_path),
|
||||
]
|
||||
completed = subprocess.run(
|
||||
command,
|
||||
check=False,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=timeout_s,
|
||||
)
|
||||
try:
|
||||
completed = subprocess.run(
|
||||
command,
|
||||
check=False,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=timeout_s,
|
||||
)
|
||||
except subprocess.TimeoutExpired as exc:
|
||||
raise RuntimeError(
|
||||
"Poppler rasterization timed out after "
|
||||
f"{timeout_s} seconds while rendering {pdf_path.name}; "
|
||||
"increase conversion PDF render timeout or lower image DPI"
|
||||
) from exc
|
||||
if completed.returncode != 0:
|
||||
raise RuntimeError(
|
||||
f"Poppler rasterization failed: {completed.stderr.strip() or completed.stdout.strip()}"
|
||||
@@ -165,6 +178,8 @@ def convert_pptx_to_slidedeck(
|
||||
*,
|
||||
dpi: int = 180,
|
||||
image_format: str = "png",
|
||||
pptx_to_pdf_timeout_s: int = 180,
|
||||
pdf_to_images_timeout_s: int = 600,
|
||||
) -> SlideDeckResult:
|
||||
"""Convert a PPTX into rendered images and extracted notes.
|
||||
|
||||
@@ -177,6 +192,8 @@ def convert_pptx_to_slidedeck(
|
||||
work_dir: Scratch directory for generated outputs.
|
||||
dpi: Rasterization DPI for output slide images.
|
||||
image_format: Output image format accepted by `pdftoppm`.
|
||||
pptx_to_pdf_timeout_s: Timeout in seconds for the LibreOffice subprocess.
|
||||
pdf_to_images_timeout_s: Timeout in seconds for the Poppler subprocess.
|
||||
|
||||
Returns:
|
||||
Fully materialized `SlideDeckResult` with local image paths.
|
||||
@@ -189,12 +206,13 @@ def convert_pptx_to_slidedeck(
|
||||
pdf_path = work_dir / f"{pptx_path.stem}.pdf"
|
||||
image_dir = work_dir / "slides"
|
||||
|
||||
convert_pptx_to_pdf(pptx_path, pdf_path)
|
||||
convert_pptx_to_pdf(pptx_path, pdf_path, timeout_s=pptx_to_pdf_timeout_s)
|
||||
image_paths = render_pdf_to_images(
|
||||
pdf_path,
|
||||
image_dir,
|
||||
dpi=dpi,
|
||||
image_format=image_format,
|
||||
timeout_s=pdf_to_images_timeout_s,
|
||||
)
|
||||
notes = extract_slide_notes(pptx_path)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user