add conversion phase & progress metrics, timeout heuristics

This commit is contained in:
2026-03-26 23:30:25 -07:00
parent 26452aa57c
commit baf87ee195
12 changed files with 468 additions and 96 deletions
@@ -3,6 +3,7 @@
from __future__ import annotations
import asyncio
from collections.abc import Callable
from datetime import datetime, timedelta, timezone
from pathlib import Path
import shutil
@@ -14,6 +15,11 @@ from connectrpc.errors import ConnectError
from connectrpc.request import RequestContext
from google.protobuf.timestamp_pb2 import Timestamp
from officeconvert import SlideArtifact, convert_pptx_to_slidedeck
from officeconvert.conversion import (
PHASE_EXTRACTING_NOTES,
PHASE_PDF_TO_IMAGES,
PHASE_PPTX_TO_PDF,
)
from officeconvertapi.v1 import conversion_connect, conversion_pb2
from officeconvert_server.config import ServerConfig
@@ -98,6 +104,10 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
)
session.status = conversion_pb2.CONVERSION_STATUS_RUNNING
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
session.current_progress = 0
session.max_progress = 0
session.error_message = ""
session.updated_at = utc_now()
session.conversion_task = asyncio.create_task(self._run_conversion(session))
@@ -119,6 +129,9 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
status=session.status,
error_message=session.error_message,
updated_at=_to_timestamp(session.updated_at),
phase=session.phase,
current_progress=session.current_progress,
max_progress=session.max_progress,
)
async def get_slide_deck(
@@ -185,22 +198,47 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
dpi=self._config.conversion_image_dpi,
pptx_to_pdf_timeout_s=self._config.conversion_pptx_to_pdf_timeout_seconds,
pdf_to_images_timeout_s=self._config.conversion_pdf_to_images_timeout_seconds,
pptx_to_pdf_base_timeout_s=self._config.conversion_pptx_to_pdf_base_timeout_seconds,
pptx_to_pdf_per_slide_timeout_s=self._config.conversion_pptx_to_pdf_per_slide_timeout_seconds,
pdf_to_images_base_timeout_s=self._config.conversion_pdf_to_images_base_timeout_seconds,
pdf_to_images_per_slide_timeout_s=self._config.conversion_pdf_to_images_per_slide_timeout_seconds,
progress_callback=lambda phase_name, current, max_value: self._set_session_progress_from_name(
session,
phase_name=phase_name,
current_progress=current,
max_progress=max_value,
),
)
self._set_session_progress(
session,
phase=conversion_pb2.CONVERSION_PHASE_UPLOADING_RESULTS,
current_progress=0,
max_progress=len(result.slides),
)
session.slide_deck = await asyncio.to_thread(
self._upload_and_build_slide_deck,
session,
result.slides,
result.source_filename,
lambda current, max_value: self._set_session_progress(
session,
phase=conversion_pb2.CONVERSION_PHASE_UPLOADING_RESULTS,
current_progress=current,
max_progress=max_value,
),
)
session.status = conversion_pb2.CONVERSION_STATUS_SUCCEEDED
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
session.updated_at = utc_now()
except asyncio.CancelledError:
session.status = conversion_pb2.CONVERSION_STATUS_FAILED
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
session.error_message = "conversion cancelled"
session.updated_at = utc_now()
raise
except Exception as exc:
session.status = conversion_pb2.CONVERSION_STATUS_FAILED
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
session.error_message = str(exc)
session.updated_at = utc_now()
finally:
@@ -212,10 +250,12 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
session: ConversionSession,
slides: list[SlideArtifact],
source_filename: str,
progress_callback: Callable[[int, int], None] | None = None,
) -> conversion_pb2.SlideDeck:
"""Upload generated slide images and construct API response payload."""
response_slides: list[conversion_pb2.Slide] = []
for slide in slides:
slide_total = len(slides)
for slide_index, slide in enumerate(slides, start=1):
object_key = f"output/slide-{slide.index:04d}{slide.image_path.suffix}"
self._store.fput_object(session.bucket_name, object_key, slide.image_path)
image_url = self._store.presigned_get_url(
@@ -230,6 +270,8 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
image_url=image_url,
)
)
if progress_callback is not None:
progress_callback(slide_index, slide_total)
return conversion_pb2.SlideDeck(
conversion_id=session.conversion_id,
@@ -263,6 +305,45 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
raise ConnectError(Code.NOT_FOUND, "conversion_id not found")
return session
def _set_session_progress_from_name(
    self,
    session: ConversionSession,
    *,
    phase_name: str,
    current_progress: int,
    max_progress: int,
) -> None:
    """Record progress reported under a conversion-library phase name.

    The library phase name is translated to the matching API phase enum
    and the update is forwarded to ``_set_session_progress``.

    Args:
        session: Session whose phase and progress fields are updated.
        phase_name: Phase identifier to look up; the known names are the
            imported ``PHASE_*`` constants.
        current_progress: Progress counter reported for the phase.
        max_progress: Upper bound reported for the phase.
    """
    api_phase_by_name = {
        PHASE_EXTRACTING_NOTES: conversion_pb2.CONVERSION_PHASE_EXTRACTING_NOTES,
        PHASE_PPTX_TO_PDF: conversion_pb2.CONVERSION_PHASE_PPTX_TO_PDF,
        PHASE_PDF_TO_IMAGES: conversion_pb2.CONVERSION_PHASE_PDF_TO_IMAGES,
    }
    # A name missing from the table is reported as the inactive phase
    # rather than raising.
    fallback_phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
    api_phase = api_phase_by_name.get(phase_name, fallback_phase)
    self._set_session_progress(
        session,
        phase=api_phase,
        current_progress=current_progress,
        max_progress=max_progress,
    )
def _set_session_progress(
    self,
    session: ConversionSession,
    *,
    phase: conversion_pb2.ConversionPhase,
    current_progress: int,
    max_progress: int,
) -> None:
    """Store the phase and clamped progress counters on the session.

    Negative counters are raised to zero. When the clamped maximum is
    positive, the current value is also capped at that maximum; with a
    maximum of zero the current value is left uncapped. The session's
    ``updated_at`` is refreshed on every call.

    Args:
        session: Session whose fields are overwritten.
        phase: API phase enum value to record.
        current_progress: Reported progress counter.
        max_progress: Reported upper bound for the counter.
    """
    upper_bound = max_progress if max_progress > 0 else 0
    completed = current_progress if current_progress > 0 else 0
    # Only cap against a positive maximum.
    if upper_bound > 0 and upper_bound < completed:
        completed = upper_bound

    session.phase = phase
    session.current_progress = completed
    session.max_progress = upper_bound
    session.updated_at = utc_now()
def _to_timestamp(value: datetime) -> Timestamp:
"""Convert a timezone-aware datetime to protobuf Timestamp."""