add conversion phase & progress metrics, timeout heuristics
This commit is contained in:
@@ -19,6 +19,10 @@ class ServerConfig:
|
||||
conversion_image_dpi: int
|
||||
conversion_pptx_to_pdf_timeout_seconds: int
|
||||
conversion_pdf_to_images_timeout_seconds: int
|
||||
conversion_pptx_to_pdf_base_timeout_seconds: int
|
||||
conversion_pptx_to_pdf_per_slide_timeout_seconds: int
|
||||
conversion_pdf_to_images_base_timeout_seconds: int
|
||||
conversion_pdf_to_images_per_slide_timeout_seconds: int
|
||||
conversion_cleanup_delay_seconds: int
|
||||
|
||||
|
||||
@@ -31,13 +35,25 @@ def load_server_config() -> ServerConfig:
|
||||
s3_secure=os.getenv("S3_USE_SSL", "false").lower() == "true",
|
||||
s3_public_endpoint=os.getenv("S3_PUBLIC_ENDPOINT", "localhost:8333"),
|
||||
s3_session_ttl_seconds=int(os.getenv("S3_SESSION_TTL_SECONDS", "3600")),
|
||||
conversion_image_dpi=int(os.getenv("CONVERSION_IMAGE_DPI", "150")),
|
||||
conversion_image_dpi=int(os.getenv("CONVERSION_IMAGE_DPI", "72")),
|
||||
conversion_pptx_to_pdf_timeout_seconds=int(
|
||||
os.getenv("CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS", "180")
|
||||
),
|
||||
conversion_pdf_to_images_timeout_seconds=int(
|
||||
os.getenv("CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS", "600")
|
||||
),
|
||||
conversion_pptx_to_pdf_base_timeout_seconds=int(
|
||||
os.getenv("CONVERSION_PPTX_TO_PDF_BASE_TIMEOUT_SECONDS", "45")
|
||||
),
|
||||
conversion_pptx_to_pdf_per_slide_timeout_seconds=int(
|
||||
os.getenv("CONVERSION_PPTX_TO_PDF_PER_SLIDE_TIMEOUT_SECONDS", "3")
|
||||
),
|
||||
conversion_pdf_to_images_base_timeout_seconds=int(
|
||||
os.getenv("CONVERSION_PDF_TO_IMAGES_BASE_TIMEOUT_SECONDS", "30")
|
||||
),
|
||||
conversion_pdf_to_images_per_slide_timeout_seconds=int(
|
||||
os.getenv("CONVERSION_PDF_TO_IMAGES_PER_SLIDE_TIMEOUT_SECONDS", "8")
|
||||
),
|
||||
conversion_cleanup_delay_seconds=int(
|
||||
os.getenv("CONVERSION_CLEANUP_DELAY_SECONDS", "3600")
|
||||
),
|
||||
|
||||
@@ -22,6 +22,9 @@ class ConversionSession:
|
||||
bucket_name: str
|
||||
upload_object_key: str
|
||||
status: conversion_pb2.ConversionStatus
|
||||
phase: conversion_pb2.ConversionPhase = conversion_pb2.CONVERSION_PHASE_INACTIVE
|
||||
current_progress: int = 0
|
||||
max_progress: int = 0
|
||||
created_at: datetime = field(default_factory=utc_now)
|
||||
updated_at: datetime = field(default_factory=utc_now)
|
||||
error_message: str = ""
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from collections.abc import Callable
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
@@ -14,6 +15,11 @@ from connectrpc.errors import ConnectError
|
||||
from connectrpc.request import RequestContext
|
||||
from google.protobuf.timestamp_pb2 import Timestamp
|
||||
from officeconvert import SlideArtifact, convert_pptx_to_slidedeck
|
||||
from officeconvert.conversion import (
|
||||
PHASE_EXTRACTING_NOTES,
|
||||
PHASE_PDF_TO_IMAGES,
|
||||
PHASE_PPTX_TO_PDF,
|
||||
)
|
||||
from officeconvertapi.v1 import conversion_connect, conversion_pb2
|
||||
|
||||
from officeconvert_server.config import ServerConfig
|
||||
@@ -98,6 +104,10 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
||||
)
|
||||
|
||||
session.status = conversion_pb2.CONVERSION_STATUS_RUNNING
|
||||
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
|
||||
session.current_progress = 0
|
||||
session.max_progress = 0
|
||||
session.error_message = ""
|
||||
session.updated_at = utc_now()
|
||||
session.conversion_task = asyncio.create_task(self._run_conversion(session))
|
||||
|
||||
@@ -119,6 +129,9 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
||||
status=session.status,
|
||||
error_message=session.error_message,
|
||||
updated_at=_to_timestamp(session.updated_at),
|
||||
phase=session.phase,
|
||||
current_progress=session.current_progress,
|
||||
max_progress=session.max_progress,
|
||||
)
|
||||
|
||||
async def get_slide_deck(
|
||||
@@ -185,22 +198,47 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
||||
dpi=self._config.conversion_image_dpi,
|
||||
pptx_to_pdf_timeout_s=self._config.conversion_pptx_to_pdf_timeout_seconds,
|
||||
pdf_to_images_timeout_s=self._config.conversion_pdf_to_images_timeout_seconds,
|
||||
pptx_to_pdf_base_timeout_s=self._config.conversion_pptx_to_pdf_base_timeout_seconds,
|
||||
pptx_to_pdf_per_slide_timeout_s=self._config.conversion_pptx_to_pdf_per_slide_timeout_seconds,
|
||||
pdf_to_images_base_timeout_s=self._config.conversion_pdf_to_images_base_timeout_seconds,
|
||||
pdf_to_images_per_slide_timeout_s=self._config.conversion_pdf_to_images_per_slide_timeout_seconds,
|
||||
progress_callback=lambda phase_name, current, max_value: self._set_session_progress_from_name(
|
||||
session,
|
||||
phase_name=phase_name,
|
||||
current_progress=current,
|
||||
max_progress=max_value,
|
||||
),
|
||||
)
|
||||
self._set_session_progress(
|
||||
session,
|
||||
phase=conversion_pb2.CONVERSION_PHASE_UPLOADING_RESULTS,
|
||||
current_progress=0,
|
||||
max_progress=len(result.slides),
|
||||
)
|
||||
session.slide_deck = await asyncio.to_thread(
|
||||
self._upload_and_build_slide_deck,
|
||||
session,
|
||||
result.slides,
|
||||
result.source_filename,
|
||||
lambda current, max_value: self._set_session_progress(
|
||||
session,
|
||||
phase=conversion_pb2.CONVERSION_PHASE_UPLOADING_RESULTS,
|
||||
current_progress=current,
|
||||
max_progress=max_value,
|
||||
),
|
||||
)
|
||||
session.status = conversion_pb2.CONVERSION_STATUS_SUCCEEDED
|
||||
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
|
||||
session.updated_at = utc_now()
|
||||
except asyncio.CancelledError:
|
||||
session.status = conversion_pb2.CONVERSION_STATUS_FAILED
|
||||
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
|
||||
session.error_message = "conversion cancelled"
|
||||
session.updated_at = utc_now()
|
||||
raise
|
||||
except Exception as exc:
|
||||
session.status = conversion_pb2.CONVERSION_STATUS_FAILED
|
||||
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
|
||||
session.error_message = str(exc)
|
||||
session.updated_at = utc_now()
|
||||
finally:
|
||||
@@ -212,10 +250,12 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
||||
session: ConversionSession,
|
||||
slides: list[SlideArtifact],
|
||||
source_filename: str,
|
||||
progress_callback: Callable[[int, int], None] | None = None,
|
||||
) -> conversion_pb2.SlideDeck:
|
||||
"""Upload generated slide images and construct API response payload."""
|
||||
response_slides: list[conversion_pb2.Slide] = []
|
||||
for slide in slides:
|
||||
slide_total = len(slides)
|
||||
for slide_index, slide in enumerate(slides, start=1):
|
||||
object_key = f"output/slide-{slide.index:04d}{slide.image_path.suffix}"
|
||||
self._store.fput_object(session.bucket_name, object_key, slide.image_path)
|
||||
image_url = self._store.presigned_get_url(
|
||||
@@ -230,6 +270,8 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
||||
image_url=image_url,
|
||||
)
|
||||
)
|
||||
if progress_callback is not None:
|
||||
progress_callback(slide_index, slide_total)
|
||||
|
||||
return conversion_pb2.SlideDeck(
|
||||
conversion_id=session.conversion_id,
|
||||
@@ -263,6 +305,45 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
||||
raise ConnectError(Code.NOT_FOUND, "conversion_id not found")
|
||||
return session
|
||||
|
||||
def _set_session_progress_from_name(
    self,
    session: ConversionSession,
    *,
    phase_name: str,
    current_progress: int,
    max_progress: int,
) -> None:
    """Record progress reported under a conversion-library phase name.

    The phase name is translated to the matching ``conversion_pb2`` phase
    enum value; a name with no entry in the translation table is recorded
    as ``CONVERSION_PHASE_INACTIVE``. The counters are passed through
    unchanged to ``_set_session_progress``.
    """
    # Library phase constant -> API enum value.
    api_phase_by_name = {
        PHASE_EXTRACTING_NOTES: conversion_pb2.CONVERSION_PHASE_EXTRACTING_NOTES,
        PHASE_PPTX_TO_PDF: conversion_pb2.CONVERSION_PHASE_PPTX_TO_PDF,
        PHASE_PDF_TO_IMAGES: conversion_pb2.CONVERSION_PHASE_PDF_TO_IMAGES,
    }
    fallback_phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
    api_phase = api_phase_by_name.get(phase_name, fallback_phase)

    self._set_session_progress(
        session,
        phase=api_phase,
        current_progress=current_progress,
        max_progress=max_progress,
    )
|
||||
|
||||
def _set_session_progress(
    self,
    session: ConversionSession,
    *,
    phase: conversion_pb2.ConversionPhase,
    current_progress: int,
    max_progress: int,
) -> None:
    """Store phase and sanitized progress counters on the session.

    Negative counters are raised to zero. When the maximum is positive the
    current value is additionally capped at that maximum; when the maximum
    is zero the current value is left uncapped. ``updated_at`` is refreshed
    on every call.
    """
    ceiling = max_progress if max_progress > 0 else 0
    position = current_progress if current_progress > 0 else 0

    # Only cap against a real (positive) maximum.
    if ceiling > 0 and position > ceiling:
        position = ceiling

    session.phase = phase
    session.current_progress = position
    session.max_progress = ceiling
    session.updated_at = utc_now()
|
||||
|
||||
|
||||
def _to_timestamp(value: datetime) -> Timestamp:
|
||||
"""Convert a timezone-aware datetime to protobuf Timestamp."""
|
||||
|
||||
Reference in New Issue
Block a user