diff --git a/python/packages/officeconvert/src/officeconvert/conversion.py b/python/packages/officeconvert/src/officeconvert/conversion.py index 7ec2828..f95e42e 100644 --- a/python/packages/officeconvert/src/officeconvert/conversion.py +++ b/python/packages/officeconvert/src/officeconvert/conversion.py @@ -4,6 +4,7 @@ from __future__ import annotations from collections.abc import Callable from dataclasses import dataclass +import logging from pathlib import Path import subprocess from typing import Iterable @@ -35,6 +36,12 @@ PHASE_EXTRACTING_NOTES = "extracting_notes" PHASE_PPTX_TO_PDF = "pptx_to_pdf" PHASE_PDF_TO_IMAGES = "pdf_to_images" +logger = logging.getLogger("uvicorn.error") + + +class ConversionTimeoutError(RuntimeError): + """Raised when a conversion subprocess exceeds the configured timeout.""" + def convert_pptx_to_pdf(pptx_path: Path, pdf_path: Path, *, timeout_s: int = 120) -> Path: """Convert a PPTX file to PDF using headless LibreOffice. @@ -75,10 +82,12 @@ def convert_pptx_to_pdf(pptx_path: Path, pdf_path: Path, *, timeout_s: int = 120 timeout=timeout_s, ) except subprocess.TimeoutExpired as exc: - raise RuntimeError( + message = ( "LibreOffice conversion timed out after " f"{timeout_s} seconds while rendering {pptx_path.name}" - ) from exc + ) + logger.error(message, exc_info=True) + raise ConversionTimeoutError(message) from exc if completed.returncode != 0: raise RuntimeError( f"LibreOffice conversion failed: {completed.stderr.strip() or completed.stdout.strip()}" @@ -144,11 +153,13 @@ def render_pdf_to_images( timeout=timeout_s, ) except subprocess.TimeoutExpired as exc: - raise RuntimeError( + message = ( "Poppler rasterization timed out after " f"{timeout_s} seconds while rendering {pdf_path.name}; " "increase conversion PDF render timeout cap or lower image DPI" - ) from exc + ) + logger.error(message, exc_info=True) + raise ConversionTimeoutError(message) from exc if completed.returncode != 0: raise RuntimeError( f"Poppler rasterization failed: {completed.stderr.strip() or completed.stdout.strip()}" @@ -188,11 +199,13 @@ def render_pdf_to_images( if operation_timeout_s is not None else f"per-page timeout {timeout_s}s" ) - raise RuntimeError( + message = ( "Poppler rasterization timed out while rendering page " f"{page_index}/{total_pages} of {pdf_path.name}; " f"{timeout_context}. Increase timeout settings or lower image DPI." - ) from exc + ) + logger.error(message, exc_info=True) + raise ConversionTimeoutError(message) from exc if completed.returncode != 0: raise RuntimeError( "Poppler rasterization failed on page " @@ -298,16 +311,28 @@ def convert_pptx_to_slidedeck( _emit_progress(progress_callback, PHASE_PPTX_TO_PDF, 1, 1) _emit_progress(progress_callback, PHASE_PDF_TO_IMAGES, 0, slide_count) + pdf_to_images_page_timeout = _compute_page_timeout( + total_timeout_s=pdf_to_images_timeout, + page_count=slide_count, + base_timeout_s=pdf_to_images_base_timeout_s, + ) + logger.info( + "Conversion plan source=%s slides=%d dpi=%d image_format=%s " + "computed_timeouts_s[pptx_to_pdf_total=%d,pdf_to_images_total=%d,pdf_to_images_per_page=%d]", + pptx_path.name, + slide_count, + dpi, + image_format, + pptx_to_pdf_timeout, + pdf_to_images_timeout, + pdf_to_images_page_timeout, + ) image_paths = render_pdf_to_images( pdf_path, image_dir, dpi=dpi, image_format=image_format, - timeout_s=_compute_page_timeout( - total_timeout_s=pdf_to_images_timeout, - page_count=slide_count, - base_timeout_s=pdf_to_images_base_timeout_s, - ), + timeout_s=pdf_to_images_page_timeout, total_pages=slide_count, operation_timeout_s=pdf_to_images_timeout, page_progress_callback=lambda current, max_pages: _emit_progress( diff --git a/python/packages/server/src/officeconvert_server/app.py b/python/packages/server/src/officeconvert_server/app.py index c4bfb09..bbc6cec 100644 --- a/python/packages/server/src/officeconvert_server/app.py +++ b/python/packages/server/src/officeconvert_server/app.py @@ -2,15 +2,45 @@ from __future__ import annotations +import logging +import os + from officeconvertapi.v1.conversion_connect import ConversionServiceASGIApplication from officeconvert_server.config import load_server_config from officeconvert_server.service import ConversionServiceImpl from officeconvert_server.storage import S3Store +logger = logging.getLogger(__name__) + + +def _configure_application_logging() -> None: + """Ensure application loggers emit through uvicorn's configured handlers.""" + level_name = os.getenv("OFFICECONVERT_LOG_LEVEL", "INFO").upper() + level = logging.getLevelName(level_name) + if not isinstance(level, int): + level = logging.INFO + uvicorn_error_logger = logging.getLogger("uvicorn.error") + uvicorn_handlers = list(uvicorn_error_logger.handlers) + + for logger_name in ("officeconvert", "officeconvert_server"): + app_logger = logging.getLogger(logger_name) + app_logger.setLevel(level) + if uvicorn_handlers: + # Route application logs through uvicorn's stderr handlers. + app_logger.handlers = uvicorn_handlers + app_logger.propagate = False + + logger.info( + "Application logging configured officeconvert_log_level=%s uvicorn_handlers=%d", + logging.getLevelName(level), + len(uvicorn_handlers), + ) + def create_app() -> ConversionServiceASGIApplication: """Construct and return the configured Connect ASGI application.""" + _configure_application_logging() config = load_server_config() store = S3Store( endpoint=config.s3_endpoint, diff --git a/python/packages/server/src/officeconvert_server/service.py b/python/packages/server/src/officeconvert_server/service.py index 80b8fcb..540ca30 100644 --- a/python/packages/server/src/officeconvert_server/service.py +++ b/python/packages/server/src/officeconvert_server/service.py @@ -5,9 +5,11 @@ from __future__ import annotations import asyncio from collections.abc import Callable from datetime import datetime, timedelta, timezone +import logging from pathlib import Path import shutil import tempfile +import time import uuid from connectrpc.code import Code @@ -16,6 +18,7 @@ from connectrpc.request import RequestContext from google.protobuf.timestamp_pb2 import Timestamp from officeconvert import SlideArtifact, convert_pptx_to_slidedeck from officeconvert.conversion import ( + ConversionTimeoutError, PHASE_EXTRACTING_NOTES, PHASE_PDF_TO_IMAGES, PHASE_PPTX_TO_PDF, @@ -26,6 +29,8 @@ from officeconvert_server.config import ServerConfig from officeconvert_server.models import ConversionSession, utc_now from officeconvert_server.storage import S3Store +logger = logging.getLogger("uvicorn.error") + class ConversionServiceImpl(conversion_connect.ConversionService): """Implements the conversion API with in-memory state and S3 orchestration.""" @@ -179,6 +184,16 @@ class ConversionServiceImpl(conversion_connect.ConversionService): async def _run_conversion(self, session: ConversionSession) -> None: """Execute conversion flow and persist terminal state in memory.""" + started_at = time.monotonic() + logger.info( + "Starting conversion conversion_id=%s source_filename=%s dpi=%d " + "timeout_caps_s[pptx_to_pdf_total=%d,pdf_to_images_total=%d]", + session.conversion_id, + session.source_filename, + self._config.conversion_image_dpi, + self._config.conversion_pptx_to_pdf_timeout_seconds, + self._config.conversion_pdf_to_images_timeout_seconds, + ) work_dir = Path( tempfile.mkdtemp(prefix=f"officeconvert-{session.conversion_id}-") ).resolve() @@ -230,17 +245,52 @@ class ConversionServiceImpl(conversion_connect.ConversionService): session.status = conversion_pb2.CONVERSION_STATUS_SUCCEEDED session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE session.updated_at = utc_now() + elapsed_s = time.monotonic() - started_at + logger.info( + "Conversion succeeded conversion_id=%s source_filename=%s slides=%d elapsed_s=%.3f", + session.conversion_id, + session.source_filename, + len(result.slides), + elapsed_s, + ) except asyncio.CancelledError: session.status = conversion_pb2.CONVERSION_STATUS_FAILED session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE session.error_message = "conversion cancelled" session.updated_at = utc_now() + elapsed_s = time.monotonic() - started_at + logger.warning( + "Conversion cancelled conversion_id=%s source_filename=%s elapsed_s=%.3f", + session.conversion_id, + session.source_filename, + elapsed_s, + ) raise + except ConversionTimeoutError as exc: + session.status = conversion_pb2.CONVERSION_STATUS_FAILED + session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE + session.error_message = str(exc) + session.updated_at = utc_now() + elapsed_s = time.monotonic() - started_at + logger.error( + "Conversion timed out conversion_id=%s source_filename=%s elapsed_s=%.3f error=%s", + session.conversion_id, + session.source_filename, + elapsed_s, + exc, + ) except Exception as exc: session.status = conversion_pb2.CONVERSION_STATUS_FAILED session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE session.error_message = str(exc) session.updated_at = utc_now() + elapsed_s = time.monotonic() - started_at + logger.exception( + "Conversion failed conversion_id=%s source_filename=%s elapsed_s=%.3f", + session.conversion_id, + session.source_filename, + elapsed_s, + ) finally: await self._cleanup_local_artifacts(session) session.cleanup_task = asyncio.create_task(self._delayed_cleanup(session))