add logs at start, end, and errors

This commit is contained in:
2026-03-26 23:56:53 -07:00
parent fa261c90ee
commit f18e9da2dd
3 changed files with 116 additions and 11 deletions
@@ -4,6 +4,7 @@ from __future__ import annotations
from collections.abc import Callable from collections.abc import Callable
from dataclasses import dataclass from dataclasses import dataclass
import logging
from pathlib import Path from pathlib import Path
import subprocess import subprocess
from typing import Iterable from typing import Iterable
@@ -35,6 +36,12 @@ PHASE_EXTRACTING_NOTES = "extracting_notes"
PHASE_PPTX_TO_PDF = "pptx_to_pdf" PHASE_PPTX_TO_PDF = "pptx_to_pdf"
PHASE_PDF_TO_IMAGES = "pdf_to_images" PHASE_PDF_TO_IMAGES = "pdf_to_images"
logger = logging.getLogger("uvicorn.error")
class ConversionTimeoutError(RuntimeError):
"""Raised when a conversion subprocess exceeds the configured timeout."""
def convert_pptx_to_pdf(pptx_path: Path, pdf_path: Path, *, timeout_s: int = 120) -> Path: def convert_pptx_to_pdf(pptx_path: Path, pdf_path: Path, *, timeout_s: int = 120) -> Path:
"""Convert a PPTX file to PDF using headless LibreOffice. """Convert a PPTX file to PDF using headless LibreOffice.
@@ -75,10 +82,12 @@ def convert_pptx_to_pdf(pptx_path: Path, pdf_path: Path, *, timeout_s: int = 120
timeout=timeout_s, timeout=timeout_s,
) )
except subprocess.TimeoutExpired as exc: except subprocess.TimeoutExpired as exc:
raise RuntimeError( message = (
"LibreOffice conversion timed out after " "LibreOffice conversion timed out after "
f"{timeout_s} seconds while rendering {pptx_path.name}" f"{timeout_s} seconds while rendering {pptx_path.name}"
) from exc )
logger.error(message, exc_info=True)
raise ConversionTimeoutError(message) from exc
if completed.returncode != 0: if completed.returncode != 0:
raise RuntimeError( raise RuntimeError(
f"LibreOffice conversion failed: {completed.stderr.strip() or completed.stdout.strip()}" f"LibreOffice conversion failed: {completed.stderr.strip() or completed.stdout.strip()}"
@@ -144,11 +153,13 @@ def render_pdf_to_images(
timeout=timeout_s, timeout=timeout_s,
) )
except subprocess.TimeoutExpired as exc: except subprocess.TimeoutExpired as exc:
raise RuntimeError( message = (
"Poppler rasterization timed out after " "Poppler rasterization timed out after "
f"{timeout_s} seconds while rendering {pdf_path.name}; " f"{timeout_s} seconds while rendering {pdf_path.name}; "
"increase conversion PDF render timeout cap or lower image DPI" "increase conversion PDF render timeout cap or lower image DPI"
) from exc )
logger.error(message, exc_info=True)
raise ConversionTimeoutError(message) from exc
if completed.returncode != 0: if completed.returncode != 0:
raise RuntimeError( raise RuntimeError(
f"Poppler rasterization failed: {completed.stderr.strip() or completed.stdout.strip()}" f"Poppler rasterization failed: {completed.stderr.strip() or completed.stdout.strip()}"
@@ -188,11 +199,13 @@ def render_pdf_to_images(
if operation_timeout_s is not None if operation_timeout_s is not None
else f"per-page timeout {timeout_s}s" else f"per-page timeout {timeout_s}s"
) )
raise RuntimeError( message = (
"Poppler rasterization timed out while rendering page " "Poppler rasterization timed out while rendering page "
f"{page_index}/{total_pages} of {pdf_path.name}; " f"{page_index}/{total_pages} of {pdf_path.name}; "
f"{timeout_context}. Increase timeout settings or lower image DPI." f"{timeout_context}. Increase timeout settings or lower image DPI."
) from exc )
logger.error(message, exc_info=True)
raise ConversionTimeoutError(message) from exc
if completed.returncode != 0: if completed.returncode != 0:
raise RuntimeError( raise RuntimeError(
"Poppler rasterization failed on page " "Poppler rasterization failed on page "
@@ -298,16 +311,28 @@ def convert_pptx_to_slidedeck(
_emit_progress(progress_callback, PHASE_PPTX_TO_PDF, 1, 1) _emit_progress(progress_callback, PHASE_PPTX_TO_PDF, 1, 1)
_emit_progress(progress_callback, PHASE_PDF_TO_IMAGES, 0, slide_count) _emit_progress(progress_callback, PHASE_PDF_TO_IMAGES, 0, slide_count)
pdf_to_images_page_timeout = _compute_page_timeout(
total_timeout_s=pdf_to_images_timeout,
page_count=slide_count,
base_timeout_s=pdf_to_images_base_timeout_s,
)
logger.info(
"Conversion plan source=%s slides=%d dpi=%d image_format=%s "
"computed_timeouts_s[pptx_to_pdf_total=%d,pdf_to_images_total=%d,pdf_to_images_per_page=%d]",
pptx_path.name,
slide_count,
dpi,
image_format,
pptx_to_pdf_timeout,
pdf_to_images_timeout,
pdf_to_images_page_timeout,
)
image_paths = render_pdf_to_images( image_paths = render_pdf_to_images(
pdf_path, pdf_path,
image_dir, image_dir,
dpi=dpi, dpi=dpi,
image_format=image_format, image_format=image_format,
timeout_s=_compute_page_timeout( timeout_s=pdf_to_images_page_timeout,
total_timeout_s=pdf_to_images_timeout,
page_count=slide_count,
base_timeout_s=pdf_to_images_base_timeout_s,
),
total_pages=slide_count, total_pages=slide_count,
operation_timeout_s=pdf_to_images_timeout, operation_timeout_s=pdf_to_images_timeout,
page_progress_callback=lambda current, max_pages: _emit_progress( page_progress_callback=lambda current, max_pages: _emit_progress(
@@ -2,15 +2,45 @@
from __future__ import annotations from __future__ import annotations
import logging
import os
from officeconvertapi.v1.conversion_connect import ConversionServiceASGIApplication from officeconvertapi.v1.conversion_connect import ConversionServiceASGIApplication
from officeconvert_server.config import load_server_config from officeconvert_server.config import load_server_config
from officeconvert_server.service import ConversionServiceImpl from officeconvert_server.service import ConversionServiceImpl
from officeconvert_server.storage import S3Store from officeconvert_server.storage import S3Store
logger = logging.getLogger(__name__)
def _configure_application_logging() -> None:
"""Ensure application loggers emit through uvicorn's configured handlers."""
level_name = os.getenv("OFFICECONVERT_LOG_LEVEL", "INFO").upper()
level = logging.getLevelName(level_name)
if not isinstance(level, int):
level = logging.INFO
uvicorn_error_logger = logging.getLogger("uvicorn.error")
uvicorn_handlers = list(uvicorn_error_logger.handlers)
for logger_name in ("officeconvert", "officeconvert_server"):
app_logger = logging.getLogger(logger_name)
app_logger.setLevel(level)
if uvicorn_handlers:
# Route application logs through uvicorn's stderr handlers.
app_logger.handlers = uvicorn_handlers
app_logger.propagate = False
logger.info(
"Application logging configured officeconvert_log_level=%s uvicorn_handlers=%d",
logging.getLevelName(level),
len(uvicorn_handlers),
)
def create_app() -> ConversionServiceASGIApplication: def create_app() -> ConversionServiceASGIApplication:
"""Construct and return the configured Connect ASGI application.""" """Construct and return the configured Connect ASGI application."""
_configure_application_logging()
config = load_server_config() config = load_server_config()
store = S3Store( store = S3Store(
endpoint=config.s3_endpoint, endpoint=config.s3_endpoint,
@@ -5,9 +5,11 @@ from __future__ import annotations
import asyncio import asyncio
from collections.abc import Callable from collections.abc import Callable
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta, timezone
import logging
from pathlib import Path from pathlib import Path
import shutil import shutil
import tempfile import tempfile
import time
import uuid import uuid
from connectrpc.code import Code from connectrpc.code import Code
@@ -16,6 +18,7 @@ from connectrpc.request import RequestContext
from google.protobuf.timestamp_pb2 import Timestamp from google.protobuf.timestamp_pb2 import Timestamp
from officeconvert import SlideArtifact, convert_pptx_to_slidedeck from officeconvert import SlideArtifact, convert_pptx_to_slidedeck
from officeconvert.conversion import ( from officeconvert.conversion import (
ConversionTimeoutError,
PHASE_EXTRACTING_NOTES, PHASE_EXTRACTING_NOTES,
PHASE_PDF_TO_IMAGES, PHASE_PDF_TO_IMAGES,
PHASE_PPTX_TO_PDF, PHASE_PPTX_TO_PDF,
@@ -26,6 +29,8 @@ from officeconvert_server.config import ServerConfig
from officeconvert_server.models import ConversionSession, utc_now from officeconvert_server.models import ConversionSession, utc_now
from officeconvert_server.storage import S3Store from officeconvert_server.storage import S3Store
logger = logging.getLogger("uvicorn.error")
class ConversionServiceImpl(conversion_connect.ConversionService): class ConversionServiceImpl(conversion_connect.ConversionService):
"""Implements the conversion API with in-memory state and S3 orchestration.""" """Implements the conversion API with in-memory state and S3 orchestration."""
@@ -179,6 +184,16 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
async def _run_conversion(self, session: ConversionSession) -> None: async def _run_conversion(self, session: ConversionSession) -> None:
"""Execute conversion flow and persist terminal state in memory.""" """Execute conversion flow and persist terminal state in memory."""
started_at = time.monotonic()
logger.info(
"Starting conversion conversion_id=%s source_filename=%s dpi=%d "
"timeout_caps_s[pptx_to_pdf_total=%d,pdf_to_images_total=%d]",
session.conversion_id,
session.source_filename,
self._config.conversion_image_dpi,
self._config.conversion_pptx_to_pdf_timeout_seconds,
self._config.conversion_pdf_to_images_timeout_seconds,
)
work_dir = Path( work_dir = Path(
tempfile.mkdtemp(prefix=f"officeconvert-{session.conversion_id}-") tempfile.mkdtemp(prefix=f"officeconvert-{session.conversion_id}-")
).resolve() ).resolve()
@@ -230,17 +245,52 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
session.status = conversion_pb2.CONVERSION_STATUS_SUCCEEDED session.status = conversion_pb2.CONVERSION_STATUS_SUCCEEDED
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
session.updated_at = utc_now() session.updated_at = utc_now()
elapsed_s = time.monotonic() - started_at
logger.info(
"Conversion succeeded conversion_id=%s source_filename=%s slides=%d elapsed_s=%.3f",
session.conversion_id,
session.source_filename,
len(result.slides),
elapsed_s,
)
except asyncio.CancelledError: except asyncio.CancelledError:
session.status = conversion_pb2.CONVERSION_STATUS_FAILED session.status = conversion_pb2.CONVERSION_STATUS_FAILED
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
session.error_message = "conversion cancelled" session.error_message = "conversion cancelled"
session.updated_at = utc_now() session.updated_at = utc_now()
elapsed_s = time.monotonic() - started_at
logger.warning(
"Conversion cancelled conversion_id=%s source_filename=%s elapsed_s=%.3f",
session.conversion_id,
session.source_filename,
elapsed_s,
)
raise raise
except ConversionTimeoutError as exc:
session.status = conversion_pb2.CONVERSION_STATUS_FAILED
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
session.error_message = str(exc)
session.updated_at = utc_now()
elapsed_s = time.monotonic() - started_at
logger.error(
"Conversion timed out conversion_id=%s source_filename=%s elapsed_s=%.3f error=%s",
session.conversion_id,
session.source_filename,
elapsed_s,
exc,
)
except Exception as exc: except Exception as exc:
session.status = conversion_pb2.CONVERSION_STATUS_FAILED session.status = conversion_pb2.CONVERSION_STATUS_FAILED
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
session.error_message = str(exc) session.error_message = str(exc)
session.updated_at = utc_now() session.updated_at = utc_now()
elapsed_s = time.monotonic() - started_at
logger.exception(
"Conversion failed conversion_id=%s source_filename=%s elapsed_s=%.3f",
session.conversion_id,
session.source_filename,
elapsed_s,
)
finally: finally:
await self._cleanup_local_artifacts(session) await self._cleanup_local_artifacts(session)
session.cleanup_task = asyncio.create_task(self._delayed_cleanup(session)) session.cleanup_task = asyncio.create_task(self._delayed_cleanup(session))