add logs at start, end, and errors

This commit is contained in:
2026-03-26 23:56:53 -07:00
parent fa261c90ee
commit f18e9da2dd
3 changed files with 116 additions and 11 deletions
@@ -4,6 +4,7 @@ from __future__ import annotations
from collections.abc import Callable
from dataclasses import dataclass
import logging
from pathlib import Path
import subprocess
from typing import Iterable
@@ -35,6 +36,12 @@ PHASE_EXTRACTING_NOTES = "extracting_notes"
PHASE_PPTX_TO_PDF = "pptx_to_pdf"
PHASE_PDF_TO_IMAGES = "pdf_to_images"
logger = logging.getLogger("uvicorn.error")
class ConversionTimeoutError(RuntimeError):
"""Raised when a conversion subprocess exceeds the configured timeout."""
def convert_pptx_to_pdf(pptx_path: Path, pdf_path: Path, *, timeout_s: int = 120) -> Path:
"""Convert a PPTX file to PDF using headless LibreOffice.
@@ -75,10 +82,12 @@ def convert_pptx_to_pdf(pptx_path: Path, pdf_path: Path, *, timeout_s: int = 120
timeout=timeout_s,
)
except subprocess.TimeoutExpired as exc:
raise RuntimeError(
message = (
"LibreOffice conversion timed out after "
f"{timeout_s} seconds while rendering {pptx_path.name}"
) from exc
)
logger.error(message, exc_info=True)
raise ConversionTimeoutError(message) from exc
if completed.returncode != 0:
raise RuntimeError(
f"LibreOffice conversion failed: {completed.stderr.strip() or completed.stdout.strip()}"
@@ -144,11 +153,13 @@ def render_pdf_to_images(
timeout=timeout_s,
)
except subprocess.TimeoutExpired as exc:
raise RuntimeError(
message = (
"Poppler rasterization timed out after "
f"{timeout_s} seconds while rendering {pdf_path.name}; "
"increase conversion PDF render timeout cap or lower image DPI"
) from exc
)
logger.error(message, exc_info=True)
raise ConversionTimeoutError(message) from exc
if completed.returncode != 0:
raise RuntimeError(
f"Poppler rasterization failed: {completed.stderr.strip() or completed.stdout.strip()}"
@@ -188,11 +199,13 @@ def render_pdf_to_images(
if operation_timeout_s is not None
else f"per-page timeout {timeout_s}s"
)
raise RuntimeError(
message = (
"Poppler rasterization timed out while rendering page "
f"{page_index}/{total_pages} of {pdf_path.name}; "
f"{timeout_context}. Increase timeout settings or lower image DPI."
) from exc
)
logger.error(message, exc_info=True)
raise ConversionTimeoutError(message) from exc
if completed.returncode != 0:
raise RuntimeError(
"Poppler rasterization failed on page "
@@ -298,16 +311,28 @@ def convert_pptx_to_slidedeck(
_emit_progress(progress_callback, PHASE_PPTX_TO_PDF, 1, 1)
_emit_progress(progress_callback, PHASE_PDF_TO_IMAGES, 0, slide_count)
pdf_to_images_page_timeout = _compute_page_timeout(
total_timeout_s=pdf_to_images_timeout,
page_count=slide_count,
base_timeout_s=pdf_to_images_base_timeout_s,
)
logger.info(
"Conversion plan source=%s slides=%d dpi=%d image_format=%s "
"computed_timeouts_s[pptx_to_pdf_total=%d,pdf_to_images_total=%d,pdf_to_images_per_page=%d]",
pptx_path.name,
slide_count,
dpi,
image_format,
pptx_to_pdf_timeout,
pdf_to_images_timeout,
pdf_to_images_page_timeout,
)
image_paths = render_pdf_to_images(
pdf_path,
image_dir,
dpi=dpi,
image_format=image_format,
timeout_s=_compute_page_timeout(
total_timeout_s=pdf_to_images_timeout,
page_count=slide_count,
base_timeout_s=pdf_to_images_base_timeout_s,
),
timeout_s=pdf_to_images_page_timeout,
total_pages=slide_count,
operation_timeout_s=pdf_to_images_timeout,
page_progress_callback=lambda current, max_pages: _emit_progress(
@@ -2,15 +2,45 @@
from __future__ import annotations
import logging
import os
from officeconvertapi.v1.conversion_connect import ConversionServiceASGIApplication
from officeconvert_server.config import load_server_config
from officeconvert_server.service import ConversionServiceImpl
from officeconvert_server.storage import S3Store
logger = logging.getLogger(__name__)
def _configure_application_logging() -> None:
"""Ensure application loggers emit through uvicorn's configured handlers."""
level_name = os.getenv("OFFICECONVERT_LOG_LEVEL", "INFO").upper()
level = logging.getLevelName(level_name)
if not isinstance(level, int):
level = logging.INFO
uvicorn_error_logger = logging.getLogger("uvicorn.error")
uvicorn_handlers = list(uvicorn_error_logger.handlers)
for logger_name in ("officeconvert", "officeconvert_server"):
app_logger = logging.getLogger(logger_name)
app_logger.setLevel(level)
if uvicorn_handlers:
# Route application logs through uvicorn's stderr handlers.
app_logger.handlers = uvicorn_handlers
app_logger.propagate = False
logger.info(
"Application logging configured officeconvert_log_level=%s uvicorn_handlers=%d",
logging.getLevelName(level),
len(uvicorn_handlers),
)
def create_app() -> ConversionServiceASGIApplication:
"""Construct and return the configured Connect ASGI application."""
_configure_application_logging()
config = load_server_config()
store = S3Store(
endpoint=config.s3_endpoint,
@@ -5,9 +5,11 @@ from __future__ import annotations
import asyncio
from collections.abc import Callable
from datetime import datetime, timedelta, timezone
import logging
from pathlib import Path
import shutil
import tempfile
import time
import uuid
from connectrpc.code import Code
@@ -16,6 +18,7 @@ from connectrpc.request import RequestContext
from google.protobuf.timestamp_pb2 import Timestamp
from officeconvert import SlideArtifact, convert_pptx_to_slidedeck
from officeconvert.conversion import (
ConversionTimeoutError,
PHASE_EXTRACTING_NOTES,
PHASE_PDF_TO_IMAGES,
PHASE_PPTX_TO_PDF,
@@ -26,6 +29,8 @@ from officeconvert_server.config import ServerConfig
from officeconvert_server.models import ConversionSession, utc_now
from officeconvert_server.storage import S3Store
logger = logging.getLogger("uvicorn.error")
class ConversionServiceImpl(conversion_connect.ConversionService):
"""Implements the conversion API with in-memory state and S3 orchestration."""
@@ -179,6 +184,16 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
async def _run_conversion(self, session: ConversionSession) -> None:
"""Execute conversion flow and persist terminal state in memory."""
started_at = time.monotonic()
logger.info(
"Starting conversion conversion_id=%s source_filename=%s dpi=%d "
"timeout_caps_s[pptx_to_pdf_total=%d,pdf_to_images_total=%d]",
session.conversion_id,
session.source_filename,
self._config.conversion_image_dpi,
self._config.conversion_pptx_to_pdf_timeout_seconds,
self._config.conversion_pdf_to_images_timeout_seconds,
)
work_dir = Path(
tempfile.mkdtemp(prefix=f"officeconvert-{session.conversion_id}-")
).resolve()
@@ -230,17 +245,52 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
session.status = conversion_pb2.CONVERSION_STATUS_SUCCEEDED
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
session.updated_at = utc_now()
elapsed_s = time.monotonic() - started_at
logger.info(
"Conversion succeeded conversion_id=%s source_filename=%s slides=%d elapsed_s=%.3f",
session.conversion_id,
session.source_filename,
len(result.slides),
elapsed_s,
)
except asyncio.CancelledError:
session.status = conversion_pb2.CONVERSION_STATUS_FAILED
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
session.error_message = "conversion cancelled"
session.updated_at = utc_now()
elapsed_s = time.monotonic() - started_at
logger.warning(
"Conversion cancelled conversion_id=%s source_filename=%s elapsed_s=%.3f",
session.conversion_id,
session.source_filename,
elapsed_s,
)
raise
except ConversionTimeoutError as exc:
session.status = conversion_pb2.CONVERSION_STATUS_FAILED
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
session.error_message = str(exc)
session.updated_at = utc_now()
elapsed_s = time.monotonic() - started_at
logger.error(
"Conversion timed out conversion_id=%s source_filename=%s elapsed_s=%.3f error=%s",
session.conversion_id,
session.source_filename,
elapsed_s,
exc,
)
except Exception as exc:
session.status = conversion_pb2.CONVERSION_STATUS_FAILED
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
session.error_message = str(exc)
session.updated_at = utc_now()
elapsed_s = time.monotonic() - started_at
logger.exception(
"Conversion failed conversion_id=%s source_filename=%s elapsed_s=%.3f",
session.conversion_id,
session.source_filename,
elapsed_s,
)
finally:
await self._cleanup_local_artifacts(session)
session.cleanup_task = asyncio.create_task(self._delayed_cleanup(session))