add logs at start, end, and errors
This commit is contained in:
@@ -4,6 +4,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import subprocess
|
import subprocess
|
||||||
from typing import Iterable
|
from typing import Iterable
|
||||||
@@ -35,6 +36,12 @@ PHASE_EXTRACTING_NOTES = "extracting_notes"
|
|||||||
PHASE_PPTX_TO_PDF = "pptx_to_pdf"
|
PHASE_PPTX_TO_PDF = "pptx_to_pdf"
|
||||||
PHASE_PDF_TO_IMAGES = "pdf_to_images"
|
PHASE_PDF_TO_IMAGES = "pdf_to_images"
|
||||||
|
|
||||||
|
logger = logging.getLogger("uvicorn.error")
|
||||||
|
|
||||||
|
|
||||||
|
class ConversionTimeoutError(RuntimeError):
|
||||||
|
"""Raised when a conversion subprocess exceeds the configured timeout."""
|
||||||
|
|
||||||
|
|
||||||
def convert_pptx_to_pdf(pptx_path: Path, pdf_path: Path, *, timeout_s: int = 120) -> Path:
|
def convert_pptx_to_pdf(pptx_path: Path, pdf_path: Path, *, timeout_s: int = 120) -> Path:
|
||||||
"""Convert a PPTX file to PDF using headless LibreOffice.
|
"""Convert a PPTX file to PDF using headless LibreOffice.
|
||||||
@@ -75,10 +82,12 @@ def convert_pptx_to_pdf(pptx_path: Path, pdf_path: Path, *, timeout_s: int = 120
|
|||||||
timeout=timeout_s,
|
timeout=timeout_s,
|
||||||
)
|
)
|
||||||
except subprocess.TimeoutExpired as exc:
|
except subprocess.TimeoutExpired as exc:
|
||||||
raise RuntimeError(
|
message = (
|
||||||
"LibreOffice conversion timed out after "
|
"LibreOffice conversion timed out after "
|
||||||
f"{timeout_s} seconds while rendering {pptx_path.name}"
|
f"{timeout_s} seconds while rendering {pptx_path.name}"
|
||||||
) from exc
|
)
|
||||||
|
logger.error(message, exc_info=True)
|
||||||
|
raise ConversionTimeoutError(message) from exc
|
||||||
if completed.returncode != 0:
|
if completed.returncode != 0:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f"LibreOffice conversion failed: {completed.stderr.strip() or completed.stdout.strip()}"
|
f"LibreOffice conversion failed: {completed.stderr.strip() or completed.stdout.strip()}"
|
||||||
@@ -144,11 +153,13 @@ def render_pdf_to_images(
|
|||||||
timeout=timeout_s,
|
timeout=timeout_s,
|
||||||
)
|
)
|
||||||
except subprocess.TimeoutExpired as exc:
|
except subprocess.TimeoutExpired as exc:
|
||||||
raise RuntimeError(
|
message = (
|
||||||
"Poppler rasterization timed out after "
|
"Poppler rasterization timed out after "
|
||||||
f"{timeout_s} seconds while rendering {pdf_path.name}; "
|
f"{timeout_s} seconds while rendering {pdf_path.name}; "
|
||||||
"increase conversion PDF render timeout cap or lower image DPI"
|
"increase conversion PDF render timeout cap or lower image DPI"
|
||||||
) from exc
|
)
|
||||||
|
logger.error(message, exc_info=True)
|
||||||
|
raise ConversionTimeoutError(message) from exc
|
||||||
if completed.returncode != 0:
|
if completed.returncode != 0:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f"Poppler rasterization failed: {completed.stderr.strip() or completed.stdout.strip()}"
|
f"Poppler rasterization failed: {completed.stderr.strip() or completed.stdout.strip()}"
|
||||||
@@ -188,11 +199,13 @@ def render_pdf_to_images(
|
|||||||
if operation_timeout_s is not None
|
if operation_timeout_s is not None
|
||||||
else f"per-page timeout {timeout_s}s"
|
else f"per-page timeout {timeout_s}s"
|
||||||
)
|
)
|
||||||
raise RuntimeError(
|
message = (
|
||||||
"Poppler rasterization timed out while rendering page "
|
"Poppler rasterization timed out while rendering page "
|
||||||
f"{page_index}/{total_pages} of {pdf_path.name}; "
|
f"{page_index}/{total_pages} of {pdf_path.name}; "
|
||||||
f"{timeout_context}. Increase timeout settings or lower image DPI."
|
f"{timeout_context}. Increase timeout settings or lower image DPI."
|
||||||
) from exc
|
)
|
||||||
|
logger.error(message, exc_info=True)
|
||||||
|
raise ConversionTimeoutError(message) from exc
|
||||||
if completed.returncode != 0:
|
if completed.returncode != 0:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
"Poppler rasterization failed on page "
|
"Poppler rasterization failed on page "
|
||||||
@@ -298,16 +311,28 @@ def convert_pptx_to_slidedeck(
|
|||||||
_emit_progress(progress_callback, PHASE_PPTX_TO_PDF, 1, 1)
|
_emit_progress(progress_callback, PHASE_PPTX_TO_PDF, 1, 1)
|
||||||
|
|
||||||
_emit_progress(progress_callback, PHASE_PDF_TO_IMAGES, 0, slide_count)
|
_emit_progress(progress_callback, PHASE_PDF_TO_IMAGES, 0, slide_count)
|
||||||
|
pdf_to_images_page_timeout = _compute_page_timeout(
|
||||||
|
total_timeout_s=pdf_to_images_timeout,
|
||||||
|
page_count=slide_count,
|
||||||
|
base_timeout_s=pdf_to_images_base_timeout_s,
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
"Conversion plan source=%s slides=%d dpi=%d image_format=%s "
|
||||||
|
"computed_timeouts_s[pptx_to_pdf_total=%d,pdf_to_images_total=%d,pdf_to_images_per_page=%d]",
|
||||||
|
pptx_path.name,
|
||||||
|
slide_count,
|
||||||
|
dpi,
|
||||||
|
image_format,
|
||||||
|
pptx_to_pdf_timeout,
|
||||||
|
pdf_to_images_timeout,
|
||||||
|
pdf_to_images_page_timeout,
|
||||||
|
)
|
||||||
image_paths = render_pdf_to_images(
|
image_paths = render_pdf_to_images(
|
||||||
pdf_path,
|
pdf_path,
|
||||||
image_dir,
|
image_dir,
|
||||||
dpi=dpi,
|
dpi=dpi,
|
||||||
image_format=image_format,
|
image_format=image_format,
|
||||||
timeout_s=_compute_page_timeout(
|
timeout_s=pdf_to_images_page_timeout,
|
||||||
total_timeout_s=pdf_to_images_timeout,
|
|
||||||
page_count=slide_count,
|
|
||||||
base_timeout_s=pdf_to_images_base_timeout_s,
|
|
||||||
),
|
|
||||||
total_pages=slide_count,
|
total_pages=slide_count,
|
||||||
operation_timeout_s=pdf_to_images_timeout,
|
operation_timeout_s=pdf_to_images_timeout,
|
||||||
page_progress_callback=lambda current, max_pages: _emit_progress(
|
page_progress_callback=lambda current, max_pages: _emit_progress(
|
||||||
|
|||||||
@@ -2,15 +2,45 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
from officeconvertapi.v1.conversion_connect import ConversionServiceASGIApplication
|
from officeconvertapi.v1.conversion_connect import ConversionServiceASGIApplication
|
||||||
|
|
||||||
from officeconvert_server.config import load_server_config
|
from officeconvert_server.config import load_server_config
|
||||||
from officeconvert_server.service import ConversionServiceImpl
|
from officeconvert_server.service import ConversionServiceImpl
|
||||||
from officeconvert_server.storage import S3Store
|
from officeconvert_server.storage import S3Store
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _configure_application_logging() -> None:
|
||||||
|
"""Ensure application loggers emit through uvicorn's configured handlers."""
|
||||||
|
level_name = os.getenv("OFFICECONVERT_LOG_LEVEL", "INFO").upper()
|
||||||
|
level = logging.getLevelName(level_name)
|
||||||
|
if not isinstance(level, int):
|
||||||
|
level = logging.INFO
|
||||||
|
uvicorn_error_logger = logging.getLogger("uvicorn.error")
|
||||||
|
uvicorn_handlers = list(uvicorn_error_logger.handlers)
|
||||||
|
|
||||||
|
for logger_name in ("officeconvert", "officeconvert_server"):
|
||||||
|
app_logger = logging.getLogger(logger_name)
|
||||||
|
app_logger.setLevel(level)
|
||||||
|
if uvicorn_handlers:
|
||||||
|
# Route application logs through uvicorn's stderr handlers.
|
||||||
|
app_logger.handlers = uvicorn_handlers
|
||||||
|
app_logger.propagate = False
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
"Application logging configured officeconvert_log_level=%s uvicorn_handlers=%d",
|
||||||
|
logging.getLevelName(level),
|
||||||
|
len(uvicorn_handlers),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def create_app() -> ConversionServiceASGIApplication:
|
def create_app() -> ConversionServiceASGIApplication:
|
||||||
"""Construct and return the configured Connect ASGI application."""
|
"""Construct and return the configured Connect ASGI application."""
|
||||||
|
_configure_application_logging()
|
||||||
config = load_server_config()
|
config = load_server_config()
|
||||||
store = S3Store(
|
store = S3Store(
|
||||||
endpoint=config.s3_endpoint,
|
endpoint=config.s3_endpoint,
|
||||||
|
|||||||
@@ -5,9 +5,11 @@ from __future__ import annotations
|
|||||||
import asyncio
|
import asyncio
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timedelta, timezone
|
||||||
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import shutil
|
import shutil
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from connectrpc.code import Code
|
from connectrpc.code import Code
|
||||||
@@ -16,6 +18,7 @@ from connectrpc.request import RequestContext
|
|||||||
from google.protobuf.timestamp_pb2 import Timestamp
|
from google.protobuf.timestamp_pb2 import Timestamp
|
||||||
from officeconvert import SlideArtifact, convert_pptx_to_slidedeck
|
from officeconvert import SlideArtifact, convert_pptx_to_slidedeck
|
||||||
from officeconvert.conversion import (
|
from officeconvert.conversion import (
|
||||||
|
ConversionTimeoutError,
|
||||||
PHASE_EXTRACTING_NOTES,
|
PHASE_EXTRACTING_NOTES,
|
||||||
PHASE_PDF_TO_IMAGES,
|
PHASE_PDF_TO_IMAGES,
|
||||||
PHASE_PPTX_TO_PDF,
|
PHASE_PPTX_TO_PDF,
|
||||||
@@ -26,6 +29,8 @@ from officeconvert_server.config import ServerConfig
|
|||||||
from officeconvert_server.models import ConversionSession, utc_now
|
from officeconvert_server.models import ConversionSession, utc_now
|
||||||
from officeconvert_server.storage import S3Store
|
from officeconvert_server.storage import S3Store
|
||||||
|
|
||||||
|
logger = logging.getLogger("uvicorn.error")
|
||||||
|
|
||||||
|
|
||||||
class ConversionServiceImpl(conversion_connect.ConversionService):
|
class ConversionServiceImpl(conversion_connect.ConversionService):
|
||||||
"""Implements the conversion API with in-memory state and S3 orchestration."""
|
"""Implements the conversion API with in-memory state and S3 orchestration."""
|
||||||
@@ -179,6 +184,16 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
|||||||
|
|
||||||
async def _run_conversion(self, session: ConversionSession) -> None:
|
async def _run_conversion(self, session: ConversionSession) -> None:
|
||||||
"""Execute conversion flow and persist terminal state in memory."""
|
"""Execute conversion flow and persist terminal state in memory."""
|
||||||
|
started_at = time.monotonic()
|
||||||
|
logger.info(
|
||||||
|
"Starting conversion conversion_id=%s source_filename=%s dpi=%d "
|
||||||
|
"timeout_caps_s[pptx_to_pdf_total=%d,pdf_to_images_total=%d]",
|
||||||
|
session.conversion_id,
|
||||||
|
session.source_filename,
|
||||||
|
self._config.conversion_image_dpi,
|
||||||
|
self._config.conversion_pptx_to_pdf_timeout_seconds,
|
||||||
|
self._config.conversion_pdf_to_images_timeout_seconds,
|
||||||
|
)
|
||||||
work_dir = Path(
|
work_dir = Path(
|
||||||
tempfile.mkdtemp(prefix=f"officeconvert-{session.conversion_id}-")
|
tempfile.mkdtemp(prefix=f"officeconvert-{session.conversion_id}-")
|
||||||
).resolve()
|
).resolve()
|
||||||
@@ -230,17 +245,52 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
|||||||
session.status = conversion_pb2.CONVERSION_STATUS_SUCCEEDED
|
session.status = conversion_pb2.CONVERSION_STATUS_SUCCEEDED
|
||||||
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
|
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
|
||||||
session.updated_at = utc_now()
|
session.updated_at = utc_now()
|
||||||
|
elapsed_s = time.monotonic() - started_at
|
||||||
|
logger.info(
|
||||||
|
"Conversion succeeded conversion_id=%s source_filename=%s slides=%d elapsed_s=%.3f",
|
||||||
|
session.conversion_id,
|
||||||
|
session.source_filename,
|
||||||
|
len(result.slides),
|
||||||
|
elapsed_s,
|
||||||
|
)
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
session.status = conversion_pb2.CONVERSION_STATUS_FAILED
|
session.status = conversion_pb2.CONVERSION_STATUS_FAILED
|
||||||
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
|
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
|
||||||
session.error_message = "conversion cancelled"
|
session.error_message = "conversion cancelled"
|
||||||
session.updated_at = utc_now()
|
session.updated_at = utc_now()
|
||||||
|
elapsed_s = time.monotonic() - started_at
|
||||||
|
logger.warning(
|
||||||
|
"Conversion cancelled conversion_id=%s source_filename=%s elapsed_s=%.3f",
|
||||||
|
session.conversion_id,
|
||||||
|
session.source_filename,
|
||||||
|
elapsed_s,
|
||||||
|
)
|
||||||
raise
|
raise
|
||||||
|
except ConversionTimeoutError as exc:
|
||||||
|
session.status = conversion_pb2.CONVERSION_STATUS_FAILED
|
||||||
|
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
|
||||||
|
session.error_message = str(exc)
|
||||||
|
session.updated_at = utc_now()
|
||||||
|
elapsed_s = time.monotonic() - started_at
|
||||||
|
logger.error(
|
||||||
|
"Conversion timed out conversion_id=%s source_filename=%s elapsed_s=%.3f error=%s",
|
||||||
|
session.conversion_id,
|
||||||
|
session.source_filename,
|
||||||
|
elapsed_s,
|
||||||
|
exc,
|
||||||
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
session.status = conversion_pb2.CONVERSION_STATUS_FAILED
|
session.status = conversion_pb2.CONVERSION_STATUS_FAILED
|
||||||
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
|
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
|
||||||
session.error_message = str(exc)
|
session.error_message = str(exc)
|
||||||
session.updated_at = utc_now()
|
session.updated_at = utc_now()
|
||||||
|
elapsed_s = time.monotonic() - started_at
|
||||||
|
logger.exception(
|
||||||
|
"Conversion failed conversion_id=%s source_filename=%s elapsed_s=%.3f",
|
||||||
|
session.conversion_id,
|
||||||
|
session.source_filename,
|
||||||
|
elapsed_s,
|
||||||
|
)
|
||||||
finally:
|
finally:
|
||||||
await self._cleanup_local_artifacts(session)
|
await self._cleanup_local_artifacts(session)
|
||||||
session.cleanup_task = asyncio.create_task(self._delayed_cleanup(session))
|
session.cleanup_task = asyncio.create_task(self._delayed_cleanup(session))
|
||||||
|
|||||||
Reference in New Issue
Block a user