use a single bucket rather than one per conversion
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
S3_ENDPOINT=seaweedfs:8333
|
S3_ENDPOINT=seaweedfs:8333
|
||||||
S3_PUBLIC_ENDPOINT=localhost:8333
|
S3_PUBLIC_ENDPOINT=localhost:8333
|
||||||
|
S3_BUCKET=officeconvert
|
||||||
S3_USE_SSL=false
|
S3_USE_SSL=false
|
||||||
# Presigned URLs; omit to match S3_USE_SSL (internal client uses S3_ENDPOINT).
|
# Presigned URLs; omit to match S3_USE_SSL (internal client uses S3_ENDPOINT).
|
||||||
S3_PUBLIC_USE_SSL=false
|
S3_PUBLIC_USE_SSL=false
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ run-server:
|
|||||||
if [ "$${S3_PUBLIC_ENDPOINT:-}" = "seaweedfs:8333" ]; then S3_PUBLIC_ENDPOINT=localhost:8333; fi; \
|
if [ "$${S3_PUBLIC_ENDPOINT:-}" = "seaweedfs:8333" ]; then S3_PUBLIC_ENDPOINT=localhost:8333; fi; \
|
||||||
export S3_ENDPOINT="$${S3_ENDPOINT:-localhost:8333}"; \
|
export S3_ENDPOINT="$${S3_ENDPOINT:-localhost:8333}"; \
|
||||||
export S3_PUBLIC_ENDPOINT="$${S3_PUBLIC_ENDPOINT:-localhost:8333}"; \
|
export S3_PUBLIC_ENDPOINT="$${S3_PUBLIC_ENDPOINT:-localhost:8333}"; \
|
||||||
|
export S3_BUCKET="$${S3_BUCKET:-officeconvert}"; \
|
||||||
export S3_USE_SSL="$${S3_USE_SSL:-false}"; \
|
export S3_USE_SSL="$${S3_USE_SSL:-false}"; \
|
||||||
export S3_ACCESS_KEY="$${S3_ACCESS_KEY:-minioadmin}"; \
|
export S3_ACCESS_KEY="$${S3_ACCESS_KEY:-minioadmin}"; \
|
||||||
export S3_SECRET_KEY="$${S3_SECRET_KEY:-minioadmin}"; \
|
export S3_SECRET_KEY="$${S3_SECRET_KEY:-minioadmin}"; \
|
||||||
|
|||||||
@@ -138,6 +138,7 @@ Use `.env.example` as your baseline env configuration.
|
|||||||
- This project defaults to **SeaweedFS S3 API** for object transit in development and compose deployments.
|
- This project defaults to **SeaweedFS S3 API** for object transit in development and compose deployments.
|
||||||
- The Python server uses the `minio` Python SDK, which is intentional because SeaweedFS is S3-compatible.
|
- The Python server uses the `minio` Python SDK, which is intentional because SeaweedFS is S3-compatible.
|
||||||
- Runtime configuration uses `S3_*` environment variables.
|
- Runtime configuration uses `S3_*` environment variables.
|
||||||
|
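- All conversions share a single bucket, named by the required `S3_BUCKET` variable: configuration loading fails with an error when it is unset or blank, and the compose files and the `run-server` target default it to `officeconvert`. Each conversion's objects live under a `{conversion_id}/` key prefix, for example `{conversion_id}/input/source.pptx` and `{conversion_id}/output/slide-0001.jpg`.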
|
||||||
|
|
||||||
## Conversion Tuning Notes
|
## Conversion Tuning Notes
|
||||||
|
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ services:
|
|||||||
environment:
|
environment:
|
||||||
S3_ENDPOINT: ${S3_ENDPOINT:-seaweedfs:8333}
|
S3_ENDPOINT: ${S3_ENDPOINT:-seaweedfs:8333}
|
||||||
S3_PUBLIC_ENDPOINT: ${S3_PUBLIC_ENDPOINT:-localhost:8333}
|
S3_PUBLIC_ENDPOINT: ${S3_PUBLIC_ENDPOINT:-localhost:8333}
|
||||||
|
S3_BUCKET: ${S3_BUCKET:-officeconvert}
|
||||||
S3_USE_SSL: ${S3_USE_SSL:-false}
|
S3_USE_SSL: ${S3_USE_SSL:-false}
|
||||||
S3_ACCESS_KEY: ${S3_ACCESS_KEY:-minioadmin}
|
S3_ACCESS_KEY: ${S3_ACCESS_KEY:-minioadmin}
|
||||||
S3_SECRET_KEY: ${S3_SECRET_KEY:-minioadmin}
|
S3_SECRET_KEY: ${S3_SECRET_KEY:-minioadmin}
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ services:
|
|||||||
environment:
|
environment:
|
||||||
S3_ENDPOINT: ${S3_ENDPOINT:-seaweedfs:8333}
|
S3_ENDPOINT: ${S3_ENDPOINT:-seaweedfs:8333}
|
||||||
S3_PUBLIC_ENDPOINT: ${S3_PUBLIC_ENDPOINT:-localhost:8333}
|
S3_PUBLIC_ENDPOINT: ${S3_PUBLIC_ENDPOINT:-localhost:8333}
|
||||||
|
S3_BUCKET: ${S3_BUCKET:-officeconvert}
|
||||||
S3_USE_SSL: ${S3_USE_SSL:-false}
|
S3_USE_SSL: ${S3_USE_SSL:-false}
|
||||||
S3_ACCESS_KEY: ${S3_ACCESS_KEY:-minioadmin}
|
S3_ACCESS_KEY: ${S3_ACCESS_KEY:-minioadmin}
|
||||||
S3_SECRET_KEY: ${S3_SECRET_KEY:-minioadmin}
|
S3_SECRET_KEY: ${S3_SECRET_KEY:-minioadmin}
|
||||||
|
|||||||
@@ -10,7 +10,9 @@ from officeconvertapi.v1.conversion_connect import ConversionServiceASGIApplicat
|
|||||||
|
|
||||||
from officeconvert_server.config import load_server_config
|
from officeconvert_server.config import load_server_config
|
||||||
from officeconvert_server.service import ConversionServiceImpl
|
from officeconvert_server.service import ConversionServiceImpl
|
||||||
from officeconvert_server.storage import S3Store
|
from officeconvert_server.storage import S3Store, log_s3_error
|
||||||
|
|
||||||
|
from minio.error import S3Error
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -55,6 +57,16 @@ def create_app() -> ConversionServiceASGIApplication:
|
|||||||
if os.getenv("OFFICECONVERT_S3_TRACE", "").lower() in ("1", "true", "yes"):
|
if os.getenv("OFFICECONVERT_S3_TRACE", "").lower() in ("1", "true", "yes"):
|
||||||
store.enable_http_trace(sys.stderr)
|
store.enable_http_trace(sys.stderr)
|
||||||
logger.warning("OFFICECONVERT_S3_TRACE enabled: S3 HTTP dumps on stderr")
|
logger.warning("OFFICECONVERT_S3_TRACE enabled: S3 HTTP dumps on stderr")
|
||||||
|
try:
|
||||||
|
store.ensure_bucket(config.s3_bucket)
|
||||||
|
except S3Error as exc:
|
||||||
|
log_s3_error(
|
||||||
|
"ensure_bucket",
|
||||||
|
endpoint=config.s3_endpoint,
|
||||||
|
secure=config.s3_secure,
|
||||||
|
exc=exc,
|
||||||
|
)
|
||||||
|
raise
|
||||||
service = ConversionServiceImpl(config=config, store=store)
|
service = ConversionServiceImpl(config=config, store=store)
|
||||||
return ConversionServiceASGIApplication(service)
|
return ConversionServiceASGIApplication(service)
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import os
|
|||||||
class ServerConfig:
|
class ServerConfig:
|
||||||
"""Defines environment-driven settings for server orchestration."""
|
"""Defines environment-driven settings for server orchestration."""
|
||||||
|
|
||||||
|
s3_bucket: str
|
||||||
s3_endpoint: str
|
s3_endpoint: str
|
||||||
s3_access_key: str
|
s3_access_key: str
|
||||||
s3_secret_key: str
|
s3_secret_key: str
|
||||||
@@ -37,7 +38,11 @@ def load_server_config() -> ServerConfig:
|
|||||||
else s3_secure
|
else s3_secure
|
||||||
)
|
)
|
||||||
region_env = os.getenv("S3_REGION", "").strip()
|
region_env = os.getenv("S3_REGION", "").strip()
|
||||||
|
s3_bucket = os.getenv("S3_BUCKET", "").strip()
|
||||||
|
if not s3_bucket:
|
||||||
|
raise ValueError("S3_BUCKET is required")
|
||||||
return ServerConfig(
|
return ServerConfig(
|
||||||
|
s3_bucket=s3_bucket,
|
||||||
s3_endpoint=os.getenv("S3_ENDPOINT", "localhost:8333"),
|
s3_endpoint=os.getenv("S3_ENDPOINT", "localhost:8333"),
|
||||||
s3_access_key=os.getenv("S3_ACCESS_KEY", "minioadmin"),
|
s3_access_key=os.getenv("S3_ACCESS_KEY", "minioadmin"),
|
||||||
s3_secret_key=os.getenv("S3_SECRET_KEY", "minioadmin"),
|
s3_secret_key=os.getenv("S3_SECRET_KEY", "minioadmin"),
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ class ConversionSession:
|
|||||||
thumbnail_resolution: conversion_pb2.ConversionResolution
|
thumbnail_resolution: conversion_pb2.ConversionResolution
|
||||||
full_jpeg_quality: int
|
full_jpeg_quality: int
|
||||||
thumbnail_jpeg_quality: int
|
thumbnail_jpeg_quality: int
|
||||||
bucket_name: str
|
object_prefix: str
|
||||||
upload_object_key: str
|
upload_object_key: str
|
||||||
status: conversion_pb2.ConversionStatus
|
status: conversion_pb2.ConversionStatus
|
||||||
notes: conversion_pb2.NotesOptions | None = None
|
notes: conversion_pb2.NotesOptions | None = None
|
||||||
|
|||||||
@@ -122,23 +122,13 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
|||||||
|
|
||||||
ksuid = Ksuid()
|
ksuid = Ksuid()
|
||||||
conversion_id = str(ksuid)
|
conversion_id = str(ksuid)
|
||||||
bucket_name = f"oc-{bytes(ksuid).hex()}"
|
object_prefix = f"{conversion_id}/"
|
||||||
upload_key = "input/source.pptx"
|
upload_key = f"{object_prefix}input/source.pptx"
|
||||||
expires_at = utc_now() + timedelta(seconds=self._config.s3_session_ttl_seconds)
|
expires_at = utc_now() + timedelta(seconds=self._config.s3_session_ttl_seconds)
|
||||||
|
|
||||||
try:
|
|
||||||
self._store.ensure_bucket(bucket_name)
|
|
||||||
except S3Error as exc:
|
|
||||||
log_s3_error(
|
|
||||||
"ensure_bucket",
|
|
||||||
endpoint=self._config.s3_endpoint,
|
|
||||||
secure=self._config.s3_secure,
|
|
||||||
exc=exc,
|
|
||||||
)
|
|
||||||
raise
|
|
||||||
try:
|
try:
|
||||||
upload_url = self._store.presigned_put_url(
|
upload_url = self._store.presigned_put_url(
|
||||||
bucket_name,
|
self._config.s3_bucket,
|
||||||
upload_key,
|
upload_key,
|
||||||
ttl_seconds=self._config.s3_session_ttl_seconds,
|
ttl_seconds=self._config.s3_session_ttl_seconds,
|
||||||
)
|
)
|
||||||
@@ -159,7 +149,7 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
|||||||
full_jpeg_quality=full_jpeg_quality,
|
full_jpeg_quality=full_jpeg_quality,
|
||||||
thumbnail_jpeg_quality=thumbnail_jpeg_quality,
|
thumbnail_jpeg_quality=thumbnail_jpeg_quality,
|
||||||
notes=request.notes if request.HasField("notes") else None,
|
notes=request.notes if request.HasField("notes") else None,
|
||||||
bucket_name=bucket_name,
|
object_prefix=object_prefix,
|
||||||
upload_object_key=upload_key,
|
upload_object_key=upload_key,
|
||||||
status=conversion_pb2.CONVERSION_STATUS_PENDING,
|
status=conversion_pb2.CONVERSION_STATUS_PENDING,
|
||||||
)
|
)
|
||||||
@@ -168,7 +158,7 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
|||||||
|
|
||||||
return conversion_pb2.CreateConversionResponse(
|
return conversion_pb2.CreateConversionResponse(
|
||||||
conversion_id=conversion_id,
|
conversion_id=conversion_id,
|
||||||
upload_bucket=bucket_name,
|
upload_bucket=self._config.s3_bucket,
|
||||||
upload_object_key=upload_key,
|
upload_object_key=upload_key,
|
||||||
upload_url=upload_url,
|
upload_url=upload_url,
|
||||||
expires_at=_to_timestamp(expires_at),
|
expires_at=_to_timestamp(expires_at),
|
||||||
@@ -265,7 +255,11 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
|||||||
if session.conversion_task is not None and not session.conversion_task.done():
|
if session.conversion_task is not None and not session.conversion_task.done():
|
||||||
session.conversion_task.cancel()
|
session.conversion_task.cancel()
|
||||||
await self._cleanup_local_artifacts(session)
|
await self._cleanup_local_artifacts(session)
|
||||||
await asyncio.to_thread(self._store.remove_bucket_tree, session.bucket_name)
|
await asyncio.to_thread(
|
||||||
|
self._store.remove_prefix,
|
||||||
|
self._config.s3_bucket,
|
||||||
|
session.object_prefix,
|
||||||
|
)
|
||||||
return conversion_pb2.DeleteConversionResponse(
|
return conversion_pb2.DeleteConversionResponse(
|
||||||
conversion_id=session.conversion_id,
|
conversion_id=session.conversion_id,
|
||||||
deleted=True,
|
deleted=True,
|
||||||
@@ -295,7 +289,7 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
|||||||
try:
|
try:
|
||||||
await asyncio.to_thread(
|
await asyncio.to_thread(
|
||||||
self._store.fget_object,
|
self._store.fget_object,
|
||||||
session.bucket_name,
|
self._config.s3_bucket,
|
||||||
session.upload_object_key,
|
session.upload_object_key,
|
||||||
source_path,
|
source_path,
|
||||||
)
|
)
|
||||||
@@ -436,10 +430,12 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
|||||||
upload_total = slide_total * 2
|
upload_total = slide_total * 2
|
||||||
upload_index = 0
|
upload_index = 0
|
||||||
for slide in slides:
|
for slide in slides:
|
||||||
object_key = f"output/slide-{slide.index:04d}{slide.image_path.suffix}"
|
object_key = (
|
||||||
self._store.fput_object(session.bucket_name, object_key, slide.image_path)
|
f"{session.object_prefix}output/slide-{slide.index:04d}{slide.image_path.suffix}"
|
||||||
|
)
|
||||||
|
self._store.fput_object(self._config.s3_bucket, object_key, slide.image_path)
|
||||||
image_url = self._store.presigned_get_url(
|
image_url = self._store.presigned_get_url(
|
||||||
session.bucket_name,
|
self._config.s3_bucket,
|
||||||
object_key,
|
object_key,
|
||||||
ttl_seconds=self._config.s3_session_ttl_seconds,
|
ttl_seconds=self._config.s3_session_ttl_seconds,
|
||||||
)
|
)
|
||||||
@@ -447,15 +443,16 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
|||||||
if progress_callback is not None:
|
if progress_callback is not None:
|
||||||
progress_callback(upload_index, upload_total)
|
progress_callback(upload_index, upload_total)
|
||||||
thumbnail_object_key = (
|
thumbnail_object_key = (
|
||||||
f"output/thumb/slide-{slide.index:04d}{slide.thumbnail_path.suffix}"
|
f"{session.object_prefix}output/thumb/slide-{slide.index:04d}"
|
||||||
|
f"{slide.thumbnail_path.suffix}"
|
||||||
)
|
)
|
||||||
self._store.fput_object(
|
self._store.fput_object(
|
||||||
session.bucket_name,
|
self._config.s3_bucket,
|
||||||
thumbnail_object_key,
|
thumbnail_object_key,
|
||||||
slide.thumbnail_path,
|
slide.thumbnail_path,
|
||||||
)
|
)
|
||||||
thumbnail_image_url = self._store.presigned_get_url(
|
thumbnail_image_url = self._store.presigned_get_url(
|
||||||
session.bucket_name,
|
self._config.s3_bucket,
|
||||||
thumbnail_object_key,
|
thumbnail_object_key,
|
||||||
ttl_seconds=self._config.s3_session_ttl_seconds,
|
ttl_seconds=self._config.s3_session_ttl_seconds,
|
||||||
)
|
)
|
||||||
@@ -515,7 +512,11 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
|||||||
"""Delete storage resources after the configured session retention period."""
|
"""Delete storage resources after the configured session retention period."""
|
||||||
try:
|
try:
|
||||||
await asyncio.sleep(self._config.conversion_cleanup_delay_seconds)
|
await asyncio.sleep(self._config.conversion_cleanup_delay_seconds)
|
||||||
await asyncio.to_thread(self._store.remove_bucket_tree, session.bucket_name)
|
await asyncio.to_thread(
|
||||||
|
self._store.remove_prefix,
|
||||||
|
self._config.s3_bucket,
|
||||||
|
session.object_prefix,
|
||||||
|
)
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
return
|
return
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
@@ -123,16 +123,25 @@ class S3Store:
|
|||||||
"""Upload one local filesystem object to storage."""
|
"""Upload one local filesystem object to storage."""
|
||||||
self._client.fput_object(bucket_name, object_key, str(source_path))
|
self._client.fput_object(bucket_name, object_key, str(source_path))
|
||||||
|
|
||||||
def remove_bucket_tree(self, bucket_name: str) -> None:
|
def remove_prefix(self, bucket_name: str, prefix: str) -> None:
|
||||||
"""Remove all objects in a bucket and then delete the bucket."""
|
"""Remove all objects under a key prefix within a bucket."""
|
||||||
objects = list(self._client.list_objects(bucket_name, recursive=True))
|
normalized_prefix = prefix if prefix.endswith("/") else f"{prefix}/"
|
||||||
if objects:
|
objects = list(
|
||||||
|
self._client.list_objects(
|
||||||
|
bucket_name,
|
||||||
|
prefix=normalized_prefix,
|
||||||
|
recursive=True,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if not objects:
|
||||||
|
return
|
||||||
|
|
||||||
delete_requests: list[DeleteObject] = []
|
delete_requests: list[DeleteObject] = []
|
||||||
for obj in objects:
|
for obj in objects:
|
||||||
object_name = obj.object_name
|
object_name = obj.object_name
|
||||||
if object_name is None:
|
if object_name is None:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
"encountered unnamed object while removing bucket contents"
|
"encountered unnamed object while removing prefix contents"
|
||||||
)
|
)
|
||||||
delete_requests.append(DeleteObject(object_name))
|
delete_requests.append(DeleteObject(object_name))
|
||||||
|
|
||||||
@@ -146,12 +155,6 @@ class S3Store:
|
|||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f"failed to delete object {object_name}: {message}"
|
f"failed to delete object {object_name}: {message}"
|
||||||
)
|
)
|
||||||
try:
|
|
||||||
self._client.remove_bucket(bucket_name)
|
|
||||||
except S3Error as exc:
|
|
||||||
# Concurrent cleanup paths may race to remove the same bucket.
|
|
||||||
if exc.code != "NoSuchBucket":
|
|
||||||
raise
|
|
||||||
|
|
||||||
|
|
||||||
def object_key_from_presigned_url(url: str) -> str:
|
def object_key_from_presigned_url(url: str) -> str:
|
||||||
|
|||||||
Reference in New Issue
Block a user