don't use S3 CreateBucket and clean up
Docker server image / build-and-push (push) Successful in 1m6s

This commit is contained in:
2026-06-17 16:58:02 -07:00
parent 26cd0ef071
commit c0ff6ad635
11 changed files with 113 additions and 105 deletions
+16 -2
View File
@@ -2,7 +2,7 @@ SHELL := /bin/sh
BUF ?= buf
.PHONY: buf-lint buf-generate py-sync py-test go-test test compose-up compose-up-dev run-server
.PHONY: buf-lint buf-generate py-sync py-test go-test test compose-up compose-up-dev s3-init run-server
buf-lint:
$(BUF) lint
@@ -32,7 +32,21 @@ compose-up:
compose-up-dev:
docker compose --env-file .env.example -f deploy/docker-compose.dev.yml up
run-server:
# s3-init: create the development bucket through a throwaway minio/mc container.
# - Sources ./.env when it exists (with `set -a`, so its variables are exported);
#   otherwise the defaults spelled out below are used.
# - The compose-internal endpoint `seaweedfs:8333` is rewritten to `localhost:8333`.
# - Only the text after the first ':' of the endpoint is used (as the port); the
#   container always connects to host.docker.internal, so the endpoint's host part
#   is ignored.
# NOTE(review): an endpoint without a ':' is passed through unchanged as the port.
# NOTE(review): `--ignore-existing` presumably makes repeated runs harmless — confirm
#   against the mc documentation.
s3-init:
@set -a; \
if [ -f .env ]; then . ./.env; fi; \
set +a; \
endpoint="$${S3_ENDPOINT:-localhost:8333}"; \
case "$$endpoint" in seaweedfs:8333) endpoint=localhost:8333 ;; esac; \
bucket="$${S3_BUCKET:-officeconvert}"; \
access_key="$${S3_ACCESS_KEY:-minioadmin}"; \
secret_key="$${S3_SECRET_KEY:-minioadmin}"; \
port="$${endpoint#*:}"; \
docker run --rm --add-host=host.docker.internal:host-gateway minio/mc:latest /bin/sh -c " \
mc alias set local http://host.docker.internal:$$port '$$access_key' '$$secret_key' && \
mc mb local/$$bucket --ignore-existing"
run-server: s3-init
@set -a; \
if [ -f .env ]; then . ./.env; fi; \
set +a; \
+4 -6
View File
@@ -135,7 +135,7 @@ Use `.env.example` as your baseline env configuration.
## Storage Backend Notes
- Local development defaults to **SeaweedFS** (S3-compatible) via Docker Compose.
- Local development defaults to **SeaweedFS** (S3-compatible) via Docker Compose. Compose runs an `s3-init` step that creates the dev bucket before the server starts.
- Production can use any S3-compatible provider; **AWS S3** is the expected choice.
- The Python server uses the `minio` Python SDK against the S3 API.
- Runtime configuration uses `S3_*` environment variables.
@@ -166,11 +166,11 @@ S3_SECRET_KEY=...
Use your bucket's regional hostname for both endpoints unless you deliberately split internal vs client-facing access. `S3_PUBLIC_ENDPOINT` must be reachable by whatever uploads and downloads via presigned URLs (clients, not just the server).
On startup the server calls `CreateBucket` if the bucket is missing. In AWS it is simpler to **pre-create the bucket** and grant object permissions only (see IAM below).
On startup the server verifies that the bucket exists via `HeadBucket` and fails fast if it is missing. **Pre-create the bucket** before deploying (see IAM below).
**IAM permissions**
Scope access to the single bucket. Object keys are per-conversion prefixes, so list/delete can target the whole bucket:
Scope access to the single bucket. Object keys are grouped under per-conversion prefixes, so list/delete can target the whole bucket. Startup verification uses `HeadBucket`, which is authorized by `s3:ListBucket` on the bucket ARN:
```json
{
@@ -178,7 +178,7 @@ Scope access to the single bucket. Object keys are per-conversion prefixes, so l
"Statement": [
{
"Effect": "Allow",
"Action": ["s3:ListBucket"],
"Action": ["s3:ListBucket"],
"Resource": "arn:aws:s3:::officeconvert-prod"
},
{
@@ -190,8 +190,6 @@ Scope access to the single bucket. Object keys are per-conversion prefixes, so l
}
```
Add `s3:CreateBucket` on `arn:aws:s3:::officeconvert-prod` only if you want the server to create the bucket on first boot.
**CORS**
Required only if uploads or downloads go **directly from a browser** to presigned URLs. Server-side clients (`curl`, the Go client) do not need CORS. Allow `PUT` and `GET` for your web origin on the bucket.
+16 -1
View File
@@ -14,12 +14,27 @@ services:
volumes:
- seaweedfs_data:/data
s3-init:
image: minio/mc:latest
depends_on:
- seaweedfs
environment:
AWS_ACCESS_KEY_ID: ${S3_ACCESS_KEY:-minioadmin}
AWS_SECRET_ACCESS_KEY: ${S3_SECRET_KEY:-minioadmin}
S3_BUCKET: ${S3_BUCKET:-officeconvert}
entrypoint: >
/bin/sh -c "
until mc alias set local http://seaweedfs:8333 $$AWS_ACCESS_KEY_ID $$AWS_SECRET_ACCESS_KEY; do sleep 1; done &&
mc mb local/$$S3_BUCKET --ignore-existing
"
server:
build:
context: ..
dockerfile: Dockerfile.server
depends_on:
- seaweedfs
s3-init:
condition: service_completed_successfully
environment:
S3_ENDPOINT: ${S3_ENDPOINT:-seaweedfs:8333}
S3_PUBLIC_ENDPOINT: ${S3_PUBLIC_ENDPOINT:-localhost:8333}
+16 -1
View File
@@ -15,10 +15,25 @@ services:
volumes:
- seaweedfs_data:/data
s3-init:
image: minio/mc:latest
depends_on:
- seaweedfs
environment:
AWS_ACCESS_KEY_ID: ${S3_ACCESS_KEY:-minioadmin}
AWS_SECRET_ACCESS_KEY: ${S3_SECRET_KEY:-minioadmin}
S3_BUCKET: ${S3_BUCKET:-officeconvert}
entrypoint: >
/bin/sh -c "
until mc alias set local http://seaweedfs:8333 $$AWS_ACCESS_KEY_ID $$AWS_SECRET_ACCESS_KEY; do sleep 1; done &&
mc mb local/$$S3_BUCKET --ignore-existing
"
server:
image: gitea.auvem.com/end/officeconvert-server:${OFFICECONVERT_IMAGE_TAG:-latest}
depends_on:
- seaweedfs
s3-init:
condition: service_completed_successfully
environment:
S3_ENDPOINT: ${S3_ENDPOINT:-seaweedfs:8333}
S3_PUBLIC_ENDPOINT: ${S3_PUBLIC_ENDPOINT:-localhost:8333}
+3 -12
View File
@@ -777,7 +777,6 @@ type CreateConversionResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
// Session identifier: KSUID in standard base62 text form. Well-formed values are at most 27 characters (see https://github.com/segmentio/ksuid).
ConversionId string `protobuf:"bytes,1,opt,name=conversion_id,json=conversionId,proto3" json:"conversion_id,omitempty"`
UploadBucket string `protobuf:"bytes,2,opt,name=upload_bucket,json=uploadBucket,proto3" json:"upload_bucket,omitempty"`
UploadObjectKey string `protobuf:"bytes,3,opt,name=upload_object_key,json=uploadObjectKey,proto3" json:"upload_object_key,omitempty"`
UploadUrl string `protobuf:"bytes,4,opt,name=upload_url,json=uploadUrl,proto3" json:"upload_url,omitempty"`
ExpiresAt *timestamppb.Timestamp `protobuf:"bytes,5,opt,name=expires_at,json=expiresAt,proto3" json:"expires_at,omitempty"`
@@ -822,13 +821,6 @@ func (x *CreateConversionResponse) GetConversionId() string {
return ""
}
func (x *CreateConversionResponse) GetUploadBucket() string {
if x != nil {
return x.UploadBucket
}
return ""
}
func (x *CreateConversionResponse) GetUploadObjectKey() string {
if x != nil {
return x.UploadObjectKey
@@ -1330,15 +1322,14 @@ const file_officeconvertapi_v1_conversion_proto_rawDesc = "" +
"\x0fsource_filename\x18\x01 \x01(\tR\x0esourceFilename\x12;\n" +
"\x04full\x18\x02 \x01(\v2'.officeconvertapi.v1.SlideRasterOptionsR\x04full\x12E\n" +
"\tthumbnail\x18\x03 \x01(\v2'.officeconvertapi.v1.SlideRasterOptionsR\tthumbnail\x127\n" +
"\x05notes\x18\x04 \x01(\v2!.officeconvertapi.v1.NotesOptionsR\x05notes\"\xea\x01\n" +
"\x05notes\x18\x04 \x01(\v2!.officeconvertapi.v1.NotesOptionsR\x05notes\"\xda\x01\n" +
"\x18CreateConversionResponse\x12#\n" +
"\rconversion_id\x18\x01 \x01(\tR\fconversionId\x12#\n" +
"\rupload_bucket\x18\x02 \x01(\tR\fuploadBucket\x12*\n" +
"\rconversion_id\x18\x01 \x01(\tR\fconversionId\x12*\n" +
"\x11upload_object_key\x18\x03 \x01(\tR\x0fuploadObjectKey\x12\x1d\n" +
"\n" +
"upload_url\x18\x04 \x01(\tR\tuploadUrl\x129\n" +
"\n" +
"expires_at\x18\x05 \x01(\v2\x1a.google.protobuf.TimestampR\texpiresAt\"=\n" +
"expires_at\x18\x05 \x01(\v2\x1a.google.protobuf.TimestampR\texpiresAtJ\x04\b\x02\x10\x03R\rupload_bucket\"=\n" +
"\x16StartConversionRequest\x12#\n" +
"\rconversion_id\x18\x01 \x01(\tR\fconversionId\"}\n" +
"\x17StartConversionResponse\x12#\n" +
File diff suppressed because one or more lines are too long
@@ -149,18 +149,16 @@ class CreateConversionRequest(_message.Message):
def __init__(self, source_filename: _Optional[str] = ..., full: _Optional[_Union[SlideRasterOptions, _Mapping]] = ..., thumbnail: _Optional[_Union[SlideRasterOptions, _Mapping]] = ..., notes: _Optional[_Union[NotesOptions, _Mapping]] = ...) -> None: ...
class CreateConversionResponse(_message.Message):
__slots__ = ("conversion_id", "upload_bucket", "upload_object_key", "upload_url", "expires_at")
__slots__ = ("conversion_id", "upload_object_key", "upload_url", "expires_at")
CONVERSION_ID_FIELD_NUMBER: _ClassVar[int]
UPLOAD_BUCKET_FIELD_NUMBER: _ClassVar[int]
UPLOAD_OBJECT_KEY_FIELD_NUMBER: _ClassVar[int]
UPLOAD_URL_FIELD_NUMBER: _ClassVar[int]
EXPIRES_AT_FIELD_NUMBER: _ClassVar[int]
conversion_id: str
upload_bucket: str
upload_object_key: str
upload_url: str
expires_at: _timestamp_pb2.Timestamp
def __init__(self, conversion_id: _Optional[str] = ..., upload_bucket: _Optional[str] = ..., upload_object_key: _Optional[str] = ..., upload_url: _Optional[str] = ..., expires_at: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ...) -> None: ...
def __init__(self, conversion_id: _Optional[str] = ..., upload_object_key: _Optional[str] = ..., upload_url: _Optional[str] = ..., expires_at: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ...) -> None: ...
class StartConversionRequest(_message.Message):
__slots__ = ("conversion_id",)
+2 -1
View File
@@ -136,7 +136,8 @@ message CreateConversionRequest {
message CreateConversionResponse {
// Session identifier: KSUID in standard base62 text form. Well-formed values are at most 27 characters (see https://github.com/segmentio/ksuid).
string conversion_id = 1;
string upload_bucket = 2;
reserved 2;
reserved "upload_bucket";
string upload_object_key = 3;
string upload_url = 4;
google.protobuf.Timestamp expires_at = 5;
@@ -46,6 +46,7 @@ def create_app() -> ConversionServiceASGIApplication:
_configure_application_logging()
config = load_server_config()
store = S3Store(
bucket=config.s3_bucket,
endpoint=config.s3_endpoint,
access_key=config.s3_access_key,
secret_key=config.s3_secret_key,
@@ -58,10 +59,10 @@ def create_app() -> ConversionServiceASGIApplication:
store.enable_http_trace(sys.stderr)
logger.warning("OFFICECONVERT_S3_TRACE enabled: S3 HTTP dumps on stderr")
try:
store.ensure_bucket(config.s3_bucket)
store.require_bucket()
except S3Error as exc:
log_s3_error(
"ensure_bucket",
"require_bucket",
endpoint=config.s3_endpoint,
secure=config.s3_secure,
exc=exc,
@@ -128,7 +128,6 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
try:
upload_url = self._store.presigned_put_url(
self._config.s3_bucket,
upload_key,
ttl_seconds=self._config.s3_session_ttl_seconds,
)
@@ -158,7 +157,6 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
return conversion_pb2.CreateConversionResponse(
conversion_id=conversion_id,
upload_bucket=self._config.s3_bucket,
upload_object_key=upload_key,
upload_url=upload_url,
expires_at=_to_timestamp(expires_at),
@@ -257,7 +255,6 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
await self._cleanup_local_artifacts(session)
await asyncio.to_thread(
self._store.remove_prefix,
self._config.s3_bucket,
session.object_prefix,
)
return conversion_pb2.DeleteConversionResponse(
@@ -289,7 +286,6 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
try:
await asyncio.to_thread(
self._store.fget_object,
self._config.s3_bucket,
session.upload_object_key,
source_path,
)
@@ -433,9 +429,8 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
object_key = (
f"{session.object_prefix}output/slide-{slide.index:04d}{slide.image_path.suffix}"
)
self._store.fput_object(self._config.s3_bucket, object_key, slide.image_path)
self._store.fput_object(object_key, slide.image_path)
image_url = self._store.presigned_get_url(
self._config.s3_bucket,
object_key,
ttl_seconds=self._config.s3_session_ttl_seconds,
)
@@ -447,12 +442,10 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
f"{slide.thumbnail_path.suffix}"
)
self._store.fput_object(
self._config.s3_bucket,
thumbnail_object_key,
slide.thumbnail_path,
)
thumbnail_image_url = self._store.presigned_get_url(
self._config.s3_bucket,
thumbnail_object_key,
ttl_seconds=self._config.s3_session_ttl_seconds,
)
@@ -514,7 +507,6 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
await asyncio.sleep(self._config.conversion_cleanup_delay_seconds)
await asyncio.to_thread(
self._store.remove_prefix,
self._config.s3_bucket,
session.object_prefix,
)
except asyncio.CancelledError:
@@ -6,7 +6,6 @@ import logging
from datetime import timedelta
from pathlib import Path
from typing import TextIO
from urllib.parse import urlparse
from minio import Minio
from minio.deleteobjects import DeleteObject
@@ -54,6 +53,7 @@ class S3Store:
def __init__(
self,
*,
bucket: str,
endpoint: str,
access_key: str,
secret_key: str,
@@ -63,6 +63,7 @@ class S3Store:
public_secure: bool,
) -> None:
"""Initialize S3 clients for internal and public URL generation."""
self._bucket = bucket
self._client = Minio(
endpoint,
access_key=access_key,
@@ -83,55 +84,44 @@ class S3Store:
self._client.trace_on(stream)
self._public_client.trace_on(stream)
def ensure_bucket(self, bucket_name: str) -> None:
"""Create a bucket if it does not already exist.
def require_bucket(self) -> None:
    """Fail early when the configured bucket is absent.

    Asks the internal client whether the bucket given at construction time
    exists and returns silently when it does.

    Raises:
        RuntimeError: if the client reports that the bucket does not exist.
    """
    bucket_present = self._client.bucket_exists(self._bucket)
    if bucket_present:
        return
    raise RuntimeError(
        f"S3 bucket {self._bucket!r} does not exist; create it before starting the server"
    )
Tries CreateBucket first (idempotent on SeaweedFS and when the caller
owns the bucket). AWS production IAM often grants object access only on
a pre-provisioned bucket; in that case CreateBucket returns
AccessDenied even though HeadBucket succeeds.
"""
try:
self._client.make_bucket(bucket_name)
except S3Error as exc:
if exc.code in ("BucketAlreadyOwnedByYou", "BucketAlreadyExists"):
return
if exc.code in ("AccessDenied", "Forbidden"):
if self._client.bucket_exists(bucket_name):
return
raise
def presigned_put_url(self, bucket_name: str, object_key: str, *, ttl_seconds: int) -> str:
def presigned_put_url(self, object_key: str, *, ttl_seconds: int) -> str:
"""Generate a presigned PUT URL for a single object upload."""
return self._public_client.presigned_put_object(
bucket_name,
self._bucket,
object_key,
expires=timedelta(seconds=ttl_seconds),
)
def presigned_get_url(self, bucket_name: str, object_key: str, *, ttl_seconds: int) -> str:
def presigned_get_url(self, object_key: str, *, ttl_seconds: int) -> str:
"""Generate a presigned GET URL for downloading one object."""
return self._public_client.presigned_get_object(
bucket_name,
self._bucket,
object_key,
expires=timedelta(seconds=ttl_seconds),
)
def fget_object(self, bucket_name: str, object_key: str, output_path: Path) -> None:
def fget_object(self, object_key: str, output_path: Path) -> None:
"""Download one object from storage to a local filesystem path."""
output_path.parent.mkdir(parents=True, exist_ok=True)
self._client.fget_object(bucket_name, object_key, str(output_path))
self._client.fget_object(self._bucket, object_key, str(output_path))
def fput_object(self, bucket_name: str, object_key: str, source_path: Path) -> None:
def fput_object(self, object_key: str, source_path: Path) -> None:
"""Upload one local filesystem object to storage."""
self._client.fput_object(bucket_name, object_key, str(source_path))
self._client.fput_object(self._bucket, object_key, str(source_path))
def remove_prefix(self, bucket_name: str, prefix: str) -> None:
"""Remove all objects under a key prefix within a bucket."""
def remove_prefix(self, prefix: str) -> None:
"""Remove all objects under a key prefix within the configured bucket."""
normalized_prefix = prefix if prefix.endswith("/") else f"{prefix}/"
objects = list(
self._client.list_objects(
bucket_name,
self._bucket,
prefix=normalized_prefix,
recursive=True,
)
@@ -149,7 +139,7 @@ class S3Store:
delete_requests.append(DeleteObject(object_name))
errors = self._client.remove_objects(
bucket_name,
self._bucket,
delete_requests,
)
for err in errors:
@@ -158,10 +148,3 @@ class S3Store:
raise RuntimeError(
f"failed to delete object {object_name}: {message}"
)
def object_key_from_presigned_url(url: str) -> str:
    """Extract the object key from a presigned URL path for diagnostics.

    The first non-empty path segment is treated as the bucket name and
    dropped; the remaining segments are joined with "/" to form the key.
    Each segment is percent-decoded so the result matches the stored key
    (e.g. ``my%20deck.pptx`` becomes ``my deck.pptx``) instead of its
    URL-escaped form.

    Args:
        url: Presigned URL whose path is ``/<bucket>/<object key>``.

    Returns:
        The decoded object key, or an empty string when the path holds
        fewer than two non-empty segments.

    NOTE(review): assumes path-style addressing; with virtual-host-style URLs
    the first key segment would be mistaken for the bucket — confirm callers.
    """
    # Function-scope import keeps this helper self-contained.
    from urllib.parse import unquote, urlparse

    segments = [part for part in urlparse(url).path.split("/") if part]
    if len(segments) < 2:
        return ""
    # Decode after splitting so an escaped "/" (%2F) inside a segment is not
    # mistaken for a path separator.
    return "/".join(unquote(part) for part in segments[1:])