diff --git a/Makefile b/Makefile index 30473d3..2eb5338 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ SHELL := /bin/sh BUF ?= buf -.PHONY: buf-lint buf-generate py-sync py-test go-test test compose-up compose-up-dev run-server +.PHONY: buf-lint buf-generate py-sync py-test go-test test compose-up compose-up-dev s3-init run-server buf-lint: $(BUF) lint @@ -32,7 +32,21 @@ compose-up: compose-up-dev: docker compose --env-file .env.example -f deploy/docker-compose.dev.yml up -run-server: +s3-init: + @set -a; \ + if [ -f .env ]; then . ./.env; fi; \ + set +a; \ + endpoint="$${S3_ENDPOINT:-localhost:8333}"; \ + case "$$endpoint" in seaweedfs:8333) endpoint=localhost:8333 ;; esac; \ + bucket="$${S3_BUCKET:-officeconvert}"; \ + access_key="$${S3_ACCESS_KEY:-minioadmin}"; \ + secret_key="$${S3_SECRET_KEY:-minioadmin}"; \ + port="$${endpoint#*:}"; \ + docker run --rm --add-host=host.docker.internal:host-gateway minio/mc:latest /bin/sh -c " \ + mc alias set local http://host.docker.internal:$$port '$$access_key' '$$secret_key' && \ + mc mb local/$$bucket --ignore-existing" + +run-server: s3-init @set -a; \ if [ -f .env ]; then . ./.env; fi; \ set +a; \ diff --git a/README.md b/README.md index f9320d7..6ab1048 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ Use `.env.example` as your baseline env configuration. ## Storage Backend Notes -- Local development defaults to **SeaweedFS** (S3-compatible) via Docker Compose. +- Local development defaults to **SeaweedFS** (S3-compatible) via Docker Compose. Compose runs an `s3-init` step that creates the dev bucket before the server starts. - Production can use any S3-compatible provider; **AWS S3** is the expected choice. - The Python server uses the `minio` Python SDK against the S3 API. - Runtime configuration uses `S3_*` environment variables. @@ -166,11 +166,11 @@ S3_SECRET_KEY=... Use your bucket's regional hostname for both endpoints unless you deliberately split internal vs client-facing access. 
`S3_PUBLIC_ENDPOINT` must be reachable by whatever uploads and downloads via presigned URLs (clients, not just the server). -On startup the server calls `CreateBucket` if the bucket is missing. In AWS it is simpler to **pre-create the bucket** and grant object permissions only (see IAM below). +On startup the server verifies the bucket exists via `HeadBucket` and fails fast if it is missing. **Pre-create the bucket** before deploying (see IAM below). **IAM permissions** -Scope access to the single bucket. Object keys are per-conversion prefixes, so list/delete can target the whole bucket: +Scope access to the single bucket. Object keys are per-conversion prefixes, so list/delete can target the whole bucket. Startup verification uses `HeadBucket`, which is satisfied by `s3:ListBucket` on the bucket ARN: ```json { @@ -178,7 +178,7 @@ Scope access to the single bucket. Object keys are per-conversion prefixes, so l "Statement": [ { "Effect": "Allow", - "Action": ["s3:ListBucket"], + "Action": ["s3:ListBucket"], "Resource": "arn:aws:s3:::officeconvert-prod" }, { @@ -190,8 +190,6 @@ Scope access to the single bucket. Object keys are per-conversion prefixes, so l } ``` -Add `s3:CreateBucket` on `arn:aws:s3:::officeconvert-prod` only if you want the server to create the bucket on first boot. - **CORS** Required only if uploads or downloads go **directly from a browser** to presigned URLs. Server-side clients (`curl`, the Go client) do not need CORS. Allow `PUT` and `GET` for your web origin on the bucket. 
diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml index 162a9dd..f309045 100644 --- a/deploy/docker-compose.yml +++ b/deploy/docker-compose.yml @@ -14,12 +14,27 @@ services: volumes: - seaweedfs_data:/data + s3-init: + image: minio/mc:latest + depends_on: + - seaweedfs + environment: + AWS_ACCESS_KEY_ID: ${S3_ACCESS_KEY:-minioadmin} + AWS_SECRET_ACCESS_KEY: ${S3_SECRET_KEY:-minioadmin} + S3_BUCKET: ${S3_BUCKET:-officeconvert} + entrypoint: > + /bin/sh -c " + until mc alias set local http://seaweedfs:8333 $$AWS_ACCESS_KEY_ID $$AWS_SECRET_ACCESS_KEY; do sleep 1; done && + mc mb local/$$S3_BUCKET --ignore-existing + " + server: build: context: .. dockerfile: Dockerfile.server depends_on: - - seaweedfs + s3-init: + condition: service_completed_successfully environment: S3_ENDPOINT: ${S3_ENDPOINT:-seaweedfs:8333} S3_PUBLIC_ENDPOINT: ${S3_PUBLIC_ENDPOINT:-localhost:8333} diff --git a/docker-compose.yml b/docker-compose.yml index 5fa108f..cb798a4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,10 +15,25 @@ services: volumes: - seaweedfs_data:/data + s3-init: + image: minio/mc:latest + depends_on: + - seaweedfs + environment: + AWS_ACCESS_KEY_ID: ${S3_ACCESS_KEY:-minioadmin} + AWS_SECRET_ACCESS_KEY: ${S3_SECRET_KEY:-minioadmin} + S3_BUCKET: ${S3_BUCKET:-officeconvert} + entrypoint: > + /bin/sh -c " + until mc alias set local http://seaweedfs:8333 $$AWS_ACCESS_KEY_ID $$AWS_SECRET_ACCESS_KEY; do sleep 1; done && + mc mb local/$$S3_BUCKET --ignore-existing + " + server: image: gitea.auvem.com/end/officeconvert-server:${OFFICECONVERT_IMAGE_TAG:-latest} depends_on: - - seaweedfs + s3-init: + condition: service_completed_successfully environment: S3_ENDPOINT: ${S3_ENDPOINT:-seaweedfs:8333} S3_PUBLIC_ENDPOINT: ${S3_PUBLIC_ENDPOINT:-localhost:8333} diff --git a/gen/go/officeconvertapi/v1/conversion.pb.go b/gen/go/officeconvertapi/v1/conversion.pb.go index 550c2c2..64f768d 100644 --- a/gen/go/officeconvertapi/v1/conversion.pb.go +++ 
b/gen/go/officeconvertapi/v1/conversion.pb.go @@ -777,7 +777,6 @@ type CreateConversionResponse struct { state protoimpl.MessageState `protogen:"open.v1"` // Session identifier: KSUID in standard base62 text form. Well-formed values are at most 27 characters (see https://github.com/segmentio/ksuid). ConversionId string `protobuf:"bytes,1,opt,name=conversion_id,json=conversionId,proto3" json:"conversion_id,omitempty"` - UploadBucket string `protobuf:"bytes,2,opt,name=upload_bucket,json=uploadBucket,proto3" json:"upload_bucket,omitempty"` UploadObjectKey string `protobuf:"bytes,3,opt,name=upload_object_key,json=uploadObjectKey,proto3" json:"upload_object_key,omitempty"` UploadUrl string `protobuf:"bytes,4,opt,name=upload_url,json=uploadUrl,proto3" json:"upload_url,omitempty"` ExpiresAt *timestamppb.Timestamp `protobuf:"bytes,5,opt,name=expires_at,json=expiresAt,proto3" json:"expires_at,omitempty"` @@ -822,13 +821,6 @@ func (x *CreateConversionResponse) GetConversionId() string { return "" } -func (x *CreateConversionResponse) GetUploadBucket() string { - if x != nil { - return x.UploadBucket - } - return "" -} - func (x *CreateConversionResponse) GetUploadObjectKey() string { if x != nil { return x.UploadObjectKey @@ -1330,15 +1322,14 @@ const file_officeconvertapi_v1_conversion_proto_rawDesc = "" + "\x0fsource_filename\x18\x01 \x01(\tR\x0esourceFilename\x12;\n" + "\x04full\x18\x02 \x01(\v2'.officeconvertapi.v1.SlideRasterOptionsR\x04full\x12E\n" + "\tthumbnail\x18\x03 \x01(\v2'.officeconvertapi.v1.SlideRasterOptionsR\tthumbnail\x127\n" + - "\x05notes\x18\x04 \x01(\v2!.officeconvertapi.v1.NotesOptionsR\x05notes\"\xea\x01\n" + + "\x05notes\x18\x04 \x01(\v2!.officeconvertapi.v1.NotesOptionsR\x05notes\"\xda\x01\n" + "\x18CreateConversionResponse\x12#\n" + - "\rconversion_id\x18\x01 \x01(\tR\fconversionId\x12#\n" + - "\rupload_bucket\x18\x02 \x01(\tR\fuploadBucket\x12*\n" + + "\rconversion_id\x18\x01 \x01(\tR\fconversionId\x12*\n" + "\x11upload_object_key\x18\x03 
\x01(\tR\x0fuploadObjectKey\x12\x1d\n" + "\n" + "upload_url\x18\x04 \x01(\tR\tuploadUrl\x129\n" + "\n" + - "expires_at\x18\x05 \x01(\v2\x1a.google.protobuf.TimestampR\texpiresAt\"=\n" + + "expires_at\x18\x05 \x01(\v2\x1a.google.protobuf.TimestampR\texpiresAtJ\x04\b\x02\x10\x03R\rupload_bucket\"=\n" + "\x16StartConversionRequest\x12#\n" + "\rconversion_id\x18\x01 \x01(\tR\fconversionId\"}\n" + "\x17StartConversionResponse\x12#\n" + diff --git a/gen/python/officeconvertapi/v1/conversion_pb2.py b/gen/python/officeconvertapi/v1/conversion_pb2.py index 0e45201..e129040 100644 --- a/gen/python/officeconvertapi/v1/conversion_pb2.py +++ b/gen/python/officeconvertapi/v1/conversion_pb2.py @@ -2,7 +2,7 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! # NO CHECKED-IN PROTOBUF GENCODE # source: officeconvertapi/v1/conversion.proto -# Protobuf Python Version: 7.35.0 +# Protobuf Python Version: 7.35.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,7 +13,7 @@ _runtime_version.ValidateProtobufRuntimeVersion( _runtime_version.Domain.PUBLIC, 7, 35, - 0, + 1, '', 'officeconvertapi/v1/conversion.proto' ) @@ -25,7 +25,7 @@ _sym_db = _symbol_database.Default() from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n$officeconvertapi/v1/conversion.proto\x12\x13officeconvertapi.v1\x1a\x1fgoogle/protobuf/timestamp.proto\"-\n\x11JpegOutputOptions\x12\x18\n\x07quality\x18\x01 \x01(\x05R\x07quality\"\xa7\x01\n\x12SlideRasterOptions\x12I\n\nresolution\x18\x01 \x01(\x0e\x32).officeconvertapi.v1.ConversionResolutionR\nresolution\x12<\n\x04jpeg\x18\x02 \x01(\x0b\x32&.officeconvertapi.v1.JpegOutputOptionsH\x00R\x04jpegB\x08\n\x06\x66ormat\"\x87\x02\n\x14HtmlFormattingPolicy\x12\x1f\n\x0bignore_bold\x18\x01 \x01(\x08R\nignoreBold\x12#\n\rignore_italic\x18\x02 
\x01(\x08R\x0cignoreItalic\x12)\n\x10ignore_underline\x18\x03 \x01(\x08R\x0fignoreUnderline\x12\x31\n\x14ignore_strikethrough\x18\x04 \x01(\x08R\x13ignoreStrikethrough\x12(\n\x10ignore_font_size\x18\x05 \x01(\x08R\x0eignoreFontSize\x12!\n\x0cignore_color\x18\x06 \x01(\x08R\x0bignoreColor\"\xec\x01\n\x0cNotesOptions\x12\x38\n\x06\x66ormat\x18\x01 \x01(\x0e\x32 .officeconvertapi.v1.NotesFormatR\x06\x66ormat\x12:\n\x17html_use_paragraph_tags\x18\x02 \x01(\x08H\x00R\x14htmlUseParagraphTags\x88\x01\x01\x12J\n\x0bhtml_policy\x18\x03 \x01(\x0b\x32).officeconvertapi.v1.HtmlFormattingPolicyR\nhtmlPolicyB\x1a\n\x18_html_use_paragraph_tags\"\xaa\x01\n\x05Slide\x12\x14\n\x05index\x18\x01 \x01(\x05R\x05index\x12\x1f\n\x0bnotes_plain\x18\x02 \x01(\tR\nnotesPlain\x12\x1b\n\timage_url\x18\x03 \x01(\tR\x08imageUrl\x12.\n\x13thumbnail_image_url\x18\x04 \x01(\tR\x11thumbnailImageUrl\x12\x1d\n\nnotes_html\x18\x05 \x01(\tR\tnotesHtml\"\xca\x02\n\tSlideDeck\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12\'\n\x0fsource_filename\x18\x02 \x01(\tR\x0esourceFilename\x12\x32\n\x06slides\x18\x03 \x03(\x0b\x32\x1a.officeconvertapi.v1.SlideR\x06slides\x12\x39\n\ncreated_at\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tcreatedAt\x12\x14\n\x05width\x18\x05 \x01(\x05R\x05width\x12\x16\n\x06height\x18\x06 \x01(\x05R\x06height\x12\'\n\x0fthumbnail_width\x18\x07 \x01(\x05R\x0ethumbnailWidth\x12)\n\x10thumbnail_height\x18\x08 \x01(\x05R\x0fthumbnailHeight\"\xff\x01\n\x17\x43reateConversionRequest\x12\'\n\x0fsource_filename\x18\x01 \x01(\tR\x0esourceFilename\x12;\n\x04\x66ull\x18\x02 \x01(\x0b\x32\'.officeconvertapi.v1.SlideRasterOptionsR\x04\x66ull\x12\x45\n\tthumbnail\x18\x03 \x01(\x0b\x32\'.officeconvertapi.v1.SlideRasterOptionsR\tthumbnail\x12\x37\n\x05notes\x18\x04 \x01(\x0b\x32!.officeconvertapi.v1.NotesOptionsR\x05notes\"\xea\x01\n\x18\x43reateConversionResponse\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12#\n\rupload_bucket\x18\x02 
\x01(\tR\x0cuploadBucket\x12*\n\x11upload_object_key\x18\x03 \x01(\tR\x0fuploadObjectKey\x12\x1d\n\nupload_url\x18\x04 \x01(\tR\tuploadUrl\x12\x39\n\nexpires_at\x18\x05 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\texpiresAt\"=\n\x16StartConversionRequest\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\"}\n\x17StartConversionResponse\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12=\n\x06status\x18\x02 \x01(\x0e\x32%.officeconvertapi.v1.ConversionStatusR\x06status\"A\n\x1aGetConversionStatusRequest\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\"\xeb\x02\n\x1bGetConversionStatusResponse\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12=\n\x06status\x18\x02 \x01(\x0e\x32%.officeconvertapi.v1.ConversionStatusR\x06status\x12#\n\rerror_message\x18\x03 \x01(\tR\x0c\x65rrorMessage\x12\x39\n\nupdated_at\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tupdatedAt\x12:\n\x05phase\x18\x05 \x01(\x0e\x32$.officeconvertapi.v1.ConversionPhaseR\x05phase\x12)\n\x10\x63urrent_progress\x18\x06 \x01(\x05R\x0f\x63urrentProgress\x12!\n\x0cmax_progress\x18\x07 \x01(\x05R\x0bmaxProgress\":\n\x13GetSlideDeckRequest\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\"U\n\x14GetSlideDeckResponse\x12=\n\nslide_deck\x18\x01 \x01(\x0b\x32\x1e.officeconvertapi.v1.SlideDeckR\tslideDeck\">\n\x17\x44\x65leteConversionRequest\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\"Y\n\x18\x44\x65leteConversionResponse\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12\x18\n\x07\x64\x65leted\x18\x02 \x01(\x08R\x07\x64\x65leted*\xb2\x01\n\x10\x43onversionStatus\x12!\n\x1d\x43ONVERSION_STATUS_UNSPECIFIED\x10\x00\x12\x1d\n\x19\x43ONVERSION_STATUS_PENDING\x10\x01\x12\x1d\n\x19\x43ONVERSION_STATUS_RUNNING\x10\x02\x12\x1f\n\x1b\x43ONVERSION_STATUS_SUCCEEDED\x10\x03\x12\x1c\n\x18\x43ONVERSION_STATUS_FAILED\x10\x04*\xe7\x01\n\x0f\x43onversionPhase\x12 
\n\x1c\x43ONVERSION_PHASE_UNSPECIFIED\x10\x00\x12\x1d\n\x19\x43ONVERSION_PHASE_INACTIVE\x10\x01\x12%\n!CONVERSION_PHASE_EXTRACTING_NOTES\x10\x02\x12 \n\x1c\x43ONVERSION_PHASE_PPTX_TO_PDF\x10\x03\x12\"\n\x1e\x43ONVERSION_PHASE_PDF_TO_IMAGES\x10\x04\x12&\n\"CONVERSION_PHASE_UPLOADING_RESULTS\x10\x05*\xd6\x01\n\x14\x43onversionResolution\x12%\n!CONVERSION_RESOLUTION_UNSPECIFIED\x10\x00\x12\x1c\n\x18\x43ONVERSION_RESOLUTION_SD\x10\x01\x12\x1c\n\x18\x43ONVERSION_RESOLUTION_HD\x10\x02\x12\x1d\n\x19\x43ONVERSION_RESOLUTION_FHD\x10\x03\x12\x1d\n\x19\x43ONVERSION_RESOLUTION_QHD\x10\x04\x12\x1d\n\x19\x43ONVERSION_RESOLUTION_UHD\x10\x05*Z\n\x0bNotesFormat\x12\x1c\n\x18NOTES_FORMAT_UNSPECIFIED\x10\x00\x12\x16\n\x12NOTES_FORMAT_PLAIN\x10\x01\x12\x15\n\x11NOTES_FORMAT_HTML\x10\x02\x32\xcc\x04\n\x11\x43onversionService\x12q\n\x10\x43reateConversion\x12,.officeconvertapi.v1.CreateConversionRequest\x1a-.officeconvertapi.v1.CreateConversionResponse\"\x00\x12n\n\x0fStartConversion\x12+.officeconvertapi.v1.StartConversionRequest\x1a,.officeconvertapi.v1.StartConversionResponse\"\x00\x12z\n\x13GetConversionStatus\x12/.officeconvertapi.v1.GetConversionStatusRequest\x1a\x30.officeconvertapi.v1.GetConversionStatusResponse\"\x00\x12\x65\n\x0cGetSlideDeck\x12(.officeconvertapi.v1.GetSlideDeckRequest\x1a).officeconvertapi.v1.GetSlideDeckResponse\"\x00\x12q\n\x10\x44\x65leteConversion\x12,.officeconvertapi.v1.DeleteConversionRequest\x1a-.officeconvertapi.v1.DeleteConversionResponse\"\x00\x42QZOgitea.auvem.com/end/officeconvert/gen/go/officeconvertapi/v1;officeconvertapiv1b\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n$officeconvertapi/v1/conversion.proto\x12\x13officeconvertapi.v1\x1a\x1fgoogle/protobuf/timestamp.proto\"-\n\x11JpegOutputOptions\x12\x18\n\x07quality\x18\x01 \x01(\x05R\x07quality\"\xa7\x01\n\x12SlideRasterOptions\x12I\n\nresolution\x18\x01 \x01(\x0e\x32).officeconvertapi.v1.ConversionResolutionR\nresolution\x12<\n\x04jpeg\x18\x02 
\x01(\x0b\x32&.officeconvertapi.v1.JpegOutputOptionsH\x00R\x04jpegB\x08\n\x06\x66ormat\"\x87\x02\n\x14HtmlFormattingPolicy\x12\x1f\n\x0bignore_bold\x18\x01 \x01(\x08R\nignoreBold\x12#\n\rignore_italic\x18\x02 \x01(\x08R\x0cignoreItalic\x12)\n\x10ignore_underline\x18\x03 \x01(\x08R\x0fignoreUnderline\x12\x31\n\x14ignore_strikethrough\x18\x04 \x01(\x08R\x13ignoreStrikethrough\x12(\n\x10ignore_font_size\x18\x05 \x01(\x08R\x0eignoreFontSize\x12!\n\x0cignore_color\x18\x06 \x01(\x08R\x0bignoreColor\"\xec\x01\n\x0cNotesOptions\x12\x38\n\x06\x66ormat\x18\x01 \x01(\x0e\x32 .officeconvertapi.v1.NotesFormatR\x06\x66ormat\x12:\n\x17html_use_paragraph_tags\x18\x02 \x01(\x08H\x00R\x14htmlUseParagraphTags\x88\x01\x01\x12J\n\x0bhtml_policy\x18\x03 \x01(\x0b\x32).officeconvertapi.v1.HtmlFormattingPolicyR\nhtmlPolicyB\x1a\n\x18_html_use_paragraph_tags\"\xaa\x01\n\x05Slide\x12\x14\n\x05index\x18\x01 \x01(\x05R\x05index\x12\x1f\n\x0bnotes_plain\x18\x02 \x01(\tR\nnotesPlain\x12\x1b\n\timage_url\x18\x03 \x01(\tR\x08imageUrl\x12.\n\x13thumbnail_image_url\x18\x04 \x01(\tR\x11thumbnailImageUrl\x12\x1d\n\nnotes_html\x18\x05 \x01(\tR\tnotesHtml\"\xca\x02\n\tSlideDeck\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12\'\n\x0fsource_filename\x18\x02 \x01(\tR\x0esourceFilename\x12\x32\n\x06slides\x18\x03 \x03(\x0b\x32\x1a.officeconvertapi.v1.SlideR\x06slides\x12\x39\n\ncreated_at\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tcreatedAt\x12\x14\n\x05width\x18\x05 \x01(\x05R\x05width\x12\x16\n\x06height\x18\x06 \x01(\x05R\x06height\x12\'\n\x0fthumbnail_width\x18\x07 \x01(\x05R\x0ethumbnailWidth\x12)\n\x10thumbnail_height\x18\x08 \x01(\x05R\x0fthumbnailHeight\"\xff\x01\n\x17\x43reateConversionRequest\x12\'\n\x0fsource_filename\x18\x01 \x01(\tR\x0esourceFilename\x12;\n\x04\x66ull\x18\x02 \x01(\x0b\x32\'.officeconvertapi.v1.SlideRasterOptionsR\x04\x66ull\x12\x45\n\tthumbnail\x18\x03 \x01(\x0b\x32\'.officeconvertapi.v1.SlideRasterOptionsR\tthumbnail\x12\x37\n\x05notes\x18\x04 
\x01(\x0b\x32!.officeconvertapi.v1.NotesOptionsR\x05notes\"\xda\x01\n\x18\x43reateConversionResponse\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12*\n\x11upload_object_key\x18\x03 \x01(\tR\x0fuploadObjectKey\x12\x1d\n\nupload_url\x18\x04 \x01(\tR\tuploadUrl\x12\x39\n\nexpires_at\x18\x05 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\texpiresAtJ\x04\x08\x02\x10\x03R\rupload_bucket\"=\n\x16StartConversionRequest\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\"}\n\x17StartConversionResponse\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12=\n\x06status\x18\x02 \x01(\x0e\x32%.officeconvertapi.v1.ConversionStatusR\x06status\"A\n\x1aGetConversionStatusRequest\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\"\xeb\x02\n\x1bGetConversionStatusResponse\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12=\n\x06status\x18\x02 \x01(\x0e\x32%.officeconvertapi.v1.ConversionStatusR\x06status\x12#\n\rerror_message\x18\x03 \x01(\tR\x0c\x65rrorMessage\x12\x39\n\nupdated_at\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tupdatedAt\x12:\n\x05phase\x18\x05 \x01(\x0e\x32$.officeconvertapi.v1.ConversionPhaseR\x05phase\x12)\n\x10\x63urrent_progress\x18\x06 \x01(\x05R\x0f\x63urrentProgress\x12!\n\x0cmax_progress\x18\x07 \x01(\x05R\x0bmaxProgress\":\n\x13GetSlideDeckRequest\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\"U\n\x14GetSlideDeckResponse\x12=\n\nslide_deck\x18\x01 \x01(\x0b\x32\x1e.officeconvertapi.v1.SlideDeckR\tslideDeck\">\n\x17\x44\x65leteConversionRequest\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\"Y\n\x18\x44\x65leteConversionResponse\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12\x18\n\x07\x64\x65leted\x18\x02 
\x01(\x08R\x07\x64\x65leted*\xb2\x01\n\x10\x43onversionStatus\x12!\n\x1d\x43ONVERSION_STATUS_UNSPECIFIED\x10\x00\x12\x1d\n\x19\x43ONVERSION_STATUS_PENDING\x10\x01\x12\x1d\n\x19\x43ONVERSION_STATUS_RUNNING\x10\x02\x12\x1f\n\x1b\x43ONVERSION_STATUS_SUCCEEDED\x10\x03\x12\x1c\n\x18\x43ONVERSION_STATUS_FAILED\x10\x04*\xe7\x01\n\x0f\x43onversionPhase\x12 \n\x1c\x43ONVERSION_PHASE_UNSPECIFIED\x10\x00\x12\x1d\n\x19\x43ONVERSION_PHASE_INACTIVE\x10\x01\x12%\n!CONVERSION_PHASE_EXTRACTING_NOTES\x10\x02\x12 \n\x1c\x43ONVERSION_PHASE_PPTX_TO_PDF\x10\x03\x12\"\n\x1e\x43ONVERSION_PHASE_PDF_TO_IMAGES\x10\x04\x12&\n\"CONVERSION_PHASE_UPLOADING_RESULTS\x10\x05*\xd6\x01\n\x14\x43onversionResolution\x12%\n!CONVERSION_RESOLUTION_UNSPECIFIED\x10\x00\x12\x1c\n\x18\x43ONVERSION_RESOLUTION_SD\x10\x01\x12\x1c\n\x18\x43ONVERSION_RESOLUTION_HD\x10\x02\x12\x1d\n\x19\x43ONVERSION_RESOLUTION_FHD\x10\x03\x12\x1d\n\x19\x43ONVERSION_RESOLUTION_QHD\x10\x04\x12\x1d\n\x19\x43ONVERSION_RESOLUTION_UHD\x10\x05*Z\n\x0bNotesFormat\x12\x1c\n\x18NOTES_FORMAT_UNSPECIFIED\x10\x00\x12\x16\n\x12NOTES_FORMAT_PLAIN\x10\x01\x12\x15\n\x11NOTES_FORMAT_HTML\x10\x02\x32\xcc\x04\n\x11\x43onversionService\x12q\n\x10\x43reateConversion\x12,.officeconvertapi.v1.CreateConversionRequest\x1a-.officeconvertapi.v1.CreateConversionResponse\"\x00\x12n\n\x0fStartConversion\x12+.officeconvertapi.v1.StartConversionRequest\x1a,.officeconvertapi.v1.StartConversionResponse\"\x00\x12z\n\x13GetConversionStatus\x12/.officeconvertapi.v1.GetConversionStatusRequest\x1a\x30.officeconvertapi.v1.GetConversionStatusResponse\"\x00\x12\x65\n\x0cGetSlideDeck\x12(.officeconvertapi.v1.GetSlideDeckRequest\x1a).officeconvertapi.v1.GetSlideDeckResponse\"\x00\x12q\n\x10\x44\x65leteConversion\x12,.officeconvertapi.v1.DeleteConversionRequest\x1a-.officeconvertapi.v1.DeleteConversionResponse\"\x00\x42QZOgitea.auvem.com/end/officeconvert/gen/go/officeconvertapi/v1;officeconvertapiv1b\x06proto3') _globals = globals() 
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -33,14 +33,14 @@ _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'officeconvertapi.v1.convers if not _descriptor._USE_C_DESCRIPTORS: _globals['DESCRIPTOR']._loaded_options = None _globals['DESCRIPTOR']._serialized_options = b'ZOgitea.auvem.com/end/officeconvert/gen/go/officeconvertapi/v1;officeconvertapiv1' - _globals['_CONVERSIONSTATUS']._serialized_start=2743 - _globals['_CONVERSIONSTATUS']._serialized_end=2921 - _globals['_CONVERSIONPHASE']._serialized_start=2924 - _globals['_CONVERSIONPHASE']._serialized_end=3155 - _globals['_CONVERSIONRESOLUTION']._serialized_start=3158 - _globals['_CONVERSIONRESOLUTION']._serialized_end=3372 - _globals['_NOTESFORMAT']._serialized_start=3374 - _globals['_NOTESFORMAT']._serialized_end=3464 + _globals['_CONVERSIONSTATUS']._serialized_start=2727 + _globals['_CONVERSIONSTATUS']._serialized_end=2905 + _globals['_CONVERSIONPHASE']._serialized_start=2908 + _globals['_CONVERSIONPHASE']._serialized_end=3139 + _globals['_CONVERSIONRESOLUTION']._serialized_start=3142 + _globals['_CONVERSIONRESOLUTION']._serialized_end=3356 + _globals['_NOTESFORMAT']._serialized_start=3358 + _globals['_NOTESFORMAT']._serialized_end=3448 _globals['_JPEGOUTPUTOPTIONS']._serialized_start=94 _globals['_JPEGOUTPUTOPTIONS']._serialized_end=139 _globals['_SLIDERASTEROPTIONS']._serialized_start=142 @@ -56,23 +56,23 @@ if not _descriptor._USE_C_DESCRIPTORS: _globals['_CREATECONVERSIONREQUEST']._serialized_start=1323 _globals['_CREATECONVERSIONREQUEST']._serialized_end=1578 _globals['_CREATECONVERSIONRESPONSE']._serialized_start=1581 - _globals['_CREATECONVERSIONRESPONSE']._serialized_end=1815 - _globals['_STARTCONVERSIONREQUEST']._serialized_start=1817 - _globals['_STARTCONVERSIONREQUEST']._serialized_end=1878 - _globals['_STARTCONVERSIONRESPONSE']._serialized_start=1880 - _globals['_STARTCONVERSIONRESPONSE']._serialized_end=2005 - _globals['_GETCONVERSIONSTATUSREQUEST']._serialized_start=2007 - 
_globals['_GETCONVERSIONSTATUSREQUEST']._serialized_end=2072 - _globals['_GETCONVERSIONSTATUSRESPONSE']._serialized_start=2075 - _globals['_GETCONVERSIONSTATUSRESPONSE']._serialized_end=2438 - _globals['_GETSLIDEDECKREQUEST']._serialized_start=2440 - _globals['_GETSLIDEDECKREQUEST']._serialized_end=2498 - _globals['_GETSLIDEDECKRESPONSE']._serialized_start=2500 - _globals['_GETSLIDEDECKRESPONSE']._serialized_end=2585 - _globals['_DELETECONVERSIONREQUEST']._serialized_start=2587 - _globals['_DELETECONVERSIONREQUEST']._serialized_end=2649 - _globals['_DELETECONVERSIONRESPONSE']._serialized_start=2651 - _globals['_DELETECONVERSIONRESPONSE']._serialized_end=2740 - _globals['_CONVERSIONSERVICE']._serialized_start=3467 - _globals['_CONVERSIONSERVICE']._serialized_end=4055 + _globals['_CREATECONVERSIONRESPONSE']._serialized_end=1799 + _globals['_STARTCONVERSIONREQUEST']._serialized_start=1801 + _globals['_STARTCONVERSIONREQUEST']._serialized_end=1862 + _globals['_STARTCONVERSIONRESPONSE']._serialized_start=1864 + _globals['_STARTCONVERSIONRESPONSE']._serialized_end=1989 + _globals['_GETCONVERSIONSTATUSREQUEST']._serialized_start=1991 + _globals['_GETCONVERSIONSTATUSREQUEST']._serialized_end=2056 + _globals['_GETCONVERSIONSTATUSRESPONSE']._serialized_start=2059 + _globals['_GETCONVERSIONSTATUSRESPONSE']._serialized_end=2422 + _globals['_GETSLIDEDECKREQUEST']._serialized_start=2424 + _globals['_GETSLIDEDECKREQUEST']._serialized_end=2482 + _globals['_GETSLIDEDECKRESPONSE']._serialized_start=2484 + _globals['_GETSLIDEDECKRESPONSE']._serialized_end=2569 + _globals['_DELETECONVERSIONREQUEST']._serialized_start=2571 + _globals['_DELETECONVERSIONREQUEST']._serialized_end=2633 + _globals['_DELETECONVERSIONRESPONSE']._serialized_start=2635 + _globals['_DELETECONVERSIONRESPONSE']._serialized_end=2724 + _globals['_CONVERSIONSERVICE']._serialized_start=3451 + _globals['_CONVERSIONSERVICE']._serialized_end=4039 # @@protoc_insertion_point(module_scope) diff --git 
a/gen/python/officeconvertapi/v1/conversion_pb2.pyi b/gen/python/officeconvertapi/v1/conversion_pb2.pyi index bda4a28..9278062 100644 --- a/gen/python/officeconvertapi/v1/conversion_pb2.pyi +++ b/gen/python/officeconvertapi/v1/conversion_pb2.pyi @@ -149,18 +149,16 @@ class CreateConversionRequest(_message.Message): def __init__(self, source_filename: _Optional[str] = ..., full: _Optional[_Union[SlideRasterOptions, _Mapping]] = ..., thumbnail: _Optional[_Union[SlideRasterOptions, _Mapping]] = ..., notes: _Optional[_Union[NotesOptions, _Mapping]] = ...) -> None: ... class CreateConversionResponse(_message.Message): - __slots__ = ("conversion_id", "upload_bucket", "upload_object_key", "upload_url", "expires_at") + __slots__ = ("conversion_id", "upload_object_key", "upload_url", "expires_at") CONVERSION_ID_FIELD_NUMBER: _ClassVar[int] - UPLOAD_BUCKET_FIELD_NUMBER: _ClassVar[int] UPLOAD_OBJECT_KEY_FIELD_NUMBER: _ClassVar[int] UPLOAD_URL_FIELD_NUMBER: _ClassVar[int] EXPIRES_AT_FIELD_NUMBER: _ClassVar[int] conversion_id: str - upload_bucket: str upload_object_key: str upload_url: str expires_at: _timestamp_pb2.Timestamp - def __init__(self, conversion_id: _Optional[str] = ..., upload_bucket: _Optional[str] = ..., upload_object_key: _Optional[str] = ..., upload_url: _Optional[str] = ..., expires_at: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ...) -> None: ... + def __init__(self, conversion_id: _Optional[str] = ..., upload_object_key: _Optional[str] = ..., upload_url: _Optional[str] = ..., expires_at: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ...) -> None: ... 
class StartConversionRequest(_message.Message): __slots__ = ("conversion_id",) diff --git a/proto/officeconvertapi/v1/conversion.proto b/proto/officeconvertapi/v1/conversion.proto index 12362f7..1cf238b 100644 --- a/proto/officeconvertapi/v1/conversion.proto +++ b/proto/officeconvertapi/v1/conversion.proto @@ -136,7 +136,8 @@ message CreateConversionRequest { message CreateConversionResponse { // Session identifier: KSUID in standard base62 text form. Well-formed values are at most 27 characters (see https://github.com/segmentio/ksuid). string conversion_id = 1; - string upload_bucket = 2; + reserved 2; + reserved "upload_bucket"; string upload_object_key = 3; string upload_url = 4; google.protobuf.Timestamp expires_at = 5; diff --git a/python/packages/server/src/officeconvert_server/app.py b/python/packages/server/src/officeconvert_server/app.py index 735c2a2..33b32ac 100644 --- a/python/packages/server/src/officeconvert_server/app.py +++ b/python/packages/server/src/officeconvert_server/app.py @@ -46,6 +46,7 @@ def create_app() -> ConversionServiceASGIApplication: _configure_application_logging() config = load_server_config() store = S3Store( + bucket=config.s3_bucket, endpoint=config.s3_endpoint, access_key=config.s3_access_key, secret_key=config.s3_secret_key, @@ -58,10 +59,10 @@ def create_app() -> ConversionServiceASGIApplication: store.enable_http_trace(sys.stderr) logger.warning("OFFICECONVERT_S3_TRACE enabled: S3 HTTP dumps on stderr") try: - store.ensure_bucket(config.s3_bucket) + store.require_bucket() except S3Error as exc: log_s3_error( - "ensure_bucket", + "require_bucket", endpoint=config.s3_endpoint, secure=config.s3_secure, exc=exc, diff --git a/python/packages/server/src/officeconvert_server/service.py b/python/packages/server/src/officeconvert_server/service.py index 584c224..50517b6 100644 --- a/python/packages/server/src/officeconvert_server/service.py +++ b/python/packages/server/src/officeconvert_server/service.py @@ -128,7 +128,6 @@ class 
ConversionServiceImpl(conversion_connect.ConversionService): try: upload_url = self._store.presigned_put_url( - self._config.s3_bucket, upload_key, ttl_seconds=self._config.s3_session_ttl_seconds, ) @@ -158,7 +157,6 @@ class ConversionServiceImpl(conversion_connect.ConversionService): return conversion_pb2.CreateConversionResponse( conversion_id=conversion_id, - upload_bucket=self._config.s3_bucket, upload_object_key=upload_key, upload_url=upload_url, expires_at=_to_timestamp(expires_at), @@ -257,7 +255,6 @@ class ConversionServiceImpl(conversion_connect.ConversionService): await self._cleanup_local_artifacts(session) await asyncio.to_thread( self._store.remove_prefix, - self._config.s3_bucket, session.object_prefix, ) return conversion_pb2.DeleteConversionResponse( @@ -289,7 +286,6 @@ class ConversionServiceImpl(conversion_connect.ConversionService): try: await asyncio.to_thread( self._store.fget_object, - self._config.s3_bucket, session.upload_object_key, source_path, ) @@ -433,9 +429,8 @@ class ConversionServiceImpl(conversion_connect.ConversionService): object_key = ( f"{session.object_prefix}output/slide-{slide.index:04d}{slide.image_path.suffix}" ) - self._store.fput_object(self._config.s3_bucket, object_key, slide.image_path) + self._store.fput_object(object_key, slide.image_path) image_url = self._store.presigned_get_url( - self._config.s3_bucket, object_key, ttl_seconds=self._config.s3_session_ttl_seconds, ) @@ -447,12 +442,10 @@ class ConversionServiceImpl(conversion_connect.ConversionService): f"{slide.thumbnail_path.suffix}" ) self._store.fput_object( - self._config.s3_bucket, thumbnail_object_key, slide.thumbnail_path, ) thumbnail_image_url = self._store.presigned_get_url( - self._config.s3_bucket, thumbnail_object_key, ttl_seconds=self._config.s3_session_ttl_seconds, ) @@ -514,7 +507,6 @@ class ConversionServiceImpl(conversion_connect.ConversionService): await asyncio.sleep(self._config.conversion_cleanup_delay_seconds) await asyncio.to_thread( 
self._store.remove_prefix, - self._config.s3_bucket, session.object_prefix, ) except asyncio.CancelledError: diff --git a/python/packages/server/src/officeconvert_server/storage.py b/python/packages/server/src/officeconvert_server/storage.py index e18ba84..07e59b0 100644 --- a/python/packages/server/src/officeconvert_server/storage.py +++ b/python/packages/server/src/officeconvert_server/storage.py @@ -6,7 +6,6 @@ import logging from datetime import timedelta from pathlib import Path from typing import TextIO -from urllib.parse import urlparse from minio import Minio from minio.deleteobjects import DeleteObject @@ -54,6 +53,7 @@ class S3Store: def __init__( self, *, + bucket: str, endpoint: str, access_key: str, secret_key: str, @@ -63,6 +63,7 @@ class S3Store: public_secure: bool, ) -> None: """Initialize S3 clients for internal and public URL generation.""" + self._bucket = bucket self._client = Minio( endpoint, access_key=access_key, @@ -83,55 +84,44 @@ class S3Store: self._client.trace_on(stream) self._public_client.trace_on(stream) - def ensure_bucket(self, bucket_name: str) -> None: - """Create a bucket if it does not already exist. + def require_bucket(self) -> None: + """Verify the configured bucket exists before serving traffic.""" + if not self._client.bucket_exists(self._bucket): + raise RuntimeError( + f"S3 bucket {self._bucket!r} does not exist; create it before starting the server" + ) - Tries CreateBucket first (idempotent on SeaweedFS and when the caller - owns the bucket). AWS production IAM often grants object access only on - a pre-provisioned bucket; in that case CreateBucket returns - AccessDenied even though HeadBucket succeeds. 
- """ - try: - self._client.make_bucket(bucket_name) - except S3Error as exc: - if exc.code in ("BucketAlreadyOwnedByYou", "BucketAlreadyExists"): - return - if exc.code in ("AccessDenied", "Forbidden"): - if self._client.bucket_exists(bucket_name): - return - raise - - def presigned_put_url(self, bucket_name: str, object_key: str, *, ttl_seconds: int) -> str: + def presigned_put_url(self, object_key: str, *, ttl_seconds: int) -> str: """Generate a presigned PUT URL for a single object upload.""" return self._public_client.presigned_put_object( - bucket_name, + self._bucket, object_key, expires=timedelta(seconds=ttl_seconds), ) - def presigned_get_url(self, bucket_name: str, object_key: str, *, ttl_seconds: int) -> str: + def presigned_get_url(self, object_key: str, *, ttl_seconds: int) -> str: """Generate a presigned GET URL for downloading one object.""" return self._public_client.presigned_get_object( - bucket_name, + self._bucket, object_key, expires=timedelta(seconds=ttl_seconds), ) - def fget_object(self, bucket_name: str, object_key: str, output_path: Path) -> None: + def fget_object(self, object_key: str, output_path: Path) -> None: """Download one object from storage to a local filesystem path.""" output_path.parent.mkdir(parents=True, exist_ok=True) - self._client.fget_object(bucket_name, object_key, str(output_path)) + self._client.fget_object(self._bucket, object_key, str(output_path)) - def fput_object(self, bucket_name: str, object_key: str, source_path: Path) -> None: + def fput_object(self, object_key: str, source_path: Path) -> None: """Upload one local filesystem object to storage.""" - self._client.fput_object(bucket_name, object_key, str(source_path)) + self._client.fput_object(self._bucket, object_key, str(source_path)) - def remove_prefix(self, bucket_name: str, prefix: str) -> None: - """Remove all objects under a key prefix within a bucket.""" + def remove_prefix(self, prefix: str) -> None: + """Remove all objects under a key prefix within 
the configured bucket.""" normalized_prefix = prefix if prefix.endswith("/") else f"{prefix}/" objects = list( self._client.list_objects( - bucket_name, + self._bucket, prefix=normalized_prefix, recursive=True, ) @@ -149,7 +139,7 @@ class S3Store: delete_requests.append(DeleteObject(object_name)) errors = self._client.remove_objects( - bucket_name, + self._bucket, delete_requests, ) for err in errors: @@ -158,10 +148,3 @@ class S3Store: raise RuntimeError( f"failed to delete object {object_name}: {message}" ) - - -def object_key_from_presigned_url(url: str) -> str: - """Extract object key from a presigned URL path for diagnostics.""" - path = urlparse(url).path - path_parts = [part for part in path.split("/") if part] - return "/".join(path_parts[1:]) if len(path_parts) >= 2 else ""