add conversion phase & progress metrics, timeout heuristics
This commit is contained in:
@@ -7,4 +7,8 @@ S3_SESSION_TTL_SECONDS=3600
|
||||
CONVERSION_IMAGE_DPI=150
|
||||
CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS=180
|
||||
CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS=600
|
||||
CONVERSION_PPTX_TO_PDF_BASE_TIMEOUT_SECONDS=45
|
||||
CONVERSION_PPTX_TO_PDF_PER_SLIDE_TIMEOUT_SECONDS=3
|
||||
CONVERSION_PDF_TO_IMAGES_BASE_TIMEOUT_SECONDS=30
|
||||
CONVERSION_PDF_TO_IMAGES_PER_SLIDE_TIMEOUT_SECONDS=8
|
||||
CONVERSION_CLEANUP_DELAY_SECONDS=3600
|
||||
|
||||
@@ -44,3 +44,4 @@ coverage.out
|
||||
# But never track Python bytecode/cache artifacts from generated code.
|
||||
gen/**/__pycache__/
|
||||
gen/**/*.py[cod]
|
||||
.cache/
|
||||
|
||||
@@ -35,8 +35,12 @@ run-server:
|
||||
export S3_ACCESS_KEY="$${S3_ACCESS_KEY:-minioadmin}"; \
|
||||
export S3_SECRET_KEY="$${S3_SECRET_KEY:-minioadmin}"; \
|
||||
export S3_SESSION_TTL_SECONDS="$${S3_SESSION_TTL_SECONDS:-3600}"; \
|
||||
export CONVERSION_IMAGE_DPI="$${CONVERSION_IMAGE_DPI:-150}"; \
|
||||
export CONVERSION_IMAGE_DPI="$${CONVERSION_IMAGE_DPI:-72}"; \
|
||||
export CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS="$${CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS:-180}"; \
|
||||
export CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS="$${CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS:-600}"; \
|
||||
export CONVERSION_PPTX_TO_PDF_BASE_TIMEOUT_SECONDS="$${CONVERSION_PPTX_TO_PDF_BASE_TIMEOUT_SECONDS:-45}"; \
|
||||
export CONVERSION_PPTX_TO_PDF_PER_SLIDE_TIMEOUT_SECONDS="$${CONVERSION_PPTX_TO_PDF_PER_SLIDE_TIMEOUT_SECONDS:-3}"; \
|
||||
export CONVERSION_PDF_TO_IMAGES_BASE_TIMEOUT_SECONDS="$${CONVERSION_PDF_TO_IMAGES_BASE_TIMEOUT_SECONDS:-30}"; \
|
||||
export CONVERSION_PDF_TO_IMAGES_PER_SLIDE_TIMEOUT_SECONDS="$${CONVERSION_PDF_TO_IMAGES_PER_SLIDE_TIMEOUT_SECONDS:-8}"; \
|
||||
export CONVERSION_CLEANUP_DELAY_SECONDS="$${CONVERSION_CLEANUP_DELAY_SECONDS:-3600}"; \
|
||||
uv run --project python --package officeconvert-server python -m uvicorn officeconvert_server.app:app --host "$${UVICORN_HOST:-0.0.0.0}" --port "$${UVICORN_PORT:-8080}"
|
||||
|
||||
@@ -27,9 +27,13 @@ services:
|
||||
S3_ACCESS_KEY: ${S3_ACCESS_KEY:-minioadmin}
|
||||
S3_SECRET_KEY: ${S3_SECRET_KEY:-minioadmin}
|
||||
S3_SESSION_TTL_SECONDS: ${S3_SESSION_TTL_SECONDS:-3600}
|
||||
CONVERSION_IMAGE_DPI: ${CONVERSION_IMAGE_DPI:-150}
|
||||
CONVERSION_IMAGE_DPI: ${CONVERSION_IMAGE_DPI:-72}
|
||||
CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS: ${CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS:-180}
|
||||
CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS: ${CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS:-600}
|
||||
CONVERSION_PPTX_TO_PDF_BASE_TIMEOUT_SECONDS: ${CONVERSION_PPTX_TO_PDF_BASE_TIMEOUT_SECONDS:-45}
|
||||
CONVERSION_PPTX_TO_PDF_PER_SLIDE_TIMEOUT_SECONDS: ${CONVERSION_PPTX_TO_PDF_PER_SLIDE_TIMEOUT_SECONDS:-3}
|
||||
CONVERSION_PDF_TO_IMAGES_BASE_TIMEOUT_SECONDS: ${CONVERSION_PDF_TO_IMAGES_BASE_TIMEOUT_SECONDS:-30}
|
||||
CONVERSION_PDF_TO_IMAGES_PER_SLIDE_TIMEOUT_SECONDS: ${CONVERSION_PDF_TO_IMAGES_PER_SLIDE_TIMEOUT_SECONDS:-8}
|
||||
CONVERSION_CLEANUP_DELAY_SECONDS: ${CONVERSION_CLEANUP_DELAY_SECONDS:-3600}
|
||||
ports:
|
||||
- "8080:8080"
|
||||
|
||||
@@ -78,6 +78,65 @@ func (ConversionStatus) EnumDescriptor() ([]byte, []int) {
|
||||
return file_officeconvertapi_v1_conversion_proto_rawDescGZIP(), []int{0}
|
||||
}
|
||||
|
||||
// ConversionPhase represents the active stage for a running conversion.
|
||||
type ConversionPhase int32
|
||||
|
||||
const (
|
||||
ConversionPhase_CONVERSION_PHASE_UNSPECIFIED ConversionPhase = 0
|
||||
ConversionPhase_CONVERSION_PHASE_INACTIVE ConversionPhase = 1
|
||||
ConversionPhase_CONVERSION_PHASE_EXTRACTING_NOTES ConversionPhase = 2
|
||||
ConversionPhase_CONVERSION_PHASE_PPTX_TO_PDF ConversionPhase = 3
|
||||
ConversionPhase_CONVERSION_PHASE_PDF_TO_IMAGES ConversionPhase = 4
|
||||
ConversionPhase_CONVERSION_PHASE_UPLOADING_RESULTS ConversionPhase = 5
|
||||
)
|
||||
|
||||
// Enum value maps for ConversionPhase.
|
||||
var (
|
||||
ConversionPhase_name = map[int32]string{
|
||||
0: "CONVERSION_PHASE_UNSPECIFIED",
|
||||
1: "CONVERSION_PHASE_INACTIVE",
|
||||
2: "CONVERSION_PHASE_EXTRACTING_NOTES",
|
||||
3: "CONVERSION_PHASE_PPTX_TO_PDF",
|
||||
4: "CONVERSION_PHASE_PDF_TO_IMAGES",
|
||||
5: "CONVERSION_PHASE_UPLOADING_RESULTS",
|
||||
}
|
||||
ConversionPhase_value = map[string]int32{
|
||||
"CONVERSION_PHASE_UNSPECIFIED": 0,
|
||||
"CONVERSION_PHASE_INACTIVE": 1,
|
||||
"CONVERSION_PHASE_EXTRACTING_NOTES": 2,
|
||||
"CONVERSION_PHASE_PPTX_TO_PDF": 3,
|
||||
"CONVERSION_PHASE_PDF_TO_IMAGES": 4,
|
||||
"CONVERSION_PHASE_UPLOADING_RESULTS": 5,
|
||||
}
|
||||
)
|
||||
|
||||
func (x ConversionPhase) Enum() *ConversionPhase {
|
||||
p := new(ConversionPhase)
|
||||
*p = x
|
||||
return p
|
||||
}
|
||||
|
||||
func (x ConversionPhase) String() string {
|
||||
return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x))
|
||||
}
|
||||
|
||||
func (ConversionPhase) Descriptor() protoreflect.EnumDescriptor {
|
||||
return file_officeconvertapi_v1_conversion_proto_enumTypes[1].Descriptor()
|
||||
}
|
||||
|
||||
func (ConversionPhase) Type() protoreflect.EnumType {
|
||||
return &file_officeconvertapi_v1_conversion_proto_enumTypes[1]
|
||||
}
|
||||
|
||||
func (x ConversionPhase) Number() protoreflect.EnumNumber {
|
||||
return protoreflect.EnumNumber(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use ConversionPhase.Descriptor instead.
|
||||
func (ConversionPhase) EnumDescriptor() ([]byte, []int) {
|
||||
return file_officeconvertapi_v1_conversion_proto_rawDescGZIP(), []int{1}
|
||||
}
|
||||
|
||||
// Slide contains extracted notes and the rendered image URL for one slide.
|
||||
type Slide struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
@@ -480,6 +539,9 @@ type GetConversionStatusResponse struct {
|
||||
Status ConversionStatus `protobuf:"varint,2,opt,name=status,proto3,enum=officeconvertapi.v1.ConversionStatus" json:"status,omitempty"`
|
||||
ErrorMessage string `protobuf:"bytes,3,opt,name=error_message,json=errorMessage,proto3" json:"error_message,omitempty"`
|
||||
UpdatedAt *timestamppb.Timestamp `protobuf:"bytes,4,opt,name=updated_at,json=updatedAt,proto3" json:"updated_at,omitempty"`
|
||||
Phase ConversionPhase `protobuf:"varint,5,opt,name=phase,proto3,enum=officeconvertapi.v1.ConversionPhase" json:"phase,omitempty"`
|
||||
CurrentProgress int32 `protobuf:"varint,6,opt,name=current_progress,json=currentProgress,proto3" json:"current_progress,omitempty"`
|
||||
MaxProgress int32 `protobuf:"varint,7,opt,name=max_progress,json=maxProgress,proto3" json:"max_progress,omitempty"`
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
}
|
||||
@@ -542,6 +604,27 @@ func (x *GetConversionStatusResponse) GetUpdatedAt() *timestamppb.Timestamp {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (x *GetConversionStatusResponse) GetPhase() ConversionPhase {
|
||||
if x != nil {
|
||||
return x.Phase
|
||||
}
|
||||
return ConversionPhase_CONVERSION_PHASE_UNSPECIFIED
|
||||
}
|
||||
|
||||
func (x *GetConversionStatusResponse) GetCurrentProgress() int32 {
|
||||
if x != nil {
|
||||
return x.CurrentProgress
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *GetConversionStatusResponse) GetMaxProgress() int32 {
|
||||
if x != nil {
|
||||
return x.MaxProgress
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// GetSlideDeckRequest fetches a completed deck.
|
||||
type GetSlideDeckRequest struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
@@ -762,13 +845,16 @@ const file_officeconvertapi_v1_conversion_proto_rawDesc = "" +
|
||||
"\rconversion_id\x18\x01 \x01(\tR\fconversionId\x12=\n" +
|
||||
"\x06status\x18\x02 \x01(\x0e2%.officeconvertapi.v1.ConversionStatusR\x06status\"A\n" +
|
||||
"\x1aGetConversionStatusRequest\x12#\n" +
|
||||
"\rconversion_id\x18\x01 \x01(\tR\fconversionId\"\xe1\x01\n" +
|
||||
"\rconversion_id\x18\x01 \x01(\tR\fconversionId\"\xeb\x02\n" +
|
||||
"\x1bGetConversionStatusResponse\x12#\n" +
|
||||
"\rconversion_id\x18\x01 \x01(\tR\fconversionId\x12=\n" +
|
||||
"\x06status\x18\x02 \x01(\x0e2%.officeconvertapi.v1.ConversionStatusR\x06status\x12#\n" +
|
||||
"\rerror_message\x18\x03 \x01(\tR\ferrorMessage\x129\n" +
|
||||
"\n" +
|
||||
"updated_at\x18\x04 \x01(\v2\x1a.google.protobuf.TimestampR\tupdatedAt\":\n" +
|
||||
"updated_at\x18\x04 \x01(\v2\x1a.google.protobuf.TimestampR\tupdatedAt\x12:\n" +
|
||||
"\x05phase\x18\x05 \x01(\x0e2$.officeconvertapi.v1.ConversionPhaseR\x05phase\x12)\n" +
|
||||
"\x10current_progress\x18\x06 \x01(\x05R\x0fcurrentProgress\x12!\n" +
|
||||
"\fmax_progress\x18\a \x01(\x05R\vmaxProgress\":\n" +
|
||||
"\x13GetSlideDeckRequest\x12#\n" +
|
||||
"\rconversion_id\x18\x01 \x01(\tR\fconversionId\"U\n" +
|
||||
"\x14GetSlideDeckResponse\x12=\n" +
|
||||
@@ -784,7 +870,14 @@ const file_officeconvertapi_v1_conversion_proto_rawDesc = "" +
|
||||
"\x19CONVERSION_STATUS_PENDING\x10\x01\x12\x1d\n" +
|
||||
"\x19CONVERSION_STATUS_RUNNING\x10\x02\x12\x1f\n" +
|
||||
"\x1bCONVERSION_STATUS_SUCCEEDED\x10\x03\x12\x1c\n" +
|
||||
"\x18CONVERSION_STATUS_FAILED\x10\x042\xcc\x04\n" +
|
||||
"\x18CONVERSION_STATUS_FAILED\x10\x04*\xe7\x01\n" +
|
||||
"\x0fConversionPhase\x12 \n" +
|
||||
"\x1cCONVERSION_PHASE_UNSPECIFIED\x10\x00\x12\x1d\n" +
|
||||
"\x19CONVERSION_PHASE_INACTIVE\x10\x01\x12%\n" +
|
||||
"!CONVERSION_PHASE_EXTRACTING_NOTES\x10\x02\x12 \n" +
|
||||
"\x1cCONVERSION_PHASE_PPTX_TO_PDF\x10\x03\x12\"\n" +
|
||||
"\x1eCONVERSION_PHASE_PDF_TO_IMAGES\x10\x04\x12&\n" +
|
||||
"\"CONVERSION_PHASE_UPLOADING_RESULTS\x10\x052\xcc\x04\n" +
|
||||
"\x11ConversionService\x12q\n" +
|
||||
"\x10CreateConversion\x12,.officeconvertapi.v1.CreateConversionRequest\x1a-.officeconvertapi.v1.CreateConversionResponse\"\x00\x12n\n" +
|
||||
"\x0fStartConversion\x12+.officeconvertapi.v1.StartConversionRequest\x1a,.officeconvertapi.v1.StartConversionResponse\"\x00\x12z\n" +
|
||||
@@ -804,47 +897,49 @@ func file_officeconvertapi_v1_conversion_proto_rawDescGZIP() []byte {
|
||||
return file_officeconvertapi_v1_conversion_proto_rawDescData
|
||||
}
|
||||
|
||||
var file_officeconvertapi_v1_conversion_proto_enumTypes = make([]protoimpl.EnumInfo, 1)
|
||||
var file_officeconvertapi_v1_conversion_proto_enumTypes = make([]protoimpl.EnumInfo, 2)
|
||||
var file_officeconvertapi_v1_conversion_proto_msgTypes = make([]protoimpl.MessageInfo, 12)
|
||||
var file_officeconvertapi_v1_conversion_proto_goTypes = []any{
|
||||
(ConversionStatus)(0), // 0: officeconvertapi.v1.ConversionStatus
|
||||
(*Slide)(nil), // 1: officeconvertapi.v1.Slide
|
||||
(*SlideDeck)(nil), // 2: officeconvertapi.v1.SlideDeck
|
||||
(*CreateConversionRequest)(nil), // 3: officeconvertapi.v1.CreateConversionRequest
|
||||
(*CreateConversionResponse)(nil), // 4: officeconvertapi.v1.CreateConversionResponse
|
||||
(*StartConversionRequest)(nil), // 5: officeconvertapi.v1.StartConversionRequest
|
||||
(*StartConversionResponse)(nil), // 6: officeconvertapi.v1.StartConversionResponse
|
||||
(*GetConversionStatusRequest)(nil), // 7: officeconvertapi.v1.GetConversionStatusRequest
|
||||
(*GetConversionStatusResponse)(nil), // 8: officeconvertapi.v1.GetConversionStatusResponse
|
||||
(*GetSlideDeckRequest)(nil), // 9: officeconvertapi.v1.GetSlideDeckRequest
|
||||
(*GetSlideDeckResponse)(nil), // 10: officeconvertapi.v1.GetSlideDeckResponse
|
||||
(*DeleteConversionRequest)(nil), // 11: officeconvertapi.v1.DeleteConversionRequest
|
||||
(*DeleteConversionResponse)(nil), // 12: officeconvertapi.v1.DeleteConversionResponse
|
||||
(*timestamppb.Timestamp)(nil), // 13: google.protobuf.Timestamp
|
||||
(ConversionPhase)(0), // 1: officeconvertapi.v1.ConversionPhase
|
||||
(*Slide)(nil), // 2: officeconvertapi.v1.Slide
|
||||
(*SlideDeck)(nil), // 3: officeconvertapi.v1.SlideDeck
|
||||
(*CreateConversionRequest)(nil), // 4: officeconvertapi.v1.CreateConversionRequest
|
||||
(*CreateConversionResponse)(nil), // 5: officeconvertapi.v1.CreateConversionResponse
|
||||
(*StartConversionRequest)(nil), // 6: officeconvertapi.v1.StartConversionRequest
|
||||
(*StartConversionResponse)(nil), // 7: officeconvertapi.v1.StartConversionResponse
|
||||
(*GetConversionStatusRequest)(nil), // 8: officeconvertapi.v1.GetConversionStatusRequest
|
||||
(*GetConversionStatusResponse)(nil), // 9: officeconvertapi.v1.GetConversionStatusResponse
|
||||
(*GetSlideDeckRequest)(nil), // 10: officeconvertapi.v1.GetSlideDeckRequest
|
||||
(*GetSlideDeckResponse)(nil), // 11: officeconvertapi.v1.GetSlideDeckResponse
|
||||
(*DeleteConversionRequest)(nil), // 12: officeconvertapi.v1.DeleteConversionRequest
|
||||
(*DeleteConversionResponse)(nil), // 13: officeconvertapi.v1.DeleteConversionResponse
|
||||
(*timestamppb.Timestamp)(nil), // 14: google.protobuf.Timestamp
|
||||
}
|
||||
var file_officeconvertapi_v1_conversion_proto_depIdxs = []int32{
|
||||
1, // 0: officeconvertapi.v1.SlideDeck.slides:type_name -> officeconvertapi.v1.Slide
|
||||
13, // 1: officeconvertapi.v1.SlideDeck.created_at:type_name -> google.protobuf.Timestamp
|
||||
13, // 2: officeconvertapi.v1.CreateConversionResponse.expires_at:type_name -> google.protobuf.Timestamp
|
||||
2, // 0: officeconvertapi.v1.SlideDeck.slides:type_name -> officeconvertapi.v1.Slide
|
||||
14, // 1: officeconvertapi.v1.SlideDeck.created_at:type_name -> google.protobuf.Timestamp
|
||||
14, // 2: officeconvertapi.v1.CreateConversionResponse.expires_at:type_name -> google.protobuf.Timestamp
|
||||
0, // 3: officeconvertapi.v1.StartConversionResponse.status:type_name -> officeconvertapi.v1.ConversionStatus
|
||||
0, // 4: officeconvertapi.v1.GetConversionStatusResponse.status:type_name -> officeconvertapi.v1.ConversionStatus
|
||||
13, // 5: officeconvertapi.v1.GetConversionStatusResponse.updated_at:type_name -> google.protobuf.Timestamp
|
||||
2, // 6: officeconvertapi.v1.GetSlideDeckResponse.slide_deck:type_name -> officeconvertapi.v1.SlideDeck
|
||||
3, // 7: officeconvertapi.v1.ConversionService.CreateConversion:input_type -> officeconvertapi.v1.CreateConversionRequest
|
||||
5, // 8: officeconvertapi.v1.ConversionService.StartConversion:input_type -> officeconvertapi.v1.StartConversionRequest
|
||||
7, // 9: officeconvertapi.v1.ConversionService.GetConversionStatus:input_type -> officeconvertapi.v1.GetConversionStatusRequest
|
||||
9, // 10: officeconvertapi.v1.ConversionService.GetSlideDeck:input_type -> officeconvertapi.v1.GetSlideDeckRequest
|
||||
11, // 11: officeconvertapi.v1.ConversionService.DeleteConversion:input_type -> officeconvertapi.v1.DeleteConversionRequest
|
||||
4, // 12: officeconvertapi.v1.ConversionService.CreateConversion:output_type -> officeconvertapi.v1.CreateConversionResponse
|
||||
6, // 13: officeconvertapi.v1.ConversionService.StartConversion:output_type -> officeconvertapi.v1.StartConversionResponse
|
||||
8, // 14: officeconvertapi.v1.ConversionService.GetConversionStatus:output_type -> officeconvertapi.v1.GetConversionStatusResponse
|
||||
10, // 15: officeconvertapi.v1.ConversionService.GetSlideDeck:output_type -> officeconvertapi.v1.GetSlideDeckResponse
|
||||
12, // 16: officeconvertapi.v1.ConversionService.DeleteConversion:output_type -> officeconvertapi.v1.DeleteConversionResponse
|
||||
12, // [12:17] is the sub-list for method output_type
|
||||
7, // [7:12] is the sub-list for method input_type
|
||||
7, // [7:7] is the sub-list for extension type_name
|
||||
7, // [7:7] is the sub-list for extension extendee
|
||||
0, // [0:7] is the sub-list for field type_name
|
||||
14, // 5: officeconvertapi.v1.GetConversionStatusResponse.updated_at:type_name -> google.protobuf.Timestamp
|
||||
1, // 6: officeconvertapi.v1.GetConversionStatusResponse.phase:type_name -> officeconvertapi.v1.ConversionPhase
|
||||
3, // 7: officeconvertapi.v1.GetSlideDeckResponse.slide_deck:type_name -> officeconvertapi.v1.SlideDeck
|
||||
4, // 8: officeconvertapi.v1.ConversionService.CreateConversion:input_type -> officeconvertapi.v1.CreateConversionRequest
|
||||
6, // 9: officeconvertapi.v1.ConversionService.StartConversion:input_type -> officeconvertapi.v1.StartConversionRequest
|
||||
8, // 10: officeconvertapi.v1.ConversionService.GetConversionStatus:input_type -> officeconvertapi.v1.GetConversionStatusRequest
|
||||
10, // 11: officeconvertapi.v1.ConversionService.GetSlideDeck:input_type -> officeconvertapi.v1.GetSlideDeckRequest
|
||||
12, // 12: officeconvertapi.v1.ConversionService.DeleteConversion:input_type -> officeconvertapi.v1.DeleteConversionRequest
|
||||
5, // 13: officeconvertapi.v1.ConversionService.CreateConversion:output_type -> officeconvertapi.v1.CreateConversionResponse
|
||||
7, // 14: officeconvertapi.v1.ConversionService.StartConversion:output_type -> officeconvertapi.v1.StartConversionResponse
|
||||
9, // 15: officeconvertapi.v1.ConversionService.GetConversionStatus:output_type -> officeconvertapi.v1.GetConversionStatusResponse
|
||||
11, // 16: officeconvertapi.v1.ConversionService.GetSlideDeck:output_type -> officeconvertapi.v1.GetSlideDeckResponse
|
||||
13, // 17: officeconvertapi.v1.ConversionService.DeleteConversion:output_type -> officeconvertapi.v1.DeleteConversionResponse
|
||||
13, // [13:18] is the sub-list for method output_type
|
||||
8, // [8:13] is the sub-list for method input_type
|
||||
8, // [8:8] is the sub-list for extension type_name
|
||||
8, // [8:8] is the sub-list for extension extendee
|
||||
0, // [0:8] is the sub-list for field type_name
|
||||
}
|
||||
|
||||
func init() { file_officeconvertapi_v1_conversion_proto_init() }
|
||||
@@ -857,7 +952,7 @@ func file_officeconvertapi_v1_conversion_proto_init() {
|
||||
File: protoimpl.DescBuilder{
|
||||
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
|
||||
RawDescriptor: unsafe.Slice(unsafe.StringData(file_officeconvertapi_v1_conversion_proto_rawDesc), len(file_officeconvertapi_v1_conversion_proto_rawDesc)),
|
||||
NumEnums: 1,
|
||||
NumEnums: 2,
|
||||
NumMessages: 12,
|
||||
NumExtensions: 0,
|
||||
NumServices: 1,
|
||||
|
||||
@@ -25,7 +25,7 @@ _sym_db = _symbol_database.Default()
|
||||
from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2
|
||||
|
||||
|
||||
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n$officeconvertapi/v1/conversion.proto\x12\x13officeconvertapi.v1\x1a\x1fgoogle/protobuf/timestamp.proto\"[\n\x05Slide\x12\x14\n\x05index\x18\x01 \x01(\x05R\x05index\x12\x1f\n\x0bnotes_plain\x18\x02 \x01(\tR\nnotesPlain\x12\x1b\n\timage_url\x18\x03 \x01(\tR\x08imageUrl\"\xc8\x01\n\tSlideDeck\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12\'\n\x0fsource_filename\x18\x02 \x01(\tR\x0esourceFilename\x12\x32\n\x06slides\x18\x03 \x03(\x0b\x32\x1a.officeconvertapi.v1.SlideR\x06slides\x12\x39\n\ncreated_at\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tcreatedAt\"B\n\x17\x43reateConversionRequest\x12\'\n\x0fsource_filename\x18\x01 \x01(\tR\x0esourceFilename\"\xea\x01\n\x18\x43reateConversionResponse\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12#\n\rupload_bucket\x18\x02 \x01(\tR\x0cuploadBucket\x12*\n\x11upload_object_key\x18\x03 \x01(\tR\x0fuploadObjectKey\x12\x1d\n\nupload_url\x18\x04 \x01(\tR\tuploadUrl\x12\x39\n\nexpires_at\x18\x05 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\texpiresAt\"=\n\x16StartConversionRequest\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\"}\n\x17StartConversionResponse\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12=\n\x06status\x18\x02 \x01(\x0e\x32%.officeconvertapi.v1.ConversionStatusR\x06status\"A\n\x1aGetConversionStatusRequest\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\"\xe1\x01\n\x1bGetConversionStatusResponse\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12=\n\x06status\x18\x02 \x01(\x0e\x32%.officeconvertapi.v1.ConversionStatusR\x06status\x12#\n\rerror_message\x18\x03 \x01(\tR\x0c\x65rrorMessage\x12\x39\n\nupdated_at\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tupdatedAt\":\n\x13GetSlideDeckRequest\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\"U\n\x14GetSlideDeckResponse\x12=\n\nslide_deck\x18\x01 
\x01(\x0b\x32\x1e.officeconvertapi.v1.SlideDeckR\tslideDeck\">\n\x17\x44\x65leteConversionRequest\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\"Y\n\x18\x44\x65leteConversionResponse\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12\x18\n\x07\x64\x65leted\x18\x02 \x01(\x08R\x07\x64\x65leted*\xb2\x01\n\x10\x43onversionStatus\x12!\n\x1d\x43ONVERSION_STATUS_UNSPECIFIED\x10\x00\x12\x1d\n\x19\x43ONVERSION_STATUS_PENDING\x10\x01\x12\x1d\n\x19\x43ONVERSION_STATUS_RUNNING\x10\x02\x12\x1f\n\x1b\x43ONVERSION_STATUS_SUCCEEDED\x10\x03\x12\x1c\n\x18\x43ONVERSION_STATUS_FAILED\x10\x04\x32\xcc\x04\n\x11\x43onversionService\x12q\n\x10\x43reateConversion\x12,.officeconvertapi.v1.CreateConversionRequest\x1a-.officeconvertapi.v1.CreateConversionResponse\"\x00\x12n\n\x0fStartConversion\x12+.officeconvertapi.v1.StartConversionRequest\x1a,.officeconvertapi.v1.StartConversionResponse\"\x00\x12z\n\x13GetConversionStatus\x12/.officeconvertapi.v1.GetConversionStatusRequest\x1a\x30.officeconvertapi.v1.GetConversionStatusResponse\"\x00\x12\x65\n\x0cGetSlideDeck\x12(.officeconvertapi.v1.GetSlideDeckRequest\x1a).officeconvertapi.v1.GetSlideDeckResponse\"\x00\x12q\n\x10\x44\x65leteConversion\x12,.officeconvertapi.v1.DeleteConversionRequest\x1a-.officeconvertapi.v1.DeleteConversionResponse\"\x00\x42LZJgithub.com/end/officeconvert/gen/go/officeconvertapi/v1;officeconvertapiv1b\x06proto3')
|
||||
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n$officeconvertapi/v1/conversion.proto\x12\x13officeconvertapi.v1\x1a\x1fgoogle/protobuf/timestamp.proto\"[\n\x05Slide\x12\x14\n\x05index\x18\x01 \x01(\x05R\x05index\x12\x1f\n\x0bnotes_plain\x18\x02 \x01(\tR\nnotesPlain\x12\x1b\n\timage_url\x18\x03 \x01(\tR\x08imageUrl\"\xc8\x01\n\tSlideDeck\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12\'\n\x0fsource_filename\x18\x02 \x01(\tR\x0esourceFilename\x12\x32\n\x06slides\x18\x03 \x03(\x0b\x32\x1a.officeconvertapi.v1.SlideR\x06slides\x12\x39\n\ncreated_at\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tcreatedAt\"B\n\x17\x43reateConversionRequest\x12\'\n\x0fsource_filename\x18\x01 \x01(\tR\x0esourceFilename\"\xea\x01\n\x18\x43reateConversionResponse\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12#\n\rupload_bucket\x18\x02 \x01(\tR\x0cuploadBucket\x12*\n\x11upload_object_key\x18\x03 \x01(\tR\x0fuploadObjectKey\x12\x1d\n\nupload_url\x18\x04 \x01(\tR\tuploadUrl\x12\x39\n\nexpires_at\x18\x05 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\texpiresAt\"=\n\x16StartConversionRequest\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\"}\n\x17StartConversionResponse\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12=\n\x06status\x18\x02 \x01(\x0e\x32%.officeconvertapi.v1.ConversionStatusR\x06status\"A\n\x1aGetConversionStatusRequest\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\"\xeb\x02\n\x1bGetConversionStatusResponse\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12=\n\x06status\x18\x02 \x01(\x0e\x32%.officeconvertapi.v1.ConversionStatusR\x06status\x12#\n\rerror_message\x18\x03 \x01(\tR\x0c\x65rrorMessage\x12\x39\n\nupdated_at\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.TimestampR\tupdatedAt\x12:\n\x05phase\x18\x05 \x01(\x0e\x32$.officeconvertapi.v1.ConversionPhaseR\x05phase\x12)\n\x10\x63urrent_progress\x18\x06 \x01(\x05R\x0f\x63urrentProgress\x12!\n\x0cmax_progress\x18\x07 
\x01(\x05R\x0bmaxProgress\":\n\x13GetSlideDeckRequest\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\"U\n\x14GetSlideDeckResponse\x12=\n\nslide_deck\x18\x01 \x01(\x0b\x32\x1e.officeconvertapi.v1.SlideDeckR\tslideDeck\">\n\x17\x44\x65leteConversionRequest\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\"Y\n\x18\x44\x65leteConversionResponse\x12#\n\rconversion_id\x18\x01 \x01(\tR\x0c\x63onversionId\x12\x18\n\x07\x64\x65leted\x18\x02 \x01(\x08R\x07\x64\x65leted*\xb2\x01\n\x10\x43onversionStatus\x12!\n\x1d\x43ONVERSION_STATUS_UNSPECIFIED\x10\x00\x12\x1d\n\x19\x43ONVERSION_STATUS_PENDING\x10\x01\x12\x1d\n\x19\x43ONVERSION_STATUS_RUNNING\x10\x02\x12\x1f\n\x1b\x43ONVERSION_STATUS_SUCCEEDED\x10\x03\x12\x1c\n\x18\x43ONVERSION_STATUS_FAILED\x10\x04*\xe7\x01\n\x0f\x43onversionPhase\x12 \n\x1c\x43ONVERSION_PHASE_UNSPECIFIED\x10\x00\x12\x1d\n\x19\x43ONVERSION_PHASE_INACTIVE\x10\x01\x12%\n!CONVERSION_PHASE_EXTRACTING_NOTES\x10\x02\x12 \n\x1c\x43ONVERSION_PHASE_PPTX_TO_PDF\x10\x03\x12\"\n\x1e\x43ONVERSION_PHASE_PDF_TO_IMAGES\x10\x04\x12&\n\"CONVERSION_PHASE_UPLOADING_RESULTS\x10\x05\x32\xcc\x04\n\x11\x43onversionService\x12q\n\x10\x43reateConversion\x12,.officeconvertapi.v1.CreateConversionRequest\x1a-.officeconvertapi.v1.CreateConversionResponse\"\x00\x12n\n\x0fStartConversion\x12+.officeconvertapi.v1.StartConversionRequest\x1a,.officeconvertapi.v1.StartConversionResponse\"\x00\x12z\n\x13GetConversionStatus\x12/.officeconvertapi.v1.GetConversionStatusRequest\x1a\x30.officeconvertapi.v1.GetConversionStatusResponse\"\x00\x12\x65\n\x0cGetSlideDeck\x12(.officeconvertapi.v1.GetSlideDeckRequest\x1a).officeconvertapi.v1.GetSlideDeckResponse\"\x00\x12q\n\x10\x44\x65leteConversion\x12,.officeconvertapi.v1.DeleteConversionRequest\x1a-.officeconvertapi.v1.DeleteConversionResponse\"\x00\x42LZJgithub.com/end/officeconvert/gen/go/officeconvertapi/v1;officeconvertapiv1b\x06proto3')
|
||||
|
||||
_globals = globals()
|
||||
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
||||
@@ -33,8 +33,10 @@ _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'officeconvertapi.v1.convers
|
||||
if not _descriptor._USE_C_DESCRIPTORS:
|
||||
_globals['DESCRIPTOR']._loaded_options = None
|
||||
_globals['DESCRIPTOR']._serialized_options = b'ZJgithub.com/end/officeconvert/gen/go/officeconvertapi/v1;officeconvertapiv1'
|
||||
_globals['_CONVERSIONSTATUS']._serialized_start=1483
|
||||
_globals['_CONVERSIONSTATUS']._serialized_end=1661
|
||||
_globals['_CONVERSIONSTATUS']._serialized_start=1621
|
||||
_globals['_CONVERSIONSTATUS']._serialized_end=1799
|
||||
_globals['_CONVERSIONPHASE']._serialized_start=1802
|
||||
_globals['_CONVERSIONPHASE']._serialized_end=2033
|
||||
_globals['_SLIDE']._serialized_start=94
|
||||
_globals['_SLIDE']._serialized_end=185
|
||||
_globals['_SLIDEDECK']._serialized_start=188
|
||||
@@ -50,15 +52,15 @@ if not _descriptor._USE_C_DESCRIPTORS:
|
||||
_globals['_GETCONVERSIONSTATUSREQUEST']._serialized_start=885
|
||||
_globals['_GETCONVERSIONSTATUSREQUEST']._serialized_end=950
|
||||
_globals['_GETCONVERSIONSTATUSRESPONSE']._serialized_start=953
|
||||
_globals['_GETCONVERSIONSTATUSRESPONSE']._serialized_end=1178
|
||||
_globals['_GETSLIDEDECKREQUEST']._serialized_start=1180
|
||||
_globals['_GETSLIDEDECKREQUEST']._serialized_end=1238
|
||||
_globals['_GETSLIDEDECKRESPONSE']._serialized_start=1240
|
||||
_globals['_GETSLIDEDECKRESPONSE']._serialized_end=1325
|
||||
_globals['_DELETECONVERSIONREQUEST']._serialized_start=1327
|
||||
_globals['_DELETECONVERSIONREQUEST']._serialized_end=1389
|
||||
_globals['_DELETECONVERSIONRESPONSE']._serialized_start=1391
|
||||
_globals['_DELETECONVERSIONRESPONSE']._serialized_end=1480
|
||||
_globals['_CONVERSIONSERVICE']._serialized_start=1664
|
||||
_globals['_CONVERSIONSERVICE']._serialized_end=2252
|
||||
_globals['_GETCONVERSIONSTATUSRESPONSE']._serialized_end=1316
|
||||
_globals['_GETSLIDEDECKREQUEST']._serialized_start=1318
|
||||
_globals['_GETSLIDEDECKREQUEST']._serialized_end=1376
|
||||
_globals['_GETSLIDEDECKRESPONSE']._serialized_start=1378
|
||||
_globals['_GETSLIDEDECKRESPONSE']._serialized_end=1463
|
||||
_globals['_DELETECONVERSIONREQUEST']._serialized_start=1465
|
||||
_globals['_DELETECONVERSIONREQUEST']._serialized_end=1527
|
||||
_globals['_DELETECONVERSIONRESPONSE']._serialized_start=1529
|
||||
_globals['_DELETECONVERSIONRESPONSE']._serialized_end=1618
|
||||
_globals['_CONVERSIONSERVICE']._serialized_start=2036
|
||||
_globals['_CONVERSIONSERVICE']._serialized_end=2624
|
||||
# @@protoc_insertion_point(module_scope)
|
||||
|
||||
@@ -17,11 +17,26 @@ class ConversionStatus(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
|
||||
CONVERSION_STATUS_RUNNING: _ClassVar[ConversionStatus]
|
||||
CONVERSION_STATUS_SUCCEEDED: _ClassVar[ConversionStatus]
|
||||
CONVERSION_STATUS_FAILED: _ClassVar[ConversionStatus]
|
||||
|
||||
class ConversionPhase(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
|
||||
__slots__ = ()
|
||||
CONVERSION_PHASE_UNSPECIFIED: _ClassVar[ConversionPhase]
|
||||
CONVERSION_PHASE_INACTIVE: _ClassVar[ConversionPhase]
|
||||
CONVERSION_PHASE_EXTRACTING_NOTES: _ClassVar[ConversionPhase]
|
||||
CONVERSION_PHASE_PPTX_TO_PDF: _ClassVar[ConversionPhase]
|
||||
CONVERSION_PHASE_PDF_TO_IMAGES: _ClassVar[ConversionPhase]
|
||||
CONVERSION_PHASE_UPLOADING_RESULTS: _ClassVar[ConversionPhase]
|
||||
CONVERSION_STATUS_UNSPECIFIED: ConversionStatus
|
||||
CONVERSION_STATUS_PENDING: ConversionStatus
|
||||
CONVERSION_STATUS_RUNNING: ConversionStatus
|
||||
CONVERSION_STATUS_SUCCEEDED: ConversionStatus
|
||||
CONVERSION_STATUS_FAILED: ConversionStatus
|
||||
CONVERSION_PHASE_UNSPECIFIED: ConversionPhase
|
||||
CONVERSION_PHASE_INACTIVE: ConversionPhase
|
||||
CONVERSION_PHASE_EXTRACTING_NOTES: ConversionPhase
|
||||
CONVERSION_PHASE_PPTX_TO_PDF: ConversionPhase
|
||||
CONVERSION_PHASE_PDF_TO_IMAGES: ConversionPhase
|
||||
CONVERSION_PHASE_UPLOADING_RESULTS: ConversionPhase
|
||||
|
||||
class Slide(_message.Message):
|
||||
__slots__ = ("index", "notes_plain", "image_url")
|
||||
@@ -86,16 +101,22 @@ class GetConversionStatusRequest(_message.Message):
|
||||
def __init__(self, conversion_id: _Optional[str] = ...) -> None: ...
|
||||
|
||||
class GetConversionStatusResponse(_message.Message):
|
||||
__slots__ = ("conversion_id", "status", "error_message", "updated_at")
|
||||
__slots__ = ("conversion_id", "status", "error_message", "updated_at", "phase", "current_progress", "max_progress")
|
||||
CONVERSION_ID_FIELD_NUMBER: _ClassVar[int]
|
||||
STATUS_FIELD_NUMBER: _ClassVar[int]
|
||||
ERROR_MESSAGE_FIELD_NUMBER: _ClassVar[int]
|
||||
UPDATED_AT_FIELD_NUMBER: _ClassVar[int]
|
||||
PHASE_FIELD_NUMBER: _ClassVar[int]
|
||||
CURRENT_PROGRESS_FIELD_NUMBER: _ClassVar[int]
|
||||
MAX_PROGRESS_FIELD_NUMBER: _ClassVar[int]
|
||||
conversion_id: str
|
||||
status: ConversionStatus
|
||||
error_message: str
|
||||
updated_at: _timestamp_pb2.Timestamp
|
||||
def __init__(self, conversion_id: _Optional[str] = ..., status: _Optional[_Union[ConversionStatus, str]] = ..., error_message: _Optional[str] = ..., updated_at: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ...) -> None: ...
|
||||
phase: ConversionPhase
|
||||
current_progress: int
|
||||
max_progress: int
|
||||
def __init__(self, conversion_id: _Optional[str] = ..., status: _Optional[_Union[ConversionStatus, str]] = ..., error_message: _Optional[str] = ..., updated_at: _Optional[_Union[datetime.datetime, _timestamp_pb2.Timestamp, _Mapping]] = ..., phase: _Optional[_Union[ConversionPhase, str]] = ..., current_progress: _Optional[int] = ..., max_progress: _Optional[int] = ...) -> None: ...
|
||||
|
||||
class GetSlideDeckRequest(_message.Message):
|
||||
__slots__ = ("conversion_id",)
|
||||
|
||||
@@ -33,6 +33,16 @@ enum ConversionStatus {
|
||||
CONVERSION_STATUS_FAILED = 4;
|
||||
}
|
||||
|
||||
// ConversionPhase represents the active stage for a running conversion.
|
||||
enum ConversionPhase {
|
||||
CONVERSION_PHASE_UNSPECIFIED = 0;
|
||||
CONVERSION_PHASE_INACTIVE = 1;
|
||||
CONVERSION_PHASE_EXTRACTING_NOTES = 2;
|
||||
CONVERSION_PHASE_PPTX_TO_PDF = 3;
|
||||
CONVERSION_PHASE_PDF_TO_IMAGES = 4;
|
||||
CONVERSION_PHASE_UPLOADING_RESULTS = 5;
|
||||
}
|
||||
|
||||
// Slide contains extracted notes and the rendered image URL for one slide.
|
||||
message Slide {
|
||||
int32 index = 1;
|
||||
@@ -84,6 +94,9 @@ message GetConversionStatusResponse {
|
||||
ConversionStatus status = 2;
|
||||
string error_message = 3;
|
||||
google.protobuf.Timestamp updated_at = 4;
|
||||
ConversionPhase phase = 5;
|
||||
int32 current_progress = 6;
|
||||
int32 max_progress = 7;
|
||||
}
|
||||
|
||||
// GetSlideDeckRequest fetches a completed deck.
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
import subprocess
|
||||
@@ -27,6 +28,14 @@ class SlideDeckResult:
|
||||
slides: list[SlideArtifact]
|
||||
|
||||
|
||||
ProgressCallback = Callable[[str, int, int], None]
|
||||
PageProgressCallback = Callable[[int, int], None]
|
||||
|
||||
PHASE_EXTRACTING_NOTES = "extracting_notes"
|
||||
PHASE_PPTX_TO_PDF = "pptx_to_pdf"
|
||||
PHASE_PDF_TO_IMAGES = "pdf_to_images"
|
||||
|
||||
|
||||
def convert_pptx_to_pdf(pptx_path: Path, pdf_path: Path, *, timeout_s: int = 120) -> Path:
|
||||
"""Convert a PPTX file to PDF using headless LibreOffice.
|
||||
|
||||
@@ -92,6 +101,8 @@ def render_pdf_to_images(
|
||||
dpi: int = 180,
|
||||
image_format: str = "png",
|
||||
timeout_s: int = 120,
|
||||
total_pages: int | None = None,
|
||||
page_progress_callback: PageProgressCallback | None = None,
|
||||
) -> list[Path]:
|
||||
"""Render each PDF page into an image using Poppler's `pdftoppm`.
|
||||
|
||||
@@ -113,6 +124,7 @@ def render_pdf_to_images(
|
||||
raise FileNotFoundError(f"source PDF does not exist: {pdf_path}")
|
||||
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
if total_pages is None:
|
||||
prefix_path = out_dir / "slide"
|
||||
command = [
|
||||
"pdftoppm",
|
||||
@@ -140,8 +152,55 @@ def render_pdf_to_images(
|
||||
raise RuntimeError(
|
||||
f"Poppler rasterization failed: {completed.stderr.strip() or completed.stdout.strip()}"
|
||||
)
|
||||
|
||||
images = sorted(out_dir.glob(f"slide-*.{image_format}"))
|
||||
else:
|
||||
if total_pages < 0:
|
||||
raise ValueError("total_pages must be zero or greater")
|
||||
images = []
|
||||
for page_index in range(1, total_pages + 1):
|
||||
page_prefix = out_dir / f"slide-{page_index:04d}"
|
||||
command = [
|
||||
"pdftoppm",
|
||||
"-r",
|
||||
str(dpi),
|
||||
f"-{image_format}",
|
||||
"-f",
|
||||
str(page_index),
|
||||
"-l",
|
||||
str(page_index),
|
||||
"-singlefile",
|
||||
str(pdf_path.resolve()),
|
||||
str(page_prefix),
|
||||
]
|
||||
try:
|
||||
completed = subprocess.run(
|
||||
command,
|
||||
check=False,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=timeout_s,
|
||||
)
|
||||
except subprocess.TimeoutExpired as exc:
|
||||
raise RuntimeError(
|
||||
"Poppler rasterization timed out after "
|
||||
f"{timeout_s} seconds while rendering page {page_index} "
|
||||
f"of {pdf_path.name}; increase conversion PDF render timeout "
|
||||
"or lower image DPI"
|
||||
) from exc
|
||||
if completed.returncode != 0:
|
||||
raise RuntimeError(
|
||||
"Poppler rasterization failed on page "
|
||||
f"{page_index}: {completed.stderr.strip() or completed.stdout.strip()}"
|
||||
)
|
||||
image_path = page_prefix.with_suffix(f".{image_format}")
|
||||
if not image_path.exists():
|
||||
raise RuntimeError(
|
||||
f"Poppler did not create expected page image: {image_path}"
|
||||
)
|
||||
images.append(image_path.resolve())
|
||||
if page_progress_callback is not None:
|
||||
page_progress_callback(page_index, total_pages)
|
||||
|
||||
if not images:
|
||||
raise RuntimeError(f"no rendered images found in {out_dir}")
|
||||
return [image.resolve() for image in images]
|
||||
@@ -180,6 +239,11 @@ def convert_pptx_to_slidedeck(
|
||||
image_format: str = "png",
|
||||
pptx_to_pdf_timeout_s: int = 180,
|
||||
pdf_to_images_timeout_s: int = 600,
|
||||
pptx_to_pdf_base_timeout_s: int = 45,
|
||||
pptx_to_pdf_per_slide_timeout_s: int = 3,
|
||||
pdf_to_images_base_timeout_s: int = 30,
|
||||
pdf_to_images_per_slide_timeout_s: int = 8,
|
||||
progress_callback: ProgressCallback | None = None,
|
||||
) -> SlideDeckResult:
|
||||
"""Convert a PPTX into rendered images and extracted notes.
|
||||
|
||||
@@ -206,15 +270,45 @@ def convert_pptx_to_slidedeck(
|
||||
pdf_path = work_dir / f"{pptx_path.stem}.pdf"
|
||||
image_dir = work_dir / "slides"
|
||||
|
||||
convert_pptx_to_pdf(pptx_path, pdf_path, timeout_s=pptx_to_pdf_timeout_s)
|
||||
_emit_progress(progress_callback, PHASE_EXTRACTING_NOTES, 0, 1)
|
||||
notes = extract_slide_notes(pptx_path)
|
||||
_emit_progress(progress_callback, PHASE_EXTRACTING_NOTES, 1, 1)
|
||||
slide_count = len(notes)
|
||||
pptx_to_pdf_timeout = _compute_adaptive_timeout(
|
||||
slide_count=slide_count,
|
||||
timeout_cap_s=pptx_to_pdf_timeout_s,
|
||||
base_timeout_s=pptx_to_pdf_base_timeout_s,
|
||||
per_slide_timeout_s=pptx_to_pdf_per_slide_timeout_s,
|
||||
)
|
||||
pdf_to_images_timeout = _compute_adaptive_timeout(
|
||||
slide_count=slide_count,
|
||||
timeout_cap_s=pdf_to_images_timeout_s,
|
||||
base_timeout_s=pdf_to_images_base_timeout_s,
|
||||
per_slide_timeout_s=pdf_to_images_per_slide_timeout_s,
|
||||
)
|
||||
|
||||
_emit_progress(progress_callback, PHASE_PPTX_TO_PDF, 0, 1)
|
||||
convert_pptx_to_pdf(pptx_path, pdf_path, timeout_s=pptx_to_pdf_timeout)
|
||||
_emit_progress(progress_callback, PHASE_PPTX_TO_PDF, 1, 1)
|
||||
|
||||
_emit_progress(progress_callback, PHASE_PDF_TO_IMAGES, 0, slide_count)
|
||||
image_paths = render_pdf_to_images(
|
||||
pdf_path,
|
||||
image_dir,
|
||||
dpi=dpi,
|
||||
image_format=image_format,
|
||||
timeout_s=pdf_to_images_timeout_s,
|
||||
timeout_s=_compute_page_timeout(
|
||||
total_timeout_s=pdf_to_images_timeout,
|
||||
page_count=slide_count,
|
||||
),
|
||||
total_pages=slide_count,
|
||||
page_progress_callback=lambda current, max_pages: _emit_progress(
|
||||
progress_callback,
|
||||
PHASE_PDF_TO_IMAGES,
|
||||
current,
|
||||
max_pages,
|
||||
),
|
||||
)
|
||||
notes = extract_slide_notes(pptx_path)
|
||||
|
||||
if len(image_paths) != len(notes):
|
||||
raise ValueError(
|
||||
@@ -229,6 +323,40 @@ def convert_pptx_to_slidedeck(
|
||||
return SlideDeckResult(source_filename=pptx_path.name, slides=slides)
|
||||
|
||||
|
||||
def _compute_adaptive_timeout(
|
||||
*,
|
||||
slide_count: int,
|
||||
timeout_cap_s: int,
|
||||
base_timeout_s: int,
|
||||
per_slide_timeout_s: int,
|
||||
) -> int:
|
||||
"""Compute a bounded timeout that scales linearly with slide count."""
|
||||
normalized_slides = max(1, slide_count)
|
||||
adaptive_timeout = base_timeout_s + (normalized_slides * per_slide_timeout_s)
|
||||
bounded_timeout = min(timeout_cap_s, adaptive_timeout)
|
||||
return max(1, bounded_timeout)
|
||||
|
||||
|
||||
def _compute_page_timeout(*, total_timeout_s: int, page_count: int) -> int:
|
||||
"""Split total PDF raster timeout into a bounded per-page timeout."""
|
||||
if page_count <= 0:
|
||||
return max(1, total_timeout_s)
|
||||
timeout = (total_timeout_s + page_count - 1) // page_count
|
||||
return max(15, timeout)
|
||||
|
||||
|
||||
def _emit_progress(
|
||||
progress_callback: ProgressCallback | None,
|
||||
phase: str,
|
||||
current_progress: int,
|
||||
max_progress: int,
|
||||
) -> None:
|
||||
"""Emit phase/progress updates when a callback is configured."""
|
||||
if progress_callback is None:
|
||||
return
|
||||
progress_callback(phase, current_progress, max_progress)
|
||||
|
||||
|
||||
def _extract_notes_text(shapes: Iterable[object]) -> str:
|
||||
"""Extract plain text from note shapes while preserving paragraph breaks."""
|
||||
segments: list[str] = []
|
||||
|
||||
@@ -19,6 +19,10 @@ class ServerConfig:
|
||||
conversion_image_dpi: int
|
||||
conversion_pptx_to_pdf_timeout_seconds: int
|
||||
conversion_pdf_to_images_timeout_seconds: int
|
||||
conversion_pptx_to_pdf_base_timeout_seconds: int
|
||||
conversion_pptx_to_pdf_per_slide_timeout_seconds: int
|
||||
conversion_pdf_to_images_base_timeout_seconds: int
|
||||
conversion_pdf_to_images_per_slide_timeout_seconds: int
|
||||
conversion_cleanup_delay_seconds: int
|
||||
|
||||
|
||||
@@ -31,13 +35,25 @@ def load_server_config() -> ServerConfig:
|
||||
s3_secure=os.getenv("S3_USE_SSL", "false").lower() == "true",
|
||||
s3_public_endpoint=os.getenv("S3_PUBLIC_ENDPOINT", "localhost:8333"),
|
||||
s3_session_ttl_seconds=int(os.getenv("S3_SESSION_TTL_SECONDS", "3600")),
|
||||
conversion_image_dpi=int(os.getenv("CONVERSION_IMAGE_DPI", "150")),
|
||||
conversion_image_dpi=int(os.getenv("CONVERSION_IMAGE_DPI", "72")),
|
||||
conversion_pptx_to_pdf_timeout_seconds=int(
|
||||
os.getenv("CONVERSION_PPTX_TO_PDF_TIMEOUT_SECONDS", "180")
|
||||
),
|
||||
conversion_pdf_to_images_timeout_seconds=int(
|
||||
os.getenv("CONVERSION_PDF_TO_IMAGES_TIMEOUT_SECONDS", "600")
|
||||
),
|
||||
conversion_pptx_to_pdf_base_timeout_seconds=int(
|
||||
os.getenv("CONVERSION_PPTX_TO_PDF_BASE_TIMEOUT_SECONDS", "45")
|
||||
),
|
||||
conversion_pptx_to_pdf_per_slide_timeout_seconds=int(
|
||||
os.getenv("CONVERSION_PPTX_TO_PDF_PER_SLIDE_TIMEOUT_SECONDS", "3")
|
||||
),
|
||||
conversion_pdf_to_images_base_timeout_seconds=int(
|
||||
os.getenv("CONVERSION_PDF_TO_IMAGES_BASE_TIMEOUT_SECONDS", "30")
|
||||
),
|
||||
conversion_pdf_to_images_per_slide_timeout_seconds=int(
|
||||
os.getenv("CONVERSION_PDF_TO_IMAGES_PER_SLIDE_TIMEOUT_SECONDS", "8")
|
||||
),
|
||||
conversion_cleanup_delay_seconds=int(
|
||||
os.getenv("CONVERSION_CLEANUP_DELAY_SECONDS", "3600")
|
||||
),
|
||||
|
||||
@@ -22,6 +22,9 @@ class ConversionSession:
|
||||
bucket_name: str
|
||||
upload_object_key: str
|
||||
status: conversion_pb2.ConversionStatus
|
||||
phase: conversion_pb2.ConversionPhase = conversion_pb2.CONVERSION_PHASE_INACTIVE
|
||||
current_progress: int = 0
|
||||
max_progress: int = 0
|
||||
created_at: datetime = field(default_factory=utc_now)
|
||||
updated_at: datetime = field(default_factory=utc_now)
|
||||
error_message: str = ""
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from collections.abc import Callable
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
@@ -14,6 +15,11 @@ from connectrpc.errors import ConnectError
|
||||
from connectrpc.request import RequestContext
|
||||
from google.protobuf.timestamp_pb2 import Timestamp
|
||||
from officeconvert import SlideArtifact, convert_pptx_to_slidedeck
|
||||
from officeconvert.conversion import (
|
||||
PHASE_EXTRACTING_NOTES,
|
||||
PHASE_PDF_TO_IMAGES,
|
||||
PHASE_PPTX_TO_PDF,
|
||||
)
|
||||
from officeconvertapi.v1 import conversion_connect, conversion_pb2
|
||||
|
||||
from officeconvert_server.config import ServerConfig
|
||||
@@ -98,6 +104,10 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
||||
)
|
||||
|
||||
session.status = conversion_pb2.CONVERSION_STATUS_RUNNING
|
||||
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
|
||||
session.current_progress = 0
|
||||
session.max_progress = 0
|
||||
session.error_message = ""
|
||||
session.updated_at = utc_now()
|
||||
session.conversion_task = asyncio.create_task(self._run_conversion(session))
|
||||
|
||||
@@ -119,6 +129,9 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
||||
status=session.status,
|
||||
error_message=session.error_message,
|
||||
updated_at=_to_timestamp(session.updated_at),
|
||||
phase=session.phase,
|
||||
current_progress=session.current_progress,
|
||||
max_progress=session.max_progress,
|
||||
)
|
||||
|
||||
async def get_slide_deck(
|
||||
@@ -185,22 +198,47 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
||||
dpi=self._config.conversion_image_dpi,
|
||||
pptx_to_pdf_timeout_s=self._config.conversion_pptx_to_pdf_timeout_seconds,
|
||||
pdf_to_images_timeout_s=self._config.conversion_pdf_to_images_timeout_seconds,
|
||||
pptx_to_pdf_base_timeout_s=self._config.conversion_pptx_to_pdf_base_timeout_seconds,
|
||||
pptx_to_pdf_per_slide_timeout_s=self._config.conversion_pptx_to_pdf_per_slide_timeout_seconds,
|
||||
pdf_to_images_base_timeout_s=self._config.conversion_pdf_to_images_base_timeout_seconds,
|
||||
pdf_to_images_per_slide_timeout_s=self._config.conversion_pdf_to_images_per_slide_timeout_seconds,
|
||||
progress_callback=lambda phase_name, current, max_value: self._set_session_progress_from_name(
|
||||
session,
|
||||
phase_name=phase_name,
|
||||
current_progress=current,
|
||||
max_progress=max_value,
|
||||
),
|
||||
)
|
||||
self._set_session_progress(
|
||||
session,
|
||||
phase=conversion_pb2.CONVERSION_PHASE_UPLOADING_RESULTS,
|
||||
current_progress=0,
|
||||
max_progress=len(result.slides),
|
||||
)
|
||||
session.slide_deck = await asyncio.to_thread(
|
||||
self._upload_and_build_slide_deck,
|
||||
session,
|
||||
result.slides,
|
||||
result.source_filename,
|
||||
lambda current, max_value: self._set_session_progress(
|
||||
session,
|
||||
phase=conversion_pb2.CONVERSION_PHASE_UPLOADING_RESULTS,
|
||||
current_progress=current,
|
||||
max_progress=max_value,
|
||||
),
|
||||
)
|
||||
session.status = conversion_pb2.CONVERSION_STATUS_SUCCEEDED
|
||||
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
|
||||
session.updated_at = utc_now()
|
||||
except asyncio.CancelledError:
|
||||
session.status = conversion_pb2.CONVERSION_STATUS_FAILED
|
||||
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
|
||||
session.error_message = "conversion cancelled"
|
||||
session.updated_at = utc_now()
|
||||
raise
|
||||
except Exception as exc:
|
||||
session.status = conversion_pb2.CONVERSION_STATUS_FAILED
|
||||
session.phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
|
||||
session.error_message = str(exc)
|
||||
session.updated_at = utc_now()
|
||||
finally:
|
||||
@@ -212,10 +250,12 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
||||
session: ConversionSession,
|
||||
slides: list[SlideArtifact],
|
||||
source_filename: str,
|
||||
progress_callback: Callable[[int, int], None] | None = None,
|
||||
) -> conversion_pb2.SlideDeck:
|
||||
"""Upload generated slide images and construct API response payload."""
|
||||
response_slides: list[conversion_pb2.Slide] = []
|
||||
for slide in slides:
|
||||
slide_total = len(slides)
|
||||
for slide_index, slide in enumerate(slides, start=1):
|
||||
object_key = f"output/slide-{slide.index:04d}{slide.image_path.suffix}"
|
||||
self._store.fput_object(session.bucket_name, object_key, slide.image_path)
|
||||
image_url = self._store.presigned_get_url(
|
||||
@@ -230,6 +270,8 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
||||
image_url=image_url,
|
||||
)
|
||||
)
|
||||
if progress_callback is not None:
|
||||
progress_callback(slide_index, slide_total)
|
||||
|
||||
return conversion_pb2.SlideDeck(
|
||||
conversion_id=session.conversion_id,
|
||||
@@ -263,6 +305,45 @@ class ConversionServiceImpl(conversion_connect.ConversionService):
|
||||
raise ConnectError(Code.NOT_FOUND, "conversion_id not found")
|
||||
return session
|
||||
|
||||
def _set_session_progress_from_name(
    self,
    session: ConversionSession,
    *,
    phase_name: str,
    current_progress: int,
    max_progress: int,
) -> None:
    """Translate a conversion-library phase name into the API phase enum.

    Names that the library does not define here fall back to the inactive
    phase; the progress counters are passed through to
    ``_set_session_progress`` unchanged.
    """
    api_phase_by_name = {
        PHASE_EXTRACTING_NOTES: conversion_pb2.CONVERSION_PHASE_EXTRACTING_NOTES,
        PHASE_PPTX_TO_PDF: conversion_pb2.CONVERSION_PHASE_PPTX_TO_PDF,
        PHASE_PDF_TO_IMAGES: conversion_pb2.CONVERSION_PHASE_PDF_TO_IMAGES,
    }
    if phase_name in api_phase_by_name:
        api_phase = api_phase_by_name[phase_name]
    else:
        api_phase = conversion_pb2.CONVERSION_PHASE_INACTIVE
    self._set_session_progress(
        session,
        phase=api_phase,
        current_progress=current_progress,
        max_progress=max_progress,
    )
|
||||
|
||||
def _set_session_progress(
    self,
    session: ConversionSession,
    *,
    phase: conversion_pb2.ConversionPhase,
    current_progress: int,
    max_progress: int,
) -> None:
    """Store the phase and clamped progress counters, then refresh the timestamp.

    Negative counters are raised to zero. When the maximum is positive, the
    current value is additionally limited to that maximum.
    """
    upper = max_progress if max_progress > 0 else 0
    value = current_progress if current_progress > 0 else 0
    # A maximum of zero means "unknown/none", so the current value is not capped.
    if upper > 0 and value > upper:
        value = upper
    session.phase = phase
    session.current_progress = value
    session.max_progress = upper
    session.updated_at = utc_now()
|
||||
|
||||
|
||||
def _to_timestamp(value: datetime) -> Timestamp:
|
||||
"""Convert a timezone-aware datetime to protobuf Timestamp."""
|
||||
|
||||
Reference in New Issue
Block a user