-
Notifications
You must be signed in to change notification settings - Fork 388
feat(upload): auto-detect file content-type if not sent #5361
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
6fb186a
b405b73
1c6cb71
147ce4b
fe1e6a3
10580df
bfaf1e7
8dbae20
7e687cb
3735903
1206113
c544efa
4ca4f22
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,10 +5,13 @@ | |
| package api | ||
|
|
||
| import ( | ||
| "bytes" | ||
| "context" | ||
| "encoding/hex" | ||
| "errors" | ||
| "fmt" | ||
| "io" | ||
| "mime" | ||
| "net/http" | ||
| "path" | ||
| "path/filepath" | ||
|
|
@@ -51,6 +54,8 @@ const ( | |
| largeFileBufferSize = 16 * 32 * 1024 | ||
|
|
||
| largeBufferFilesizeThreshold = 10 * 1000000 // ten megs | ||
|
|
||
| contentTypeSniffLen = 512 | ||
| ) | ||
|
|
||
| func lookaheadBufferSize(size int64) int { | ||
|
|
@@ -65,7 +70,7 @@ func (s *Service) bzzUploadHandler(w http.ResponseWriter, r *http.Request) { | |
| defer span.Finish() | ||
|
|
||
| headers := struct { | ||
| ContentType string `map:"Content-Type,mimeMediaType" validate:"required"` | ||
| ContentType string `map:"Content-Type"` | ||
| BatchID []byte `map:"Swarm-Postage-Batch-Id" validate:"required"` | ||
| SwarmTag uint64 `map:"Swarm-Tag"` | ||
| Pin bool `map:"Swarm-Pin"` | ||
|
|
@@ -137,8 +142,18 @@ func (s *Service) bzzUploadHandler(w http.ResponseWriter, r *http.Request) { | |
| logger: logger, | ||
| } | ||
|
|
||
| if headers.IsDir || headers.ContentType == multiPartFormData { | ||
| s.dirUploadHandler(ctx, logger, span, ow, r, putter, r.Header.Get(ContentTypeHeader), headers.Encrypt, tag, headers.RLevel, headers.Act, headers.HistoryAddress) | ||
| contentTypeHdr := strings.TrimSpace(headers.ContentType) | ||
| r.Header.Set(ContentTypeHeader, contentTypeHdr) | ||
| mt, _, errParseCT := mime.ParseMediaType(contentTypeHdr) | ||
| isMultipart := errParseCT == nil && mt == multiPartFormData | ||
| if headers.IsDir || isMultipart { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe this is more readable, wdyt ? isDirUpload := headers.IsDir || isMultipart
if !isDirUpload {
s.fileUploadHandler(ctx, logger, span, ow, r, putter, headers.Encrypt, tag, headers.RLevel, headers.Act, headers.HistoryAddress)
return
}
if contentTypeHdr == "" {
logger.Error(nil, "content-type required for directory upload")
jsonhttp.BadRequest(w, errInvalidContentType)
return
}
s.dirUploadHandler(ctx, logger, span, ow, r, putter, headers.Encrypt, tag, headers.RLevel, headers.Act, headers.HistoryAddress)
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. both are fine 👍 the point was to just reduce nesting |
||
| if contentTypeHdr == "" { | ||
| logger.Debug("content-type required for directory upload") | ||
|
akrem-chabchoub marked this conversation as resolved.
Outdated
|
||
| logger.Error(nil, "content-type required for directory upload") | ||
| jsonhttp.BadRequest(w, errInvalidContentType) | ||
| return | ||
| } | ||
| s.dirUploadHandler(ctx, logger, span, ow, r, putter, headers.Encrypt, tag, headers.RLevel, headers.Act, headers.HistoryAddress) | ||
| return | ||
| } | ||
| s.fileUploadHandler(ctx, logger, span, ow, r, putter, headers.Encrypt, tag, headers.RLevel, headers.Act, headers.HistoryAddress) | ||
|
|
@@ -174,8 +189,23 @@ func (s *Service) fileUploadHandler( | |
|
|
||
| p := requestPipelineFn(putter, encrypt, rLevel) | ||
|
|
||
| sniffBuf := make([]byte, contentTypeSniffLen) | ||
| n, err := io.ReadFull(r.Body, sniffBuf) | ||
| sniffBuf = sniffBuf[:n] | ||
| if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) { | ||
| logger.Debug("body read failed", "file_name", queries.FileName, "error", err) | ||
| logger.Error(nil, "body read failed", "file_name", queries.FileName) | ||
| jsonhttp.BadRequest(w, "failed to read request body") | ||
| return | ||
| } | ||
|
|
||
| if r.Header.Get(ContentTypeHeader) == "" { | ||
| r.Header.Set(ContentTypeHeader, http.DetectContentType(sniffBuf)) | ||
| } | ||
| bodyForStore := io.MultiReader(bytes.NewReader(sniffBuf), r.Body) | ||
|
|
||
| // first store the file and get its reference | ||
| fr, err := p(ctx, r.Body) | ||
| fr, err := p(ctx, bodyForStore) | ||
| if err != nil { | ||
| logger.Debug("file store failed", "file_name", queries.FileName, "error", err) | ||
| logger.Error(nil, "file store failed", "file_name", queries.FileName) | ||
|
|
@@ -240,7 +270,7 @@ func (s *Service) fileUploadHandler( | |
| } | ||
|
|
||
| fileMtdt := map[string]string{ | ||
| manifest.EntryMetadataContentTypeKey: r.Header.Get(ContentTypeHeader), // Content-Type has already been validated. | ||
| manifest.EntryMetadataContentTypeKey: r.Header.Get(ContentTypeHeader), | ||
| manifest.EntryMetadataFilenameKey: queries.FileName, | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -44,7 +44,6 @@ func (s *Service) dirUploadHandler( | |
| w http.ResponseWriter, | ||
| r *http.Request, | ||
| putter storer.PutterSession, | ||
| contentTypeString string, | ||
| encrypt bool, | ||
| tag uint64, | ||
| rLevel redundancy.Level, | ||
|
|
@@ -58,7 +57,7 @@ func (s *Service) dirUploadHandler( | |
| } | ||
|
|
||
| // The error is ignored because the header was already validated by the caller. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment is not relevant anymore, right? the header is not validated by the caller. Maybe something like this: , or do we want to validate here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch, agreed. |
||
| mediaType, params, _ := mime.ParseMediaType(contentTypeString) | ||
| mediaType, params, _ := mime.ParseMediaType(r.Header.Get(ContentTypeHeader)) | ||
|
|
||
| var dReader dirReader | ||
| switch mediaType { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this necessary?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Before my changes the
dirUploadHandlerwas acceptingcontentTypeString(although it is present request).So I changed that here and did trim and set the new value in main handler.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that I agree with @martinconic that this is not really clear why this is needed. If you're already reading, cleaning, parsing and checking the content type header - it does not make any more sense to update the value in the
Headertype.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@acud @martinconic
Good point, i agree this can look a bit strange at first.
i set the header on purpose, so we clean
Content-Typeone time, and then all next steps read the same value.without this, one part can use the trimmed value, but another part (
fileUploadHandler) can still read the original raw header. this can cause small inconsistent behavior later, especially for multipart handling and metadata.if changing the header feels too implicit, i can refactor and pass the cleaned value explicitly to the next functions. i am fine with both styles, but i think we should keep one canonical content-type value in one place.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks @akrem-chabchoub.
The problem from my perspective is that you're already doing cleaning and checking of a value, mutating it, but then you're telling downstream function calls to potentially do the same, again, because you've removed the argument from the signature. This is from my perspective not necessary and potentially duplicates code.
This may work in middleware style calls where the middlewares are decoupled (and you want to be setting a header on one middleware that processes for example a user-login or something along those lines, providing you with extra details down the call stack), but I can't see how this is needed here (you're already at the last middleware in the API).
I won't block the PR on this, please choose how you'd like to proceed with this.