Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion openapi/Swarm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ paths:
post:
summary: "Upload a file or collection of files"
description:
"Upload single files or collections of files. To upload a collection, send a multipart request with files in the form data with appropriate headers. Tar files can be uploaded with the `swarm-collection` header to extract and upload the directory structure. Without the `swarm-collection` header, requests are treated as single file uploads. Multipart requests are always treated as collections; use the `swarm-index-document` header to specify a single file to serve."
"Upload single files or collections of files. For a single file, `Content-Type` is optional: when present it is stored as metadata as-is; when absent the server infers a type from the start of the body. To upload a collection, send a multipart request with files in the form data with appropriate headers. Tar files can be uploaded with the `swarm-collection` header to extract and upload the directory structure. Without the `swarm-collection` header, requests are treated as single file uploads. Multipart requests are always treated as collections; use the `swarm-index-document` header to specify a single file to serve."
tags:
- BZZ
parameters:
Expand Down
2 changes: 1 addition & 1 deletion openapi/SwarmCommon.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1176,7 +1176,7 @@ components:
name: Content-Type
schema:
type: string
description: The specified content-type is preserved for download of the asset
description: "Single file: trimmed Content-Type is stored as-is or, if omitted or empty, inferred from the first bytes without validating against the body; tar (`swarm-collection`) and multipart collection uploads still need a full-body Content-Type (e.g. `application/x-tar` or `multipart/form-data` with boundary) so the request can be parsed."

SwarmIndexDocumentParameter:
in: header
Expand Down
5 changes: 0 additions & 5 deletions pkg/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import (
"io"
"math"
"math/big"
"mime"
"net/http"
"reflect"
"strconv"
Expand Down Expand Up @@ -302,10 +301,6 @@ func New(
s.chainBackend = chainBackend
s.metricsRegistry = newDebugMetrics()
s.preMapHooks = map[string]func(v string) (string, error){
"mimeMediaType": func(v string) (string, error) {
typ, _, err := mime.ParseMediaType(v)
return typ, err
},
"decBase64url": func(v string) (string, error) {
buf, err := base64.URLEncoding.DecodeString(v)
return string(buf), err
Expand Down
46 changes: 40 additions & 6 deletions pkg/api/bzz.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
package api

import (
"bytes"
"context"
"encoding/hex"
"errors"
"fmt"
"io"
"mime"
"net/http"
"path"
"path/filepath"
Expand Down Expand Up @@ -51,6 +54,8 @@ const (
largeFileBufferSize = 16 * 32 * 1024

largeBufferFilesizeThreshold = 10 * 1000000 // ten megs

contentTypeSniffLen = 512
)

func lookaheadBufferSize(size int64) int {
Expand All @@ -65,7 +70,7 @@ func (s *Service) bzzUploadHandler(w http.ResponseWriter, r *http.Request) {
defer span.Finish()

headers := struct {
ContentType string `map:"Content-Type,mimeMediaType" validate:"required"`
ContentType string `map:"Content-Type"`
BatchID []byte `map:"Swarm-Postage-Batch-Id" validate:"required"`
SwarmTag uint64 `map:"Swarm-Tag"`
Pin bool `map:"Swarm-Pin"`
Expand Down Expand Up @@ -137,11 +142,24 @@ func (s *Service) bzzUploadHandler(w http.ResponseWriter, r *http.Request) {
logger: logger,
}

if headers.IsDir || headers.ContentType == multiPartFormData {
s.dirUploadHandler(ctx, logger, span, ow, r, putter, r.Header.Get(ContentTypeHeader), headers.Encrypt, tag, headers.RLevel, headers.Act, headers.HistoryAddress)
contentTypeHdr := strings.TrimSpace(headers.ContentType)
r.Header.Set(ContentTypeHeader, contentTypeHdr)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this necessary?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Before my changes, dirUploadHandler accepted a contentTypeString argument (although the value is already present in the request).
So I changed that here: the main handler now trims the value and sets it back on the request header.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I agree with @martinconic that it is not really clear why this is needed. If you're already reading, cleaning, parsing and checking the Content-Type header, it does not make sense to also update the value in the Header type.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@acud @martinconic
Good point, I agree this can look a bit strange at first.

I set the header on purpose, so that we clean Content-Type once and all subsequent steps read the same value.

Without this, one part can use the trimmed value while another part (fileUploadHandler) can still read the original raw header. This can cause subtle inconsistencies later, especially for multipart handling and metadata.

If changing the header feels too implicit, I can refactor and pass the cleaned value explicitly to the downstream functions. I am fine with both styles, but I think we should keep one canonical Content-Type value in one place.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @akrem-chabchoub.
The problem from my perspective is that you're already cleaning and checking a value and mutating it, but then you're telling downstream function calls to potentially do the same again, because you've removed the argument from the signature. From my perspective this is unnecessary and potentially duplicates code.

This may work in middleware-style calls where the middlewares are decoupled (for example, one middleware that processes a user login sets a header to provide extra details further down the call stack), but I can't see how this is needed here (you're already at the last middleware in the API).

I won't block the PR on this, please choose how you'd like to proceed with this.

mt, _, errParseCT := mime.ParseMediaType(contentTypeHdr)
isMultipart := errParseCT == nil && mt == multiPartFormData

isDirUpload := headers.IsDir || isMultipart
if !isDirUpload {
s.fileUploadHandler(ctx, logger, span, ow, r, putter, headers.Encrypt, tag, headers.RLevel, headers.Act, headers.HistoryAddress)
return
}

if contentTypeHdr == "" {
Comment thread
akrem-chabchoub marked this conversation as resolved.
logger.Error(nil, "content-type required for directory upload")
jsonhttp.BadRequest(w, errInvalidContentType)
return
}
s.fileUploadHandler(ctx, logger, span, ow, r, putter, headers.Encrypt, tag, headers.RLevel, headers.Act, headers.HistoryAddress)

s.dirUploadHandler(ctx, logger, span, ow, r, putter, headers.Encrypt, tag, headers.RLevel, headers.Act, headers.HistoryAddress)
}

// bzzUploadResponse is returned when an HTTP request to upload a file is successful
Expand Down Expand Up @@ -174,8 +192,24 @@ func (s *Service) fileUploadHandler(

p := requestPipelineFn(putter, encrypt, rLevel)

var body io.Reader = r.Body
if r.Header.Get(ContentTypeHeader) == "" {
sniffBuf := make([]byte, contentTypeSniffLen)
n, err := io.ReadFull(r.Body, sniffBuf)
sniffBuf = sniffBuf[:n]
if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
logger.Debug("body read failed", "file_name", queries.FileName, "error", err)
logger.Error(nil, "body read failed", "file_name", queries.FileName)
jsonhttp.BadRequest(w, "failed to read request body")
return
}

r.Header.Set(ContentTypeHeader, http.DetectContentType(sniffBuf))
body = io.MultiReader(bytes.NewReader(sniffBuf), r.Body)
}

// first store the file and get its reference
fr, err := p(ctx, r.Body)
fr, err := p(ctx, body)
if err != nil {
logger.Debug("file store failed", "file_name", queries.FileName, "error", err)
logger.Error(nil, "file store failed", "file_name", queries.FileName)
Expand Down Expand Up @@ -240,7 +274,7 @@ func (s *Service) fileUploadHandler(
}

fileMtdt := map[string]string{
manifest.EntryMetadataContentTypeKey: r.Header.Get(ContentTypeHeader), // Content-Type has already been validated.
manifest.EntryMetadataContentTypeKey: r.Header.Get(ContentTypeHeader),
manifest.EntryMetadataFilenameKey: queries.FileName,
}

Expand Down
75 changes: 75 additions & 0 deletions pkg/api/bzz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,81 @@ func TestBzzFiles(t *testing.T) {
)
})

t.Run("omit-content-type-uses-sniff", func(t *testing.T) {
fileName := "plain.txt"
var resp api.BzzUploadResponse
jsonhttptest.Request(t, client, http.MethodPost, fileUploadResource+"?name="+fileName, http.StatusCreated,
jsonhttptest.WithRequestHeader(api.SwarmDeferredUploadHeader, "true"),
jsonhttptest.WithRequestHeader(api.SwarmPostageBatchIdHeader, batchOkStr),
jsonhttptest.WithRequestBody(bytes.NewReader(simpleData)),
jsonhttptest.WithUnmarshalJSONResponse(&resp),
)
rootHash := resp.Reference.String()
jsonhttptest.Request(t, client, http.MethodGet, fileDownloadResource(rootHash), http.StatusOK,
jsonhttptest.WithExpectedResponse(simpleData),
jsonhttptest.WithExpectedContentLength(len(simpleData)),
jsonhttptest.WithExpectedResponseHeader(api.ContentTypeHeader, "text/plain; charset=utf-8"),
jsonhttptest.WithExpectedResponseHeader(api.ContentDispositionHeader, fmt.Sprintf(`inline; filename="%s"`, fileName)),
)
})

t.Run("image-content-type-preserved", func(t *testing.T) {
ct := "image/png"
fileName := "test.txt"
var resp api.BzzUploadResponse
jsonhttptest.Request(t, client, http.MethodPost, fileUploadResource+"?name="+fileName, http.StatusCreated,
jsonhttptest.WithRequestHeader(api.SwarmDeferredUploadHeader, "true"),
jsonhttptest.WithRequestHeader(api.SwarmPostageBatchIdHeader, batchOkStr),
jsonhttptest.WithRequestHeader(api.ContentTypeHeader, ct),
jsonhttptest.WithRequestBody(bytes.NewReader(simpleData)),
jsonhttptest.WithUnmarshalJSONResponse(&resp),
)
rootHash := resp.Reference.String()
jsonhttptest.Request(t, client, http.MethodGet, fileDownloadResource(rootHash), http.StatusOK,
jsonhttptest.WithExpectedResponse(simpleData),
jsonhttptest.WithExpectedContentLength(len(simpleData)),
jsonhttptest.WithExpectedResponseHeader(api.ContentTypeHeader, ct),
jsonhttptest.WithExpectedResponseHeader(api.ContentDispositionHeader, fmt.Sprintf(`inline; filename="%s"`, fileName)),
)
})

t.Run("dir-upload-missing-content-type", func(t *testing.T) {
tr := tarFiles(t, []f{
{
data: []byte("robots text"),
name: "robots.txt",
dir: "",
header: http.Header{
api.ContentTypeHeader: {"text/plain; charset=utf-8"},
},
},
})

jsonhttptest.Request(t, client, http.MethodPost, fileUploadResource, http.StatusBadRequest,
jsonhttptest.WithRequestHeader(api.SwarmDeferredUploadHeader, "true"),
jsonhttptest.WithRequestHeader(api.SwarmPostageBatchIdHeader, batchOkStr),
jsonhttptest.WithRequestHeader(api.SwarmCollectionHeader, "true"),
jsonhttptest.WithRequestBody(tr),
jsonhttptest.WithExpectedJSONResponse(jsonhttp.StatusResponse{
Message: api.ErrInvalidContentType.Error(),
Code: http.StatusBadRequest,
}),
)
})

t.Run("dir-upload-missing-content-type-and-body", func(t *testing.T) {
jsonhttptest.Request(t, client, http.MethodPost, fileUploadResource, http.StatusBadRequest,
jsonhttptest.WithRequestHeader(api.SwarmDeferredUploadHeader, "true"),
jsonhttptest.WithRequestHeader(api.SwarmPostageBatchIdHeader, batchOkStr),
jsonhttptest.WithRequestHeader(api.SwarmCollectionHeader, "true"),
jsonhttptest.WithRequestBody(bytes.NewReader(nil)),
jsonhttptest.WithExpectedJSONResponse(jsonhttp.StatusResponse{
Message: api.ErrInvalidContentType.Error(),
Code: http.StatusBadRequest,
}),
)
})

t.Run("upload-then-download-and-check-data", func(t *testing.T) {
fileName := "sample.html"
rootHash := "36e6c1bbdfee6ac21485d5f970479fd1df458d36df9ef4e8179708ed46da557f"
Expand Down
5 changes: 2 additions & 3 deletions pkg/api/dirs.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ func (s *Service) dirUploadHandler(
w http.ResponseWriter,
r *http.Request,
putter storer.PutterSession,
contentTypeString string,
encrypt bool,
tag uint64,
rLevel redundancy.Level,
Expand All @@ -57,8 +56,8 @@ func (s *Service) dirUploadHandler(
return
}

// The error is ignored because the header was already validated by the caller.
mediaType, params, _ := mime.ParseMediaType(contentTypeString)
// Parse error is ignored; unsupported media types are caught by the default case below.
mediaType, params, _ := mime.ParseMediaType(r.Header.Get(ContentTypeHeader))

var dReader dirReader
switch mediaType {
Expand Down
Loading