How to track the progress of a multipart upload to S3 using Go?

I am attempting to use the PutPart method provided by the goamz fork by Mitchell Hashimoto. Sadly every time I get a part back and check the size it seems to think it is the size of the whole file and not just a chunk.

For instance

When uploading a 15MB file, I expect to see:

Uploading...
Processing 1 part of 3 and uploaded 5242880.0 bytes.
 Processing 2 part of 3 and uploaded 5242880.0 bytes.
 Processing 3 part of 3 and uploaded 5242880.0 bytes.

Instead I see:

Uploading...
Processing 1 part of 3 and uploaded 15728640 bytes.
 Processing 2 part of 3 and uploaded 15728640 bytes.
 Processing 3 part of 3 and uploaded 15728640 bytes.

Is this due to an issue with the file.Read(partBuffer) call? Any help would be much appreciated.

I am using Go 1.5.1 on a Mac.

package main

import (
    "bufio"
    "fmt"
    "math"
    "net/http"
    "os"

    "github.com/mitchellh/goamz/aws"
    "github.com/mitchellh/goamz/s3"
)

func check(err error) {
    if err != nil {
        panic(err)
    }
}

func main() {
    fmt.Println("Test")

    auth, err := aws.GetAuth("XXXXX", "XXXXXXXXXX")
    check(err)

    client := s3.New(auth, aws.USWest2)

    b := s3.Bucket{
        S3:   client,
        Name: "some-bucket",
    }

    fileToBeUploaded := "testfile"
    file, err := os.Open(fileToBeUploaded)
    check(err)
    defer file.Close()

    fileInfo, _ := file.Stat()
    fileSize := fileInfo.Size()
    bytes := make([]byte, fileSize)

    // read into buffer
    buffer := bufio.NewReader(file)
    _, err = buffer.Read(bytes)
    check(err)
    filetype := http.DetectContentType(bytes)

    // set up for multipart upload
    multi, err := b.InitMulti("/"+fileToBeUploaded, filetype, s3.ACL("bucket-owner-read"))
    check(err)

    const fileChunk = 5242880 // 5MB
    totalPartsNum := uint64(math.Ceil(float64(fileSize) / float64(fileChunk)))
    parts := []s3.Part{}

    fmt.Println("Uploading...")
    for i := uint64(1); i < totalPartsNum; i++ {

        partSize := int(math.Min(fileChunk, float64(fileSize-int64(i*fileChunk))))
        partBuffer := make([]byte, partSize)

        _, err := file.Read(partBuffer)
        check(err)

        part, err := multi.PutPart(int(i), file) // write to S3 bucket part by part
        check(err)

        fmt.Printf("Processing %d part of %d and uploaded %d bytes.
 ", int(i), int(totalPartsNum), int(part.Size))
        parts = append(parts, part)
    }

    err = multi.Complete(parts)
    check(err)

    fmt.Println("

PutPart upload completed")

}

It's possible the issue here is caused by not reading the file fully. Read can be a little subtle:

Read reads up to len(p) bytes into p. It returns the number of bytes read (0 <= n <= len(p)) and any error encountered. Even if Read returns n < len(p), it may use all of p as scratch space during the call. If some data is available but not len(p) bytes, Read conventionally returns what is available instead of waiting for more.

So you should probably be using io.ReadFull or (better) io.CopyN.
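For example, here is a minimal sketch of reading one part with io.ReadFull (the readPart helper is hypothetical, not part of goamz, and needs the io package imported; adapt the error handling to your needs):

// readPart fills a buffer of exactly partSize bytes from r, tolerating a
// shorter final part. io.ReadFull keeps reading until the buffer is full,
// unlike a single Read call, which may return fewer bytes.
func readPart(r io.Reader, partSize int) ([]byte, error) {
    buf := make([]byte, partSize)
    n, err := io.ReadFull(r, buf)
    if err == io.EOF || err == io.ErrUnexpectedEOF {
        // The final part may be shorter than partSize; keep what was read.
        return buf[:n], nil
    }
    return buf[:n], err
}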

That said, I think you should try switching to the official AWS SDK for Go. It has a handy Uploader which will handle all of this for you:

package main

import (
    "log"
    "os"

    "github.com/aws/aws-sdk-go/aws/session"
    "github.com/aws/aws-sdk-go/service/s3/s3manager"
)

func main() {
    bucketName := "test-bucket"
    keyName := "test-key"
    file, err := os.Open("example")
    if err != nil {
        log.Fatalln(err)
    }
    defer file.Close()

    sess := session.New()
    uploader := s3manager.NewUploader(sess)

    // Perform an upload.
    result, err := uploader.Upload(&s3manager.UploadInput{
        Bucket: &bucketName,
        Key:    &keyName,
        Body:   file,
    })
    if err != nil {
        log.Fatalln(err)
    }
    log.Println(result)
}

You can find more documentation on godoc.org.

The data you read into partBuffer isn't used at all. You pass file into multi.PutPart and it reads the entire contents of file, seeking it back to the beginning as necessary and blowing away all of the work that you've done.

The minimal change to your code would be to pass bytes.NewReader(partBuffer) into PutPart, instead of file. bytes.Reader implements the io.ReadSeeker interface that PutPart needs, and will also report its size as being that of partBuffer.
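Applied to the loop in the question, that minimal change would look something like this (a sketch: add "bytes" to the imports, and the rest of the loop stays as it is):

    // upload just the chunk that was read into partBuffer, not the whole file
    part, err := multi.PutPart(int(i), bytes.NewReader(partBuffer))
    check(err)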

An alternative would be to use the io.SectionReader type — instead of reading the data into a buffer yourself, you just create a series of SectionReaders based on file with the sizes and offsets that you want and pass them into PutPart, and they will pass the reads on to the underlying file reader. That should work just as well and cut down the code that you have to write (and error-check) considerably. It also avoids buffering a whole chunk of data in RAM unnecessarily.
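Here is a sketch of the io.SectionReader approach, reusing the question's variable names (fileChunk, fileSize, totalPartsNum, multi, parts, check); the offset arithmetic is my own illustration, not tested code:

for i := int64(0); i < int64(totalPartsNum); i++ {
    offset := i * fileChunk
    size := int64(fileChunk)
    if remaining := fileSize - offset; remaining < size {
        size = remaining
    }

    // The SectionReader reads size bytes of file starting at offset,
    // without copying the chunk into a buffer first. It implements
    // io.ReadSeeker, which is what PutPart expects.
    section := io.NewSectionReader(file, offset, size)

    part, err := multi.PutPart(int(i)+1, section)
    check(err)

    fmt.Printf("Processing part %d of %d, uploaded %d bytes.\n", i+1, totalPartsNum, part.Size)
    parts = append(parts, part)
}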

When you pass the file part to the multi.PutPart method as (n, strings.NewReader("")), a few points in your code have to change for this to work correctly; the code below works.

Remember that PutPart sends one part of the multipart upload, reading all of its content from r, and that every part except the last one must be at least 5MB in size. This is described in the goamz docs.

Points I've changed to make it work correctly:

Here I create HeaderPart with all the bytes of the file:

HeaderPart := strings.NewReader(string(bytes))

Here io.ReadFull(HeaderPart, partBuffer) fills the whole buffer created by make([]byte, partSize), so on each iteration it is positioned at a particular part of the file.

And when we call multi.PutPart(int(i)+1, strings.NewReader(string(partBuffer))), we need the +1 because part numbers start at 1, not 0, and instead of passing the file object we pass the content of the part using strings.NewReader.

Check out the code below; it now works correctly.

package main

import (
    "bufio"
    "fmt"
    "io"
    "math"
    "net/http"
    "os"
    "strings"

    "launchpad.net/goamz/aws"
    "launchpad.net/goamz/s3"
)

func check(err error) {
    if err != nil {
        panic(err)
    }
}

func main() {
    fmt.Println("Test")

    auth := aws.Auth{
        AccessKey: "xxxxxxxxxxx", // change this to yours
        SecretKey: "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
    }

    client := s3.New(auth, aws.USWest2)

    b := s3.Bucket{
        S3:   client,
        Name: "some-bucket",
    }

    fileToBeUploaded := "testfile"
    file, err := os.Open(fileToBeUploaded)
    check(err)
    defer file.Close()

    fileInfo, _ := file.Stat()
    fileSize := fileInfo.Size()
    bytes := make([]byte, fileSize)

    // read into buffer
    buffer := bufio.NewReader(file)
    _, err = buffer.Read(bytes)
    check(err)
    filetype := http.DetectContentType(bytes)

    // set up for multipart upload
    multi, err := b.InitMulti("/"+fileToBeUploaded, filetype, s3.ACL("bucket-owner-read"))
    check(err)

    const fileChunk = 5242880 // 5MB
    totalPartsNum := uint64(math.Ceil(float64(fileSize) / float64(fileChunk)))
    parts := []s3.Part{}

    fmt.Println("Uploading...")

    HeaderPart := strings.NewReader(string(bytes))

    for i := uint64(0); i < totalPartsNum; i++ {

        partSize := int(math.Min(fileChunk, float64(fileSize-int64(i*fileChunk))))
        partBuffer := make([]byte, partSize)

        n, errx := io.ReadFull(HeaderPart, partBuffer)
        check(errx)

        // part numbers start at 1, so pass i+1
        part, err := multi.PutPart(int(i)+1, strings.NewReader(string(partBuffer))) // write to S3 bucket part by part
        check(err)

        fmt.Printf("Processing %d part of %d and uploaded %d bytes.\n ", int(i)+1, int(totalPartsNum), n)
        parts = append(parts, part)
    }

    err = multi.Complete(parts)
    check(err)

    fmt.Println("\n\nPutPart upload completed")
}