最近有一個需求是要下載 S3 檔案並且紀錄下載時間,找到的範例都不太符合需求,所以就自己寫一個紀錄一下,分享給需要用到的人 ~
下載列表檔
需求是:有一個 JSON 檔案列出要下載的 S3 物件路徑(key),程式要把裡面列出的檔案全部下載下來。列表範例如下:
["Folder1/Folder1/File1.jpg","Folder1/Folder2/File2.jpg"]
檔案名稱為:fileList.json
程式會自動判斷資料夾並且建立起對應的資料夾路徑
程式本體
使用之前記得先用 `go get github.com/aws/aws-sdk-go` 下載 Golang 的 AWS SDK,之後用 `go run main.go` 執行。
package main
import (
"encoding/json"
"fmt"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/aws/aws-sdk-go/service/s3/s3manager"
"io/ioutil"
"os"
"path"
"path/filepath"
"sync"
"time"
)
// Configuration for the downloader. The values below look like placeholders
// and are presumably meant to be replaced before running.
// NOTE(review): credentials are hard-coded here; consider loading them from
// the environment or a shared credentials file instead of source code.
const (
// Bucket is the name of the S3 bucket the objects are downloaded from.
Bucket = "bucket"
// Region is the AWS region passed to the session configuration.
Region = "us-west-2"
// CredentialsId is the access key ID passed to the static credentials provider.
CredentialsId = "id"
// CredentialsSecret is the secret access key passed to the static credentials provider.
CredentialsSecret = "secret"
// CredentialsToken is the session token passed to the static credentials provider; empty here.
CredentialsToken = ""
// FolderDir is the local directory, relative to the working directory,
// under which downloaded files are stored.
FolderDir = "s3"
)
// download fetches a single S3 object and stores it under FolderDir,
// recreating the key's folder structure on the local disk.
//
// Parameters:
//   - i: index of the item, used only to tag log lines.
//   - downloader: the S3 download manager used to fetch the object.
//   - wg: wait group that is marked done when this call returns.
//   - bucket: name of the S3 bucket.
//   - key: slash-separated object key inside the bucket.
//   - sem: semaphore channel; a slot is taken before work starts and
//     released on return, so its capacity bounds concurrent downloads.
//
// Failures are reported on stdout and the function returns without
// panicking. A file that could not be downloaded completely is removed so
// that no empty or truncated file is left behind.
func download(i int, downloader *s3manager.Downloader, wg *sync.WaitGroup, bucket *string, key *string, sem chan int) {
	// Registered first so the WaitGroup is released on every exit path.
	defer wg.Done()

	// Block until a semaphore slot is free; give it back when finished.
	sem <- 1
	defer func() {
		<-sem
	}()

	fmt.Println("[", i, "] Start downloading ...")
	startTime := time.Now()

	// S3 keys are slash-separated. Rooting the key at "/" before cleaning
	// collapses any ".." segments so the result cannot escape FolderDir;
	// FromSlash then converts it to the OS-specific separator.
	relKey := path.Clean("/" + *key)
	localPath := filepath.Join(".", FolderDir, filepath.FromSlash(relKey))

	// filepath.Dir (not path.Split) is required here: localPath uses the OS
	// separator, which is a backslash on Windows.
	if err := os.MkdirAll(filepath.Dir(localPath), os.ModePerm); err != nil {
		fmt.Println("[", i, "] Failed to create directory", err)
		return
	}

	file, err := os.Create(localPath)
	if err != nil {
		fmt.Println("[", i, "] Failed to create file", err)
		return
	}

	numBytes, err := downloader.Download(file,
		&s3.GetObjectInput{
			Bucket: bucket,
			Key:    key,
		})
	// Close explicitly (rather than defer) so write-back errors are seen and
	// so the file can be removed on platforms that forbid deleting open files.
	closeErr := file.Close()
	if err != nil {
		fmt.Println("[", i, "] Failed to download file", err)
		// Best-effort cleanup of the partial file; the download error above
		// is the one worth reporting.
		_ = os.Remove(localPath)
		return
	}
	if closeErr != nil {
		fmt.Println("[", i, "] Failed to close file", closeErr)
		return
	}

	fmt.Println("[", i, "] Downloaded file", file.Name(), numBytes, "bytes", "time:", time.Since(startTime))
}
// main reads the list of S3 object keys from fileList.json, downloads every
// listed object concurrently (at most 10 at a time) and prints the total
// elapsed time.
//
// The program exits with status 1 if the list file cannot be read or is not
// a valid JSON array of strings.
func main() {
	// Semaphore limiting the number of downloads running at the same time.
	sem := make(chan int, 10)

	jsonFile, err := ioutil.ReadFile("fileList.json")
	if err != nil {
		// Without the list there is nothing to download; stop instead of
		// reporting a misleading success below.
		fmt.Println(err)
		os.Exit(1)
	}
	fmt.Println("Successfully Opened fileList.json")

	var downloadList []string
	if err := json.Unmarshal(jsonFile, &downloadList); err != nil {
		fmt.Println("Failed to parse fileList.json:", err)
		os.Exit(1)
	}

	bucket := aws.String(Bucket)
	newSession := session.Must(session.NewSession(
		&aws.Config{
			Credentials: credentials.NewStaticCredentials(CredentialsId, CredentialsSecret, CredentialsToken),
			Region:      aws.String(Region),
		}))
	s3Client := s3.New(newSession)
	downloader := s3manager.NewDownloaderWithClient(s3Client, func(d *s3manager.Downloader) {
		d.PartSize = 64 * 1024 * 1024 // 64MB per part
	})

	processStart := time.Now()
	wg := &sync.WaitGroup{}
	for i := range downloadList {
		wg.Add(1)
		// Pass the address of the slice element, which stays valid for the
		// lifetime of downloadList.
		go download(i, downloader, wg, bucket, &downloadList[i], sem)
	}
	wg.Wait()

	fmt.Println("Process using time:", time.Since(processStart))
}
SDK 的 Downloader 有一個 PartSize 參數,用來設定每個 part(分段下載的區塊)的大小,範例中設為 64MB。如果不需要這麼大,可以把這個設定拿掉,改用 SDK 預設的 5MB,以減少記憶體的使用量。
執行結果會如:
clarence :~$ go run main.go
Successfully Opened fileList.json
[ 1 ] Start downloading ...
[ 0 ] Start downloading ...
[ 0 ] Downloaded file Folder1/Folder1/File1.jpg 29008 bytes time: 1.003095233s
[ 1 ] Downloaded file Folder1/Folder2/File2.jpg 27550 bytes time: 1.021920519s
Process using time: 1.022282614s