
[filer.backup] add param uploader_part_size for S3sink (#5352)

* fix: install cronie

* chore: refactor S3Sink configuration

* chore: refactor config

* add filer-backup compose file

* fix: X-Amz-Meta-Mtime and resolve review comments

* fix: attr mtime

* fix: MaxUploadParts is reduced to the maximum allowable

* fix: env and force set max MaxUploadParts

* fix: env WEED_SINK_S3_UPLOADER_PART_SIZE_MB
Konstantin Lebedev 2024-03-07 21:35:51 +05:00 committed by GitHub
parent ecc154fa9e
commit 170b63d6f8
5 changed files with 140 additions and 42 deletions
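
For context, here is a minimal standalone sketch (not part of this commit) of how a part size given in MB maps onto the AWS SDK for Go v1 s3manager uploader options that this change makes configurable; the endpoint, bucket, object key, and credentials below are placeholders borrowed from the test compose file further down.

// Sketch: configure an s3manager uploader with a part size given in MB.
package main

import (
    "bytes"
    "log"

    "github.com/aws/aws-sdk-go/aws"
    "github.com/aws/aws-sdk-go/aws/credentials"
    "github.com/aws/aws-sdk-go/aws/session"
    "github.com/aws/aws-sdk-go/service/s3/s3manager"
)

func main() {
    partSizeMb := 5 // e.g. WEED_SINK_S3_UPLOADER_PART_SIZE_MB: "5"

    sess := session.Must(session.NewSession(&aws.Config{
        Region:           aws.String("us-east-2"),
        Endpoint:         aws.String("http://localhost:8333"),
        S3ForcePathStyle: aws.Bool(true),
        Credentials:      credentials.NewStaticCredentials("some_access_key1", "some_secret_key1", ""),
    }))

    uploader := s3manager.NewUploader(sess, func(u *s3manager.Uploader) {
        u.PartSize = int64(partSizeMb) * 1024 * 1024 // S3 requires at least 5 MB per part
        u.Concurrency = 8
        u.MaxUploadParts = s3manager.MaxUploadParts // SDK cap (10000); the sink clamps its setting to this
    })

    _, err := uploader.Upload(&s3manager.UploadInput{
        Bucket: aws.String("backup"),
        Key:    aws.String("example/object"),
        Body:   bytes.NewReader(make([]byte, 6*1024*1024)), // 6 MB body -> two parts at a 5 MB part size
    })
    if err != nil {
        log.Fatal(err)
    }
}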


@@ -81,6 +81,9 @@ cluster: build
 2mount: build
 	docker compose -f compose/local-sync-mount-compose.yml -p seaweedfs up
 
+filer_backup: build
+	docker compose -f compose/local-filer-backup-compose.yml -p seaweedfs up
+
 hashicorp_raft: build
 	docker compose -f compose/local-hashicorp-raft-compose.yml -p seaweedfs up


@@ -0,0 +1,54 @@
version: '3.9'
services:
  server-left:
    image: chrislusf/seaweedfs:local
    command: "-v=0 server -ip=server-left -filer -filer.maxMB 5 -s3 -s3.config=/etc/seaweedfs/s3.json -volume.max=0 -master.volumeSizeLimitMB=1024 -volume.preStopSeconds=1"
    volumes:
      - ./s3.json:/etc/seaweedfs/s3.json
    healthcheck:
      test: [ "CMD", "curl", "--fail", "-I", "http://localhost:9333/cluster/healthz" ]
      interval: 3s
      start_period: 15s
      timeout: 30s
  server-right:
    image: chrislusf/seaweedfs:local
    command: "-v=0 server -ip=server-right -filer -filer.maxMB 64 -s3 -s3.config=/etc/seaweedfs/s3.json -volume.max=0 -master.volumeSizeLimitMB=1024 -volume.preStopSeconds=1"
    volumes:
      - ./s3.json:/etc/seaweedfs/s3.json
    healthcheck:
      test: [ "CMD", "curl", "--fail", "-I", "http://localhost:9333/cluster/healthz" ]
      interval: 3s
      start_period: 15s
      timeout: 30s
  filer-backup:
    image: chrislusf/seaweedfs:local
    command: "-v=0 filer.backup -debug -doDeleteFiles=False -filer server-left:8888"
    volumes:
      - ./replication.toml:/etc/seaweedfs/replication.toml
    environment:
      WEED_SINK_LOCAL_INCREMENTAL_ENABLED: "false"
      WEED_SINK_S3_ENABLED: "true"
      WEED_SINK_S3_BUCKET: "backup"
      WEED_SINK_S3_ENDPOINT: "http://server-right:8333"
      WEED_SINK_S3_DIRECTORY: "/"
      WEED_SINK_S3_AWS_ACCESS_KEY_ID: "some_access_key1"
      WEED_SINK_S3_AWS_SECRET_ACCESS_KEY: "some_secret_key1"
      WEED_SINK_S3_S3_DISABLE_CONTENT_MD5_VALIDATION: "false"
      WEED_SINK_S3_UPLOADER_PART_SIZE_MB: "5"
      WEED_SINK_S3_KEEP_PART_SIZE: "false"
    depends_on:
      server-left:
        condition: service_healthy
      server-right:
        condition: service_healthy
  minio-warp:
    image: minio/warp
    command: 'mixed --duration 5s --obj.size=6mb --md5 --objects 10 --concurrent 2'
    restart: on-failure
    environment:
      WARP_HOST: "server-left:8333"
      WARP_ACCESS_KEY: "some_access_key1"
      WARP_SECRET_KEY: "some_secret_key1"
    depends_on:
      - filer-backup
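
The WEED_SINK_S3_* variables above resolve to the sink.s3.* keys read by the sink's Initialize function below. A hypothetical standalone sketch of that env-to-config resolution, assuming a viper setup along the lines of SeaweedFS's util.GetViper (a "weed" prefix plus dot-to-underscore key replacement); the exact wiring in SeaweedFS may differ.

// Sketch: how WEED_SINK_S3_UPLOADER_PART_SIZE_MB can resolve to sink.s3.uploader_part_size_mb.
package main

import (
    "fmt"
    "strings"

    "github.com/spf13/viper"
)

func main() {
    v := viper.New()
    v.SetEnvPrefix("weed")                             // env vars start with WEED_
    v.SetEnvKeyReplacer(strings.NewReplacer(".", "_")) // sink.s3.x -> SINK_S3_X
    v.AutomaticEnv()                                   // consult matching env vars on every Get*

    // Defaults comparable to the ones the S3 sink now sets in Initialize.
    v.SetDefault("sink.s3.uploader_part_size_mb", 8)
    v.SetDefault("sink.s3.keep_part_size", true)

    // With WEED_SINK_S3_UPLOADER_PART_SIZE_MB=5 exported (as in the compose file),
    // this prints 5; otherwise it prints the default 8.
    fmt.Println(v.GetInt("sink.s3.uploader_part_size_mb"))
    fmt.Println(v.GetBool("sink.s3.keep_part_size"))
}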


@@ -85,8 +85,7 @@ const (
 func doFilerBackup(grpcDialOption grpc.DialOption, backupOption *FilerBackupOptions, clientId int32, clientEpoch int32) error {
 
 	// find data sink
-	config := util.GetViper()
-	dataSink := findSink(config)
+	dataSink := findSink(util.GetViper())
 	if dataSink == nil {
 		return fmt.Errorf("no data sink configured in replication.toml")
 	}


@@ -8,6 +8,8 @@ import (
 	"github.com/aws/aws-sdk-go/service/s3"
 	"github.com/aws/aws-sdk-go/service/s3/s3iface"
 	"github.com/aws/aws-sdk-go/service/s3/s3manager"
+	"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
+	"strconv"
 	"strings"
 
 	"github.com/seaweedfs/seaweedfs/weed/filer"
@@ -20,13 +22,19 @@ import (
 type S3Sink struct {
 	conn                          s3iface.S3API
+	filerSource                   *source.FilerSource
+	isIncremental                 bool
+	keepPartSize                  bool
+	s3DisableContentMD5Validation bool
+	s3ForcePathStyle              bool
+	uploaderConcurrency           int
+	uploaderMaxUploadParts        int
+	uploaderPartSizeMb            int
 	region                        string
 	bucket                        string
 	dir                           string
 	endpoint                      string
 	acl                           string
-	filerSource                   *source.FilerSource
-	isIncremental                 bool
 }
 
 func init() {
@@ -46,21 +54,49 @@ func (s3sink *S3Sink) IsIncremental() bool {
 }
 
 func (s3sink *S3Sink) Initialize(configuration util.Configuration, prefix string) error {
-	glog.V(0).Infof("sink.s3.region: %v", configuration.GetString(prefix+"region"))
-	glog.V(0).Infof("sink.s3.bucket: %v", configuration.GetString(prefix+"bucket"))
-	glog.V(0).Infof("sink.s3.directory: %v", configuration.GetString(prefix+"directory"))
-	glog.V(0).Infof("sink.s3.endpoint: %v", configuration.GetString(prefix+"endpoint"))
-	glog.V(0).Infof("sink.s3.acl: %v", configuration.GetString(prefix+"acl"))
-	glog.V(0).Infof("sink.s3.is_incremental: %v", configuration.GetString(prefix+"is_incremental"))
+	configuration.SetDefault(prefix+"region", "us-east-2")
+	configuration.SetDefault(prefix+"directory", "/")
+	configuration.SetDefault(prefix+"keep_part_size", true)
+	configuration.SetDefault(prefix+"uploader_max_upload_parts", 1000)
+	configuration.SetDefault(prefix+"uploader_part_size_mb", 8)
+	configuration.SetDefault(prefix+"uploader_concurrency", 8)
+	configuration.SetDefault(prefix+"s3_disable_content_md5_validation", true)
+	configuration.SetDefault(prefix+"s3_force_path_style", true)
+
+	s3sink.region = configuration.GetString(prefix + "region")
+	s3sink.bucket = configuration.GetString(prefix + "bucket")
+	s3sink.dir = configuration.GetString(prefix + "directory")
+	s3sink.endpoint = configuration.GetString(prefix + "endpoint")
+	s3sink.acl = configuration.GetString(prefix + "acl")
 	s3sink.isIncremental = configuration.GetBool(prefix + "is_incremental")
+	s3sink.keepPartSize = configuration.GetBool(prefix + "keep_part_size")
+	s3sink.s3DisableContentMD5Validation = configuration.GetBool(prefix + "s3_disable_content_md5_validation")
+	s3sink.s3ForcePathStyle = configuration.GetBool(prefix + "s3_force_path_style")
+	s3sink.uploaderMaxUploadParts = configuration.GetInt(prefix + "uploader_max_upload_parts")
+	s3sink.uploaderPartSizeMb = configuration.GetInt(prefix + "uploader_part_size")
+	s3sink.uploaderConcurrency = configuration.GetInt(prefix + "uploader_concurrency")
+
+	glog.V(0).Infof("sink.s3.region: %v", s3sink.region)
+	glog.V(0).Infof("sink.s3.bucket: %v", s3sink.bucket)
+	glog.V(0).Infof("sink.s3.directory: %v", s3sink.dir)
+	glog.V(0).Infof("sink.s3.endpoint: %v", s3sink.endpoint)
+	glog.V(0).Infof("sink.s3.acl: %v", s3sink.acl)
+	glog.V(0).Infof("sink.s3.is_incremental: %v", s3sink.isIncremental)
+	glog.V(0).Infof("sink.s3.s3_disable_content_md5_validation: %v", s3sink.s3DisableContentMD5Validation)
+	glog.V(0).Infof("sink.s3.s3_force_path_style: %v", s3sink.s3ForcePathStyle)
+	glog.V(0).Infof("sink.s3.keep_part_size: %v", s3sink.keepPartSize)
+	if s3sink.uploaderMaxUploadParts > s3manager.MaxUploadParts {
+		s3sink.uploaderMaxUploadParts = s3manager.MaxUploadParts
+		glog.Warningf("uploader_max_upload_parts is greater than the maximum number of parts allowed when uploading multiple parts to Amazon S3")
+		glog.V(0).Infof("sink.s3.uploader_max_upload_parts: %v => %v", s3sink.uploaderMaxUploadParts, s3manager.MaxUploadParts)
+	} else {
+		glog.V(0).Infof("sink.s3.uploader_max_upload_parts: %v", s3sink.uploaderMaxUploadParts)
+	}
+	glog.V(0).Infof("sink.s3.uploader_part_size_mb: %v", s3sink.uploaderPartSizeMb)
+	glog.V(0).Infof("sink.s3.uploader_concurrency: %v", s3sink.uploaderConcurrency)
+
 	return s3sink.initialize(
 		configuration.GetString(prefix+"aws_access_key_id"),
 		configuration.GetString(prefix+"aws_secret_access_key"),
-		configuration.GetString(prefix+"region"),
-		configuration.GetString(prefix+"bucket"),
-		configuration.GetString(prefix+"directory"),
-		configuration.GetString(prefix+"endpoint"),
-		configuration.GetString(prefix+"acl"),
 	)
 }
@@ -68,18 +104,12 @@ func (s3sink *S3Sink) SetSourceFiler(s *source.FilerSource) {
 	s3sink.filerSource = s
 }
 
-func (s3sink *S3Sink) initialize(awsAccessKeyId, awsSecretAccessKey, region, bucket, dir, endpoint, acl string) error {
-	s3sink.region = region
-	s3sink.bucket = bucket
-	s3sink.dir = dir
-	s3sink.endpoint = endpoint
-	s3sink.acl = acl
-
+func (s3sink *S3Sink) initialize(awsAccessKeyId, awsSecretAccessKey string) error {
 	config := &aws.Config{
 		Region:                        aws.String(s3sink.region),
 		Endpoint:                      aws.String(s3sink.endpoint),
-		S3ForcePathStyle:              aws.Bool(true),
-		S3DisableContentMD5Validation: aws.Bool(true),
+		S3DisableContentMD5Validation: aws.Bool(s3sink.s3DisableContentMD5Validation),
+		S3ForcePathStyle:              aws.Bool(s3sink.s3ForcePathStyle),
 	}
 	if awsAccessKeyId != "" && awsSecretAccessKey != "" {
 		config.Credentials = credentials.NewStaticCredentials(awsAccessKeyId, awsSecretAccessKey, "")
@@ -128,19 +158,26 @@ func (s3sink *S3Sink) CreateEntry(key string, entry *filer_pb.Entry, signatures
 	reader := filer.NewFileReader(s3sink.filerSource, entry)
 
-	fileSize := int64(filer.FileSize(entry))
-
-	partSize := int64(8 * 1024 * 1024) // The minimum/default allowed part size is 5MB
-	for partSize*1000 < fileSize {
-		partSize *= 4
-	}
-
 	// Create an uploader with the session and custom options
 	uploader := s3manager.NewUploaderWithClient(s3sink.conn, func(u *s3manager.Uploader) {
-		u.PartSize = partSize
-		u.Concurrency = 8
+		u.PartSize = int64(s3sink.uploaderPartSizeMb * 1024 * 1024)
+		u.Concurrency = s3sink.uploaderConcurrency
+		u.MaxUploadParts = s3sink.uploaderMaxUploadParts
 	})
 
+	if s3sink.keepPartSize {
+		switch chunkCount := len(entry.Chunks); {
+		case chunkCount > 1:
+			if firstChunkSize := int64(entry.Chunks[0].Size); firstChunkSize > s3manager.MinUploadPartSize {
+				uploader.PartSize = firstChunkSize
+			}
+		default:
+			uploader.PartSize = 0
+		}
+	}
+
+	if _, ok := entry.Extended[s3_constants.AmzUserMetaMtime]; !ok {
+		entry.Extended[s3_constants.AmzUserMetaMtime] = []byte(strconv.FormatInt(entry.Attributes.Mtime, 10))
+	}
+
 	// process tagging
 	tags := ""
 	if true {
@@ -153,14 +190,18 @@ func (s3sink *S3Sink) CreateEntry(key string, entry *filer_pb.Entry, signatures
 	}
 
 	// Upload the file to S3.
-	_, err = uploader.Upload(&s3manager.UploadInput{
+	uploadInput := s3manager.UploadInput{
 		Bucket:  aws.String(s3sink.bucket),
 		Key:     aws.String(key),
 		Body:    reader,
 		Tagging: aws.String(tags),
-	})
+	}
+	if len(entry.Attributes.Md5) > 0 {
+		uploadInput.ContentMD5 = aws.String(fmt.Sprintf("%x", entry.Attributes.Md5))
+	}
+	_, err = uploader.Upload(&uploadInput)
 
-	return
+	return err
 }
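
To summarize the new part-size behaviour in CreateEntry, here is a hypothetical helper (not code from the patch) that condenses the decision: start from the configured uploader_part_size_mb, and when keep_part_size is enabled, mirror the source entry's first chunk size so the backup keeps the original chunking; a single-chunk entry falls back to the uploader default.

// Sketch of the part-size decision; chunkSizes stands in for entry.Chunks.
package main

import "fmt"

const minUploadPartSize = 5 * 1024 * 1024 // matches s3manager.MinUploadPartSize

func choosePartSize(configuredMb int, keepPartSize bool, chunkSizes []int64) int64 {
    partSize := int64(configuredMb) * 1024 * 1024
    if !keepPartSize {
        return partSize
    }
    if len(chunkSizes) > 1 {
        // multiple source chunks: reuse the first chunk's size if it is a legal S3 part size
        if chunkSizes[0] > minUploadPartSize {
            partSize = chunkSizes[0]
        }
    } else {
        // single (or no) chunk: 0 lets the uploader fall back to its default
        partSize = 0
    }
    return partSize
}

func main() {
    fmt.Println(choosePartSize(5, false, nil))                        // 5 MB from config
    fmt.Println(choosePartSize(5, true, []int64{64 << 20, 64 << 20})) // 64 MB, mirrors source chunking
    fmt.Println(choosePartSize(5, true, []int64{1 << 20}))            // 0, uploader default
}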


@@ -30,6 +30,7 @@ const (
 	// S3 user-defined metadata
 	AmzUserMetaPrefix    = "X-Amz-Meta-"
 	AmzUserMetaDirective = "X-Amz-Metadata-Directive"
+	AmzUserMetaMtime     = "X-Amz-Meta-Mtime"
 
 	// S3 object tagging
 	AmzObjectTagging = "X-Amz-Tagging"