restic/internal/backend/s3/s3.go

package s3
import (
"context"
"fmt"
"hash"
"io"
"net/http"
"os"
"path"
"slices"
"strings"
"time"
"github.com/cenkalti/backoff/v4"
"github.com/restic/restic/internal/backend"
"github.com/restic/restic/internal/backend/layout"
"github.com/restic/restic/internal/backend/location"
"github.com/restic/restic/internal/backend/util"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/feature"
"github.com/minio/minio-go/v7"
"github.com/minio/minio-go/v7/pkg/credentials"
)
// Backend stores data on an S3 endpoint.
type Backend struct {
client *minio.Client
cfg Config
layout.Layout
}
// make sure that *Backend implements backend.Backend
var _ backend.Backend = &Backend{}
// archiveClasses lists the S3 storage classes that keep objects in cold
// storage; objects in these classes must be restored to hot storage before
// they can be read.
var archiveClasses = []string{"GLACIER", "DEEP_ARCHIVE"}

// warmupStatus describes the restore state of an object in cold storage.
type warmupStatus int

const (
warmupStatusCold warmupStatus = iota // in an archive class, no restore requested yet
warmupStatusWarmingUp // a restore request is in progress
warmupStatusWarm // in hot storage, or restored and readable
warmupStatusLukewarm // restored, but for fewer days than requested
)

// NewFactory returns a location.Factory for the S3 backend.
func NewFactory() location.Factory {
return location.NewHTTPBackendFactory("s3", ParseConfig, location.NoPassword, Create, Open)
}
func open(cfg Config, rt http.RoundTripper) (*Backend, error) {
debug.Log("open, config %#v", cfg)
if cfg.EnableRestore && !feature.Flag.Enabled(feature.S3Restore) {
return nil, fmt.Errorf("feature flag `s3-restore` is required to use `-o s3.enable-restore=true`")
}
if cfg.KeyID == "" && cfg.Secret.String() != "" {
return nil, errors.Fatalf("unable to open S3 backend: Key ID ($AWS_ACCESS_KEY_ID) is empty")
} else if cfg.KeyID != "" && cfg.Secret.String() == "" {
return nil, errors.Fatalf("unable to open S3 backend: Secret ($AWS_SECRET_ACCESS_KEY) is empty")
}
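// minio.MaxRetry is a package-level variable in minio-go, so this retry
// limit applies to every client created by this process.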
if cfg.MaxRetries > 0 {
minio.MaxRetry = int(cfg.MaxRetries)
}
creds, err := getCredentials(cfg, rt)
if err != nil {
return nil, errors.Wrap(err, "s3.getCredentials")
}
options := &minio.Options{
Creds: creds,
Secure: !cfg.UseHTTP,
Region: cfg.Region,
Transport: rt,
}
switch strings.ToLower(cfg.BucketLookup) {
case "", "auto":
options.BucketLookup = minio.BucketLookupAuto
case "dns":
options.BucketLookup = minio.BucketLookupDNS
case "path":
options.BucketLookup = minio.BucketLookupPath
default:
return nil, fmt.Errorf(`bad bucket-lookup style %q: must be "auto", "path" or "dns"`, cfg.BucketLookup)
}
client, err := minio.New(cfg.Endpoint, options)
if err != nil {
return nil, errors.Wrap(err, "minio.New")
}
be := &Backend{
client: client,
cfg: cfg,
Layout: layout.NewDefaultLayout(cfg.Prefix, path.Join),
}
return be, nil
}
// getCredentials runs through the various credential types and returns the
// first one that works. If the user has specified a role to assume, it is
// assumed using those credentials before returning.
func getCredentials(cfg Config, tr http.RoundTripper) (*credentials.Credentials, error) {
if cfg.UnsafeAnonymousAuth {
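// Static credentials with empty keys make the client use the anonymous
// signature type, i.e. requests are sent unsigned.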
return credentials.New(&credentials.Static{}), nil
}
// Chains all credential types, in the order they are tried:
// - AWS env vars (i.e. AWS_ACCESS_KEY_ID)
// - Static credentials provided by user
// - Minio env vars (i.e. MINIO_ACCESS_KEY)
// - AWS creds file (i.e. AWS_SHARED_CREDENTIALS_FILE or ~/.aws/credentials)
// - Minio creds file (i.e. MINIO_SHARED_CREDENTIALS_FILE or ~/.mc/config.json)
// - IAM profile based credentials (performs an HTTP call to a pre-defined
//   endpoint, only valid inside configured EC2 instances)
creds := credentials.NewChainCredentials([]credentials.Provider{
&credentials.EnvAWS{},
&credentials.Static{
Value: credentials.Value{
AccessKeyID: cfg.KeyID,
SecretAccessKey: cfg.Secret.Unwrap(),
},
},
&credentials.EnvMinio{},
&credentials.FileAWSCredentials{},
&credentials.FileMinioClient{},
&credentials.IAM{
Client: &http.Client{
Transport: tr,
},
},
})
c, err := creds.Get()
if err != nil {
return nil, errors.Wrap(err, "creds.Get")
}
if c.SignerType == credentials.SignatureAnonymous {
// Fail if no credentials were found to prevent repeated attempts to (unsuccessfully) retrieve new credentials.
// The first attempt still has to timeout which slows down restic usage considerably. Thus, migrate towards forcing
// users to explicitly decide between authenticated and anonymous access.
return nil, fmt.Errorf("no credentials found. Use `-o s3.unsafe-anonymous-auth=true` for anonymous authentication")
}
roleArn := os.Getenv("RESTIC_AWS_ASSUME_ROLE_ARN")
if roleArn != "" {
// use the region provided by the configuration by default
awsRegion := cfg.Region
// allow the region to be overridden if for some reason it is required
if os.Getenv("RESTIC_AWS_ASSUME_ROLE_REGION") != "" {
awsRegion = os.Getenv("RESTIC_AWS_ASSUME_ROLE_REGION")
}
sessionName := os.Getenv("RESTIC_AWS_ASSUME_ROLE_SESSION_NAME")
externalID := os.Getenv("RESTIC_AWS_ASSUME_ROLE_EXTERNAL_ID")
policy := os.Getenv("RESTIC_AWS_ASSUME_ROLE_POLICY")
stsEndpoint := os.Getenv("RESTIC_AWS_ASSUME_ROLE_STS_ENDPOINT")
if stsEndpoint == "" {
if awsRegion != "" {
if strings.HasPrefix(awsRegion, "cn-") {
stsEndpoint = "https://sts." + awsRegion + ".amazonaws.com.cn"
} else {
stsEndpoint = "https://sts." + awsRegion + ".amazonaws.com"
}
} else {
stsEndpoint = "https://sts.amazonaws.com"
}
}
opts := credentials.STSAssumeRoleOptions{
RoleARN: roleArn,
AccessKey: c.AccessKeyID,
SecretKey: c.SecretAccessKey,
SessionToken: c.SessionToken,
RoleSessionName: sessionName,
ExternalID: externalID,
Policy: policy,
Location: awsRegion,
}
creds, err = credentials.NewSTSAssumeRole(stsEndpoint, opts)
if err != nil {
return nil, errors.Wrap(err, "creds.AssumeRole")
}
}
return creds, nil
}
// Open opens the S3 backend at bucket and region. Unlike Create, it does not
// attempt to create the bucket.
func Open(_ context.Context, cfg Config, rt http.RoundTripper) (backend.Backend, error) {
return open(cfg, rt)
}
// Create opens the S3 backend at bucket and region and creates the bucket if
// it does not exist yet.
func Create(ctx context.Context, cfg Config, rt http.RoundTripper) (backend.Backend, error) {
be, err := open(cfg, rt)
if err != nil {
return nil, errors.Wrap(err, "open")
}
found, err := be.client.BucketExists(ctx, cfg.Bucket)
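// Some credentials may be allowed to access the objects in the bucket but
// not to call BucketExists; treat AccessDenied as "bucket exists" and let a
// later operation fail if it actually does not.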
if err != nil && isAccessDenied(err) {
err = nil
found = true
}
if err != nil {
debug.Log("BucketExists(%v) returned err %v", cfg.Bucket, err)
return nil, errors.Wrap(err, "client.BucketExists")
}
if !found {
// create new bucket with default ACL in default region
err = be.client.MakeBucket(ctx, cfg.Bucket, minio.MakeBucketOptions{})
if err != nil {
return nil, errors.Wrap(err, "client.MakeBucket")
}
}
return be, nil
}
// isAccessDenied returns true if the error is caused by Access Denied.
func isAccessDenied(err error) bool {
debug.Log("isAccessDenied(%T, %#v)", err, err)
var e minio.ErrorResponse
return errors.As(err, &e) && e.Code == "AccessDenied"
}

// IsNotExist returns true if the error is caused by a non-existing file.
func (be *Backend) IsNotExist(err error) bool {
var e minio.ErrorResponse
return errors.As(err, &e) && e.Code == "NoSuchKey"
}

// IsPermanentError returns true if the error cannot be resolved by retrying
// the operation, such as a missing file or insufficient permissions.
func (be *Backend) IsPermanentError(err error) bool {
if be.IsNotExist(err) {
return true
}
var merr minio.ErrorResponse
if errors.As(err, &merr) {
if merr.Code == "InvalidRange" || merr.Code == "AccessDenied" {
return true
}
}
return false
}
// Connections returns the configured number of concurrent backend connections.
func (be *Backend) Connections() uint {
return be.cfg.Connections
}
// Hasher may return a hash function for calculating a content hash for the backend
func (be *Backend) Hasher() hash.Hash {
return nil
}
// HasAtomicReplace returns whether Save() can atomically replace files
func (be *Backend) HasAtomicReplace() bool {
return true
}
// Path returns the path in the bucket that is used for this backend.
func (be *Backend) Path() string {
return be.cfg.Prefix
}

// useStorageClass returns whether the file should be saved in the configured
// storage class. For archive storage classes, only data files are stored
// using that class; metadata must remain instantly accessible.
func (be *Backend) useStorageClass(h backend.Handle) bool {
isDataFile := h.Type == backend.PackFile && !h.IsMetadata
isArchiveClass := slices.Contains(archiveClasses, be.cfg.StorageClass)
return !isArchiveClass || isDataFile
}
// Save stores data in the backend at the handle.
func (be *Backend) Save(ctx context.Context, h backend.Handle, rd backend.RewindReader) error {
objName := be.Filename(h)
opts := minio.PutObjectOptions{
ContentType: "application/octet-stream",
// the only option with the high-level api is to let the library handle the checksum computation
SendContentMd5: true,
// only use multipart uploads for very large files
PartSize: 200 * 1024 * 1024,
}
if be.useStorageClass(h) {
opts.StorageClass = be.cfg.StorageClass
}
info, err := be.client.PutObject(ctx, be.cfg.Bucket, objName, io.NopCloser(rd), int64(rd.Length()), opts)
// sanity check
if err == nil && info.Size != rd.Length() {
return errors.Errorf("wrote %d bytes instead of the expected %d bytes", info.Size, rd.Length())
}
return errors.Wrap(err, "client.PutObject")
}
// Load runs fn with a reader that yields the contents of the file at h at the
// given offset.
func (be *Backend) Load(ctx context.Context, h backend.Handle, length int, offset int64, fn func(rd io.Reader) error) error {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
return util.DefaultLoad(ctx, h, length, offset, be.openReader, fn)
}
func (be *Backend) openReader(ctx context.Context, h backend.Handle, length int, offset int64) (io.ReadCloser, error) {
objName := be.Filename(h)
opts := minio.GetObjectOptions{}
var err error
if length > 0 {
err = opts.SetRange(offset, offset+int64(length)-1)
} else if offset > 0 {
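// SetRange with end == 0 requests everything from offset to the end of the
// object.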
err = opts.SetRange(offset, 0)
}
if err != nil {
return nil, errors.Wrap(err, "SetRange")
}
coreClient := minio.Core{Client: be.client}
rd, info, _, err := coreClient.GetObject(ctx, be.cfg.Bucket, objName, opts)
if err != nil {
return nil, err
}
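// A response shorter than the requested range means the file is truncated;
// report this as a permanent error instead of retrying the download.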
if feature.Flag.Enabled(feature.BackendErrorRedesign) && length > 0 {
if info.Size > 0 && info.Size != int64(length) {
_ = rd.Close()
return nil, minio.ErrorResponse{Code: "InvalidRange", Message: "restic-file-too-short"}
}
}
return rd, err
}
// Stat returns information about a blob.
func (be *Backend) Stat(ctx context.Context, h backend.Handle) (bi backend.FileInfo, err error) {
objName := be.Filename(h)
var obj *minio.Object
opts := minio.GetObjectOptions{}
obj, err = be.client.GetObject(ctx, be.cfg.Bucket, objName, opts)
if err != nil {
return backend.FileInfo{}, errors.Wrap(err, "client.GetObject")
}
// make sure that the object is closed properly.
defer func() {
e := obj.Close()
if err == nil {
err = errors.Wrap(e, "Close")
}
}()
fi, err := obj.Stat()
if err != nil {
return backend.FileInfo{}, errors.Wrap(err, "Stat")
}
return backend.FileInfo{Size: fi.Size, Name: h.Name}, nil
}
// Remove removes the blob with the given name and type.
func (be *Backend) Remove(ctx context.Context, h backend.Handle) error {
objName := be.Filename(h)
err := be.client.RemoveObject(ctx, be.cfg.Bucket, objName, minio.RemoveObjectOptions{})
if be.IsNotExist(err) {
err = nil
}
return errors.Wrap(err, "client.RemoveObject")
}
// List runs fn for each file in the backend which has the type t. When an
// error occurs (or fn returns an error), List stops and returns it.
func (be *Backend) List(ctx context.Context, t backend.FileType, fn func(backend.FileInfo) error) error {
prefix, recursive := be.Basedir(t)
// make sure prefix ends with a slash
if !strings.HasSuffix(prefix, "/") {
prefix += "/"
}
ctx, cancel := context.WithCancel(ctx)
defer cancel()
debug.Log("using ListObjectsV1(%v)", be.cfg.ListObjectsV1)
// NB: unfortunately we can't protect this with be.sem.GetToken() here.
// Doing so would enable a deadlock situation (gh-1399), as ListObjects()
// starts its own goroutine and returns results via a channel.
listresp := be.client.ListObjects(ctx, be.cfg.Bucket, minio.ListObjectsOptions{
Prefix: prefix,
Recursive: recursive,
UseV1: be.cfg.ListObjectsV1,
})
for obj := range listresp {
if obj.Err != nil {
return obj.Err
}
m := strings.TrimPrefix(obj.Key, prefix)
if m == "" {
continue
}
fi := backend.FileInfo{
Name: path.Base(m),
Size: obj.Size,
}
if ctx.Err() != nil {
return ctx.Err()
}
err := fn(fi)
if err != nil {
return err
}
if ctx.Err() != nil {
return ctx.Err()
}
}
return ctx.Err()
}
// Delete removes all restic keys in the bucket. It will not remove the bucket itself.
func (be *Backend) Delete(ctx context.Context) error {
return util.DefaultDelete(ctx, be)
}
// Close does nothing
func (be *Backend) Close() error { return nil }
// Warmup transitions handles from cold to hot storage if needed. It returns
// the subset of handles that are still warming up; pass those to WarmupWait
// to block until they are readable.
func (be *Backend) Warmup(ctx context.Context, handles []backend.Handle) ([]backend.Handle, error) {
handlesWarmingUp := []backend.Handle{}
if be.cfg.EnableRestore {
for _, h := range handles {
filename := be.Filename(h)
isWarmingUp, err := be.requestRestore(ctx, filename)
if err != nil {
return handlesWarmingUp, err
}
if isWarmingUp {
debug.Log("s3 file is being restored: %s", filename)
handlesWarmingUp = append(handlesWarmingUp, h)
}
}
}
return handlesWarmingUp, nil
}

// requestRestore sends a restore request for the given file if necessary and
// reports whether the file is currently warming up.
func (be *Backend) requestRestore(ctx context.Context, filename string) (bool, error) {
objectInfo, err := be.client.StatObject(ctx, be.cfg.Bucket, filename, minio.StatObjectOptions{})
if err != nil {
return false, err
}
ws := be.getWarmupStatus(objectInfo)
switch ws {
case warmupStatusWarm:
return false, nil
case warmupStatusWarmingUp:
return true, nil
}
opts := minio.RestoreRequest{}
opts.SetDays(be.cfg.RestoreDays)
opts.SetGlacierJobParameters(minio.GlacierJobParameters{Tier: minio.TierType(be.cfg.RestoreTier)})
if err := be.client.RestoreObject(ctx, be.cfg.Bucket, filename, "", opts); err != nil {
var e minio.ErrorResponse
if errors.As(err, &e) {
switch e.Code {
case "InvalidObjectState":
return false, nil
case "RestoreAlreadyInProgress":
return true, nil
}
}
return false, err
}

// A lukewarm object is still readable from its previous restore while the
// request above extends its availability, so it does not count as warming up.
isWarmingUp := ws != warmupStatusLukewarm
return isWarmingUp, nil
}
// getWarmupStatus returns the warmup status of the provided object.
func (be *Backend) getWarmupStatus(objectInfo minio.ObjectInfo) warmupStatus {
// We can't use objectInfo.StorageClass to get the storage class of the
// object because this field is only set during ListObjects operations.
// The response header is the documented way to get the storage class
// for GetObject/StatObject operations.
storageClass := objectInfo.Metadata.Get("X-Amz-Storage-Class")
isArchiveClass := slices.Contains(archiveClasses, storageClass)
if !isArchiveClass {
return warmupStatusWarm
}
restore := objectInfo.Restore
if restore != nil {
if restore.OngoingRestore {
return warmupStatusWarmingUp
}
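// The object was already restored. If the restored copy stays available at
// least as long as a fresh restore would (RestoreDays from now), it is warm;
// otherwise it is only lukewarm and the restore should be extended.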
minExpiryTime := time.Now().Add(time.Duration(be.cfg.RestoreDays) * 24 * time.Hour)
expiryTime := restore.ExpiryTime
if !expiryTime.IsZero() {
if minExpiryTime.Before(expiryTime) {
return warmupStatusWarm
}
return warmupStatusLukewarm
}
}
return warmupStatusCold
}
// WarmupWait waits until all handles are in hot storage.
func (be *Backend) WarmupWait(ctx context.Context, handles []backend.Handle) error {
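// Bound the overall wait by the configured s3.restore-timeout.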
timeoutCtx, timeoutCtxCancel := context.WithTimeout(ctx, be.cfg.RestoreTimeout)
defer timeoutCtxCancel()
if be.cfg.EnableRestore {
for _, h := range handles {
filename := be.Filename(h)
err := be.waitForRestore(timeoutCtx, filename)
if err != nil {
return err
}
debug.Log("s3 file is restored: %s", filename)
}
}
return nil
}
// waitForRestore waits for a given file to be restored.
func (be *Backend) waitForRestore(ctx context.Context, filename string) error {
for {
var objectInfo minio.ObjectInfo
// Restore requests can last many hours, during which the network may fail
// temporarily. We don't need to give up in that event.
b := backoff.WithMaxRetries(backoff.NewExponentialBackOff(), 10)
b = backoff.WithContext(b, ctx)
err := backoff.Retry(
func() (err error) {
objectInfo, err = be.client.StatObject(ctx, be.cfg.Bucket, filename, minio.StatObjectOptions{})
return
},
b,
)
if err != nil {
return err
}
ws := be.getWarmupStatus(objectInfo)
switch ws {
case warmupStatusLukewarm:
fallthrough
case warmupStatusWarm:
return nil
case warmupStatusCold:
return errors.New("waiting on S3 handle that is not warming up")
}
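// Not restored yet: poll again in a minute, or stop early if the context is
// cancelled.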
select {
case <-time.After(1 * time.Minute):
case <-ctx.Done():
return ctx.Err()
}
}
}