restic/internal/backend/azure/azure.go

484 lines
14 KiB
Go
Raw Normal View History

package azure
import (
2022-11-18 23:04:31 +01:00
"bytes"
"context"
"crypto/md5"
"encoding/base64"
2022-03-05 19:16:13 +01:00
"fmt"
"hash"
"io"
2017-08-05 21:25:38 +02:00
"net/http"
"path"
"strings"
"github.com/restic/restic/internal/backend"
"github.com/restic/restic/internal/backend/layout"
"github.com/restic/restic/internal/backend/location"
"github.com/restic/restic/internal/backend/util"
"github.com/restic/restic/internal/debug"
2017-08-05 21:46:15 +02:00
"github.com/restic/restic/internal/errors"
2020-12-17 12:47:53 +01:00
2022-11-18 23:04:31 +01:00
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/streaming"
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
2022-11-18 23:04:31 +01:00
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blockblob"
azContainer "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container"
)
// Backend stores data on an azure endpoint.
type Backend struct {
2022-11-18 23:04:31 +01:00
cfg Config
container *azContainer.Client
2021-08-07 22:20:49 +02:00
connections uint
prefix string
listMaxItems int
layout.Layout
accessTier blob.AccessTier
}
2022-11-18 23:04:31 +01:00
const (
	// saveLargeSize is the size in bytes (256 MiB) from which Save switches
	// from saveSmall to saveLarge.
	saveLargeSize = 256 << 20
	// defaultListMaxItems is the number of items List requests per page
	// unless SetListMaxItems changes it.
	defaultListMaxItems = 5000
)
// Compile-time assertion that *Backend implements backend.Backend.
var _ backend.Backend = (*Backend)(nil)
// NewFactory returns the location.Factory registered under the "azure"
// scheme, wired up with ParseConfig, Create and Open from this package.
func NewFactory() location.Factory {
	factory := location.NewHTTPBackendFactory("azure", ParseConfig, location.NoPassword, Create, Open)
	return factory
}
func open(cfg Config, rt http.RoundTripper) (*Backend, error) {
debug.Log("open, config %#v", cfg)
2022-11-18 23:04:31 +01:00
var client *azContainer.Client
2022-03-05 19:16:13 +01:00
var err error
2022-11-18 23:04:31 +01:00
2023-06-25 02:06:54 +02:00
var endpointSuffix string
if cfg.EndpointSuffix != "" {
endpointSuffix = cfg.EndpointSuffix
} else {
endpointSuffix = "core.windows.net"
}
2024-11-13 16:02:22 +01:00
if cfg.AccountName == "" {
return nil, errors.Fatalf("unable to open Azure backend: Account name ($AZURE_ACCOUNT_NAME) is empty")
}
2023-06-25 02:06:54 +02:00
url := fmt.Sprintf("https://%s.blob.%s/%s", cfg.AccountName, endpointSuffix, cfg.Container)
2022-11-18 23:04:31 +01:00
opts := &azContainer.ClientOptions{
ClientOptions: azcore.ClientOptions{
2023-05-18 20:07:47 +02:00
Transport: &http.Client{Transport: rt},
2022-11-18 23:04:31 +01:00
},
}
2022-03-05 19:16:13 +01:00
if cfg.AccountKey.String() != "" {
// We have an account key value, find the BlobServiceClient
// from with a BasicClient
debug.Log(" - using account key")
2022-11-18 23:04:31 +01:00
cred, err := azblob.NewSharedKeyCredential(cfg.AccountName, cfg.AccountKey.Unwrap())
if err != nil {
return nil, errors.Wrap(err, "NewSharedKeyCredential")
}
client, err = azContainer.NewClientWithSharedKeyCredential(url, cred, opts)
2022-03-05 19:16:13 +01:00
if err != nil {
2022-11-18 23:04:31 +01:00
return nil, errors.Wrap(err, "NewClientWithSharedKeyCredential")
2022-03-05 19:16:13 +01:00
}
} else if cfg.AccountSAS.String() != "" {
// Get the client using the SAS Token as authentication, this
// is longer winded than above because the SDK wants a URL for the Account
// if your using a SAS token, and not just the account name
// we (as per the SDK ) assume the default Azure portal.
2022-11-18 23:04:31 +01:00
// https://github.com/Azure/azure-storage-blob-go/issues/130
2022-03-05 19:16:13 +01:00
debug.Log(" - using sas token")
2022-07-16 23:45:41 +02:00
sas := cfg.AccountSAS.Unwrap()
2022-11-18 23:04:31 +01:00
2022-07-16 23:45:41 +02:00
// strip query sign prefix
if sas[0] == '?' {
sas = sas[1:]
}
2022-11-18 23:04:31 +01:00
urlWithSAS := fmt.Sprintf("%s?%s", url, sas)
client, err = azContainer.NewClientWithNoCredential(urlWithSAS, opts)
2022-03-05 19:16:13 +01:00
if err != nil {
return nil, errors.Wrap(err, "NewAccountSASClientFromEndpointToken")
}
} else {
var cred azcore.TokenCredential
if cfg.ForceCliCredential {
debug.Log(" - using AzureCLICredential")
cred, err = azidentity.NewAzureCLICredential(nil)
if err != nil {
return nil, errors.Wrap(err, "NewAzureCLICredential")
}
} else {
debug.Log(" - using DefaultAzureCredential")
cred, err = azidentity.NewDefaultAzureCredential(nil)
if err != nil {
return nil, errors.Wrap(err, "NewDefaultAzureCredential")
}
}
client, err = azContainer.NewClient(url, cred, opts)
if err != nil {
return nil, errors.Wrap(err, "NewClient")
}
}
var accessTier blob.AccessTier
// if the access tier is not supported, then we will not set the access tier; during the upload process,
// the value will be inferred from the default configured on the storage account.
for _, tier := range supportedAccessTiers() {
if strings.EqualFold(string(tier), cfg.AccessTier) {
accessTier = tier
debug.Log(" - using access tier %v", accessTier)
break
}
}
be := &Backend{
container: client,
cfg: cfg,
connections: cfg.Connections,
Layout: layout.NewDefaultLayout(cfg.Prefix, path.Join),
listMaxItems: defaultListMaxItems,
accessTier: accessTier,
}
return be, nil
}
// supportedAccessTiers lists the access tiers that open accepts from the
// configuration: hot, cool, cold and archive.
func supportedAccessTiers() []blob.AccessTier {
	tiers := []blob.AccessTier{
		blob.AccessTierHot,
		blob.AccessTierCool,
		blob.AccessTierCold,
		blob.AccessTierArchive,
	}
	return tiers
}
// Open opens the Azure backend at the container specified in cfg. The
// context is not used.
func Open(_ context.Context, cfg Config, rt http.RoundTripper) (*Backend, error) {
	be, err := open(cfg, rt)
	return be, err
}
// Create opens the Azure backend at specified container and creates the container if
// it does not exist yet.
2022-11-18 23:04:31 +01:00
func Create(ctx context.Context, cfg Config, rt http.RoundTripper) (*Backend, error) {
be, err := open(cfg, rt)
if err != nil {
return nil, errors.Wrap(err, "open")
}
_, err = be.container.GetProperties(ctx, &azContainer.GetPropertiesOptions{})
2022-11-18 23:04:31 +01:00
if err != nil && bloberror.HasCode(err, bloberror.ContainerNotFound) {
_, err = be.container.Create(ctx, &azContainer.CreateOptions{})
2022-11-18 23:04:31 +01:00
if err != nil {
return nil, errors.Wrap(err, "container.Create")
}
} else if err != nil && bloberror.HasCode(err, bloberror.AuthorizationFailure) {
// We ignore this Auth. Failure, as the failure is related to the type
// of SAS/SAT, not an actual real failure. If the token is invalid, we
// fail later on anyway.
// For details see Issue #4004.
debug.Log("Ignoring AuthorizationFailure when calling GetProperties")
2022-11-18 23:04:31 +01:00
} else if err != nil {
return be, errors.Wrap(err, "container.GetProperties")
}
return be, nil
}
// SetListMaxItems sets the number of list items that List requests per page.
func (be *Backend) SetListMaxItems(i int) {
	be.listMaxItems = i
}
// IsNotExist returns true if the error is caused by a not existing file.
func (be *Backend) IsNotExist(err error) bool {
2022-11-18 23:04:31 +01:00
return bloberror.HasCode(err, bloberror.BlobNotFound)
}
// IsPermanentError reports whether err is considered permanent: either the
// file does not exist, or err is an Azure response error with status 416
// (requested range not satisfiable), 401 (unauthorized) or 403 (forbidden).
func (be *Backend) IsPermanentError(err error) bool {
	if be.IsNotExist(err) {
		return true
	}

	var respErr *azcore.ResponseError
	if !errors.As(err, &respErr) {
		return false
	}

	switch respErr.StatusCode {
	case http.StatusRequestedRangeNotSatisfiable, http.StatusUnauthorized, http.StatusForbidden:
		return true
	default:
		return false
	}
}
2021-08-07 22:20:49 +02:00
// Connections returns the connection count the backend was configured with.
func (be *Backend) Connections() uint {
	return be.connections
}
// Hasher returns a new MD5 hash, which is the content hash used for uploads
// to this backend.
func (be *Backend) Hasher() hash.Hash {
	hasher := md5.New()
	return hasher
}
// HasAtomicReplace reports whether Save() can atomically replace files; for
// this backend it always returns true.
func (be *Backend) HasAtomicReplace() bool {
	return true
}
// Path returns the path in the container that is used for this backend, as
// stored in the backend's prefix field.
func (be *Backend) Path() string {
	return be.prefix
}
// useAccessTier reports whether the configured access tier applies to the
// file behind h. Every tier except archive applies to all files. With the
// archive tier only pack files that are not flagged as metadata qualify,
// because metadata must remain instantly accessible.
func (be *Backend) useAccessTier(h backend.Handle) bool {
	if !strings.EqualFold(be.cfg.AccessTier, "archive") {
		return true
	}
	return h.Type == backend.PackFile && !h.IsMetadata
}
// Save stores data in the backend at the handle.
func (be *Backend) Save(ctx context.Context, h backend.Handle, rd backend.RewindReader) error {
objName := be.Filename(h)
2022-11-18 23:04:31 +01:00
debug.Log("InsertObject(%v, %v)", be.cfg.AccountName, objName)
var accessTier blob.AccessTier
if be.useAccessTier(h) {
accessTier = be.accessTier
}
var err error
2022-11-18 23:04:31 +01:00
if rd.Length() < saveLargeSize {
// if it's smaller than 256miB, then just create the file directly from the reader
err = be.saveSmall(ctx, objName, rd, accessTier)
} else {
// otherwise use the more complicated method
err = be.saveLarge(ctx, objName, rd, accessTier)
}
2022-11-18 23:04:31 +01:00
return err
}
func (be *Backend) saveSmall(ctx context.Context, objName string, rd backend.RewindReader, accessTier blob.AccessTier) error {
2022-11-18 23:04:31 +01:00
blockBlobClient := be.container.NewBlockBlobClient(objName)
// upload it as a new "block", use the base64 hash for the ID
id := base64.StdEncoding.EncodeToString(rd.Hash())
buf := make([]byte, rd.Length())
_, err := io.ReadFull(rd, buf)
if err != nil {
2022-11-18 23:04:31 +01:00
return errors.Wrap(err, "ReadFull")
}
2022-11-18 23:04:31 +01:00
reader := bytes.NewReader(buf)
_, err = blockBlobClient.StageBlock(ctx, id, streaming.NopCloser(reader), &blockblob.StageBlockOptions{
2023-04-07 15:05:55 +02:00
TransactionalValidation: blob.TransferValidationTypeMD5(rd.Hash()),
2022-11-18 23:04:31 +01:00
})
if err != nil {
return errors.Wrap(err, "StageBlock")
}
blocks := []string{id}
_, err = blockBlobClient.CommitBlockList(ctx, blocks, &blockblob.CommitBlockListOptions{
Tier: &accessTier,
})
2022-11-18 23:04:31 +01:00
return errors.Wrap(err, "CommitBlockList")
}
func (be *Backend) saveLarge(ctx context.Context, objName string, rd backend.RewindReader, accessTier blob.AccessTier) error {
2022-11-18 23:04:31 +01:00
blockBlobClient := be.container.NewBlockBlobClient(objName)
buf := make([]byte, 100*1024*1024)
2022-11-18 23:04:31 +01:00
blocks := []string{}
uploadedBytes := 0
for {
n, err := io.ReadFull(rd, buf)
if err == io.ErrUnexpectedEOF {
err = nil
}
2022-11-18 23:04:31 +01:00
if err == io.EOF {
// end of file reached, no bytes have been read at all
break
}
if err != nil {
return errors.Wrap(err, "ReadFull")
}
buf = buf[:n]
uploadedBytes += n
// upload it as a new "block", use the base64 hash for the ID
h := md5.Sum(buf)
id := base64.StdEncoding.EncodeToString(h[:])
2022-11-18 23:04:31 +01:00
reader := bytes.NewReader(buf)
debug.Log("StageBlock %v with %d bytes", id, len(buf))
_, err = blockBlobClient.StageBlock(ctx, id, streaming.NopCloser(reader), &blockblob.StageBlockOptions{
2023-04-07 15:05:55 +02:00
TransactionalValidation: blob.TransferValidationTypeMD5(h[:]),
2022-11-18 23:04:31 +01:00
})
if err != nil {
2022-11-18 23:04:31 +01:00
return errors.Wrap(err, "StageBlock")
}
2022-11-18 23:04:31 +01:00
blocks = append(blocks, id)
}
// sanity check
if uploadedBytes != int(rd.Length()) {
return errors.Errorf("wrote %d bytes instead of the expected %d bytes", uploadedBytes, rd.Length())
}
_, err := blockBlobClient.CommitBlockList(ctx, blocks, &blockblob.CommitBlockListOptions{
Tier: &accessTier,
})
2022-11-18 23:04:31 +01:00
debug.Log("uploaded %d parts: %v", len(blocks), blocks)
2022-11-18 23:04:31 +01:00
return errors.Wrap(err, "CommitBlockList")
}
// Load runs fn with a reader that yields the contents of the file at h at the
// given offset. The work is delegated to util.DefaultLoad, which obtains the
// reader from be.openReader.
func (be *Backend) Load(ctx context.Context, h backend.Handle, length int, offset int64, fn func(rd io.Reader) error) error {
	err := util.DefaultLoad(ctx, h, length, offset, be.openReader, fn)
	return err
}
func (be *Backend) openReader(ctx context.Context, h backend.Handle, length int, offset int64) (io.ReadCloser, error) {
objName := be.Filename(h)
2022-11-18 23:04:31 +01:00
blockBlobClient := be.container.NewBlobClient(objName)
2022-11-18 23:04:31 +01:00
resp, err := blockBlobClient.DownloadStream(ctx, &blob.DownloadStreamOptions{
Range: azblob.HTTPRange{
Offset: offset,
Count: int64(length),
},
})
if err != nil {
return nil, err
}
if length > 0 && (resp.ContentLength == nil || *resp.ContentLength != int64(length)) {
_ = resp.Body.Close()
return nil, &azcore.ResponseError{ErrorCode: "restic-file-too-short", StatusCode: http.StatusRequestedRangeNotSatisfiable}
}
return resp.Body, err
}
// Stat returns information about a blob.
func (be *Backend) Stat(ctx context.Context, h backend.Handle) (backend.FileInfo, error) {
objName := be.Filename(h)
2022-11-18 23:04:31 +01:00
blobClient := be.container.NewBlobClient(objName)
2022-11-18 23:04:31 +01:00
props, err := blobClient.GetProperties(ctx, nil)
if err != nil {
return backend.FileInfo{}, errors.Wrap(err, "blob.GetProperties")
}
fi := backend.FileInfo{
2022-11-18 23:04:31 +01:00
Size: *props.ContentLength,
Name: h.Name,
}
return fi, nil
}
// Remove removes the blob with the given name and type.
func (be *Backend) Remove(ctx context.Context, h backend.Handle) error {
objName := be.Filename(h)
2022-11-18 23:04:31 +01:00
blob := be.container.NewBlobClient(objName)
2022-11-18 23:04:31 +01:00
_, err := blob.Delete(ctx, &azblob.DeleteBlobOptions{})
if be.IsNotExist(err) {
2022-11-18 23:04:31 +01:00
return nil
}
return errors.Wrap(err, "client.RemoveObject")
}
// List runs fn for each file in the backend which has the type t. When an
// error occurs (or fn returns an error), List stops and returns it.
func (be *Backend) List(ctx context.Context, t backend.FileType, fn func(backend.FileInfo) error) error {
prefix, _ := be.Basedir(t)
// make sure prefix ends with a slash
if !strings.HasSuffix(prefix, "/") {
prefix += "/"
}
2022-11-18 23:04:31 +01:00
max := int32(be.listMaxItems)
opts := &azContainer.ListBlobsFlatOptions{
MaxResults: &max,
Prefix: &prefix,
}
2022-11-18 23:04:31 +01:00
lister := be.container.NewListBlobsFlatPager(opts)
2022-11-18 23:04:31 +01:00
for lister.More() {
resp, err := lister.NextPage(ctx)
if err != nil {
return err
}
2022-11-18 23:04:31 +01:00
debug.Log("got %v objects", len(resp.Segment.BlobItems))
2022-11-18 23:04:31 +01:00
for _, item := range resp.Segment.BlobItems {
m := strings.TrimPrefix(*item.Name, prefix)
if m == "" {
continue
}
fi := backend.FileInfo{
Name: path.Base(m),
2022-11-18 23:04:31 +01:00
Size: *item.Properties.ContentLength,
}
if ctx.Err() != nil {
return ctx.Err()
}
err := fn(fi)
if err != nil {
return err
}
if ctx.Err() != nil {
return ctx.Err()
}
}
}
return ctx.Err()
}
// Delete removes all restic keys in the container by delegating to
// util.DefaultDelete. It will not remove the container itself.
func (be *Backend) Delete(ctx context.Context) error {
	err := util.DefaultDelete(ctx, be)
	return err
}
// Close does nothing and always returns nil.
func (be *Backend) Close() error {
	return nil
}
feat(backends/s3): add warmup support before repacks and restores (#5173) * feat(backends/s3): add warmup support before repacks and restores This commit introduces basic support for transitioning pack files stored in cold storage to hot storage on S3 and S3-compatible providers. To prevent unexpected behavior for existing users, the feature is gated behind new flags: - `s3.enable-restore`: opt-in flag (defaults to false) - `s3.restore-days`: number of days for the restored objects to remain in hot storage (defaults to `7`) - `s3.restore-timeout`: maximum time to wait for a single restoration (defaults to `1 day`) - `s3.restore-tier`: retrieval tier at which the restore will be processed. (defaults to `Standard`) As restoration times can be lengthy, this implementation preemptively restores selected packs to prevent incessant restore-delays during downloads. This is slightly sub-optimal as we could process packs out-of-order (as soon as they're transitioned), but this would really add too much complexity for a marginal gain in speed. To maintain simplicity and prevent resource exhaustion with lots of packs, no new concurrency mechanisms or goroutines were added. This just hooks gracefully into the existing routines. **Limitations:** - Tests against the backend were not written due to the lack of cold storage class support in MinIO. Testing was done manually on Scaleway's S3-compatible object storage. If necessary, we could explore testing with LocalStack or mocks, though this requires further discussion. - Currently, this feature only warms up before restores and repacks (prune/copy), as those are the two main use-cases I came across. Support for other commands may be added in future iterations, as long as affected packs can be calculated in advance. - The feature is gated behind a new alpha `s3-restore` feature flag to make it explicit that the feature is still wet behind the ears. - There is no explicit user notification for ongoing pack restorations. 
While I think it is not necessary because of the opt-in flag, showing some notice may improve usability (but would probably require major refactoring in the progress bar which I didn't want to start). Another possibility would be to add a flag to send restore requests and fail early. See https://github.com/restic/restic/issues/3202 * ui: warn user when files are warming up from cold storage * refactor: remove the PacksWarmer struct It's easier to handle multiple handles in the backend directly, and it may open the door to reducing the number of requests made to the backend in the future.
2025-02-01 19:26:27 +01:00
// Warmup is not implemented for this backend. It ignores its arguments and
// returns an empty, non-nil handle list together with a nil error.
func (be *Backend) Warmup(_ context.Context, _ []backend.Handle) ([]backend.Handle, error) {
	warming := []backend.Handle{}
	return warming, nil
}
// WarmupWait is a no-op for this backend: it ignores its arguments and
// returns nil right away.
func (be *Backend) WarmupWait(_ context.Context, _ []backend.Handle) error {
	return nil
}