2018-04-08 14:02:30 +02:00
|
|
|
package restorer
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
feat(backends/s3): add warmup support before repacks and restores (#5173)
* feat(backends/s3): add warmup support before repacks and restores
This commit introduces basic support for transitioning pack files stored
in cold storage to hot storage on S3 and S3-compatible providers.
To prevent unexpected behavior for existing users, the feature is gated
behind new flags:
- `s3.enable-restore`: opt-in flag (defaults to false)
- `s3.restore-days`: number of days for the restored objects to remain
in hot storage (defaults to `7`)
- `s3.restore-timeout`: maximum time to wait for a single restoration
(default to `1 day`)
- `s3.restore-tier`: retrieval tier at which the restore will be
processed. (default to `Standard`)
As restoration times can be lengthy, this implementation preemptively
restores selected packs to prevent incessant restore-delays during
downloads. This is slightly sub-optimal as we could process packs
out-of-order (as soon as they're transitioned), but this would really
add too much complexity for a marginal gain in speed.
To maintain simplicity and prevent resource exhaustion with lots of
packs, no new concurrency mechanisms or goroutines were added. This just
hooks gracefully into the existing routines.
**Limitations:**
- Tests against the backend were not written due to the lack of cold
storage class support in MinIO. Testing was done manually on
Scaleway's S3-compatible object storage. If necessary, we could
explore testing with LocalStack or mocks, though this requires further
discussion.
- Currently, this feature only warms up before restores and repacks
(prune/copy), as those are the two main use-cases I came across.
Support for other commands may be added in future iterations, as long
as affected packs can be calculated in advance.
- The feature is gated behind a new alpha `s3-restore` feature flag to
make it explicit that the feature is still wet behind the ears.
- There is no explicit user notification for ongoing pack restorations.
While I think it is not necessary because of the opt-in flag, showing
some notice may improve usability (but would probably require major
refactoring in the progress bar which I didn't want to start). Another
possibility would be to add a flag to send restores requests and fail
early.
See https://github.com/restic/restic/issues/3202
* ui: warn user when files are warming up from cold storage
* refactor: remove the PacksWarmer struct
It's easier to handle multiple handles in the backend directly, and it
may open the door to reducing the number of requests made to the backend
in the future.
2025-02-01 19:26:27 +01:00
|
|
|
"fmt"
|
2018-09-15 02:18:37 +02:00
|
|
|
"path/filepath"
|
2019-11-27 13:22:38 +01:00
|
|
|
"sync"
|
2018-04-08 14:02:30 +02:00
|
|
|
|
2021-01-04 19:20:04 +01:00
|
|
|
"golang.org/x/sync/errgroup"
|
|
|
|
|
2018-04-08 14:02:30 +02:00
|
|
|
"github.com/restic/restic/internal/debug"
|
|
|
|
"github.com/restic/restic/internal/errors"
|
feat(backends/s3): add warmup support before repacks and restores (#5173)
* feat(backends/s3): add warmup support before repacks and restores
This commit introduces basic support for transitioning pack files stored
in cold storage to hot storage on S3 and S3-compatible providers.
To prevent unexpected behavior for existing users, the feature is gated
behind new flags:
- `s3.enable-restore`: opt-in flag (defaults to false)
- `s3.restore-days`: number of days for the restored objects to remain
in hot storage (defaults to `7`)
- `s3.restore-timeout`: maximum time to wait for a single restoration
(default to `1 day`)
- `s3.restore-tier`: retrieval tier at which the restore will be
processed. (default to `Standard`)
As restoration times can be lengthy, this implementation preemptively
restores selected packs to prevent incessant restore-delays during
downloads. This is slightly sub-optimal as we could process packs
out-of-order (as soon as they're transitioned), but this would really
add too much complexity for a marginal gain in speed.
To maintain simplicity and prevent resource exhaustion with lots of
packs, no new concurrency mechanisms or goroutines were added. This just
hooks gracefully into the existing routines.
**Limitations:**
- Tests against the backend were not written due to the lack of cold
storage class support in MinIO. Testing was done manually on
Scaleway's S3-compatible object storage. If necessary, we could
explore testing with LocalStack or mocks, though this requires further
discussion.
- Currently, this feature only warms up before restores and repacks
(prune/copy), as those are the two main use-cases I came across.
Support for other commands may be added in future iterations, as long
as affected packs can be calculated in advance.
- The feature is gated behind a new alpha `s3-restore` feature flag to
make it explicit that the feature is still wet behind the ears.
- There is no explicit user notification for ongoing pack restorations.
While I think it is not necessary because of the opt-in flag, showing
some notice may improve usability (but would probably require major
refactoring in the progress bar which I didn't want to start). Another
possibility would be to add a flag to send restores requests and fail
early.
See https://github.com/restic/restic/issues/3202
* ui: warn user when files are warming up from cold storage
* refactor: remove the PacksWarmer struct
It's easier to handle multiple handles in the backend directly, and it
may open the door to reducing the number of requests made to the backend
in the future.
2025-02-01 19:26:27 +01:00
|
|
|
"github.com/restic/restic/internal/feature"
|
2021-08-20 12:12:38 +02:00
|
|
|
"github.com/restic/restic/internal/repository"
|
2018-04-08 14:02:30 +02:00
|
|
|
"github.com/restic/restic/internal/restic"
|
2022-10-28 17:44:34 +02:00
|
|
|
"github.com/restic/restic/internal/ui/restore"
|
2018-04-08 14:02:30 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// largeFileBlobCount is the blob-count threshold above which a file is
	// treated as "large"; large files have their blobs grouped by pack id
	// instead of keeping a flat blob list (see restoreFiles).
	largeFileBlobCount = 25
)
|
|
|
|
|
|
|
|
// fileInfo holds information about a regular file being restored.
type fileInfo struct {
	lock       sync.Mutex // guards the mutable fields below — TODO confirm exact set of protected fields
	inProgress bool       // whether restoration of this file has started — presumably; verify against downloader
	sparse     bool       // restore the file sparsely (set when it contains the zero chunk or consists of a single blob)
	size       int64      // expected size of the restored file in bytes
	location   string     // file on local filesystem relative to restorer basedir
	// blobs of the file: restic.IDs initially, replaced by a
	// map[restic.ID][]fileBlobInfo for large files (see restoreFiles)
	blobs interface{}
	state *fileState // per-blob match state from a previous partial restore; nil if none — TODO confirm
}
|
|
|
|
|
|
|
|
// fileBlobInfo locates a single blob within a file; used to index the blobs
// of large files by the pack that stores them.
type fileBlobInfo struct {
	id     restic.ID // the blob id
	offset int64     // blob offset in the file
}
|
|
|
|
|
|
|
|
// packInfo holds information about a data pack required to restore one or more files.
type packInfo struct {
	id    restic.ID              // the pack id
	files map[*fileInfo]struct{} // set of files that use blobs from this pack
}
|
|
|
|
|
2023-12-31 12:07:19 +01:00
|
|
|
// blobsLoaderFn loads the given blobs from the pack file with id packID,
// calling handleBlobFn for each loaded blob.
type blobsLoaderFn func(ctx context.Context, packID restic.ID, blobs []restic.Blob, handleBlobFn func(blob restic.BlobHandle, buf []byte, err error) error) error
|
feat(backends/s3): add warmup support before repacks and restores (#5173)
* feat(backends/s3): add warmup support before repacks and restores
This commit introduces basic support for transitioning pack files stored
in cold storage to hot storage on S3 and S3-compatible providers.
To prevent unexpected behavior for existing users, the feature is gated
behind new flags:
- `s3.enable-restore`: opt-in flag (defaults to false)
- `s3.restore-days`: number of days for the restored objects to remain
in hot storage (defaults to `7`)
- `s3.restore-timeout`: maximum time to wait for a single restoration
(default to `1 day`)
- `s3.restore-tier`: retrieval tier at which the restore will be
processed. (default to `Standard`)
As restoration times can be lengthy, this implementation preemptively
restores selected packs to prevent incessant restore-delays during
downloads. This is slightly sub-optimal as we could process packs
out-of-order (as soon as they're transitioned), but this would really
add too much complexity for a marginal gain in speed.
To maintain simplicity and prevent resource exhaustion with lots of
packs, no new concurrency mechanisms or goroutines were added. This just
hooks gracefully into the existing routines.
**Limitations:**
- Tests against the backend were not written due to the lack of cold
storage class support in MinIO. Testing was done manually on
Scaleway's S3-compatible object storage. If necessary, we could
explore testing with LocalStack or mocks, though this requires further
discussion.
- Currently, this feature only warms up before restores and repacks
(prune/copy), as those are the two main use-cases I came across.
Support for other commands may be added in future iterations, as long
as affected packs can be calculated in advance.
- The feature is gated behind a new alpha `s3-restore` feature flag to
make it explicit that the feature is still wet behind the ears.
- There is no explicit user notification for ongoing pack restorations.
While I think it is not necessary because of the opt-in flag, showing
some notice may improve usability (but would probably require major
refactoring in the progress bar which I didn't want to start). Another
possibility would be to add a flag to send restores requests and fail
early.
See https://github.com/restic/restic/issues/3202
* ui: warn user when files are warming up from cold storage
* refactor: remove the PacksWarmer struct
It's easier to handle multiple handles in the backend directly, and it
may open the door to reducing the number of requests made to the backend
in the future.
2025-02-01 19:26:27 +01:00
|
|
|
// startWarmupFn starts warming up the given set of pack files (e.g.
// transitioning them out of cold storage), returning a restic.WarmupJob
// handle for the in-flight warmup.
type startWarmupFn func(context.Context, restic.IDSet) (restic.WarmupJob, error)
|
2023-12-31 12:07:19 +01:00
|
|
|
|
2018-04-08 14:02:30 +02:00
|
|
|
// fileRestorer restores a set of files below dst.
type fileRestorer struct {
	idx         func(restic.BlobType, restic.ID) []restic.PackedBlob // looks up the packs that contain a blob
	blobsLoader blobsLoaderFn                                        // loads blobs from a pack file

	startWarmup startWarmupFn // warms up packs (e.g. restores from cold storage) before downloading

	workerCount int // concurrency; derived from the backend connection count
	filesWriter *filesWriter
	zeroChunk   restic.ID // id of the all-zero chunk, used to detect sparse file sections
	sparse      bool      // whether sparse restore is enabled
	progress    *restore.Progress

	allowRecursiveDelete bool

	dst   string      // restore destination base directory
	files []*fileInfo // files to restore; dropped once restoreFiles has processed them
	// Error is called for per-file errors; the default aborts on all errors.
	Error func(string, error) error
	// Info receives informational messages; the default discards them.
	Info func(string)
}
|
|
|
|
|
2019-11-27 13:22:38 +01:00
|
|
|
func newFileRestorer(dst string,
|
2023-12-31 12:07:19 +01:00
|
|
|
blobsLoader blobsLoaderFn,
|
2024-05-19 14:56:17 +02:00
|
|
|
idx func(restic.BlobType, restic.ID) []restic.PackedBlob,
|
2022-08-07 17:26:46 +02:00
|
|
|
connections uint,
|
2022-10-28 17:44:34 +02:00
|
|
|
sparse bool,
|
2024-06-29 20:23:28 +02:00
|
|
|
allowRecursiveDelete bool,
|
feat(backends/s3): add warmup support before repacks and restores (#5173)
* feat(backends/s3): add warmup support before repacks and restores
This commit introduces basic support for transitioning pack files stored
in cold storage to hot storage on S3 and S3-compatible providers.
To prevent unexpected behavior for existing users, the feature is gated
behind new flags:
- `s3.enable-restore`: opt-in flag (defaults to false)
- `s3.restore-days`: number of days for the restored objects to remain
in hot storage (defaults to `7`)
- `s3.restore-timeout`: maximum time to wait for a single restoration
(default to `1 day`)
- `s3.restore-tier`: retrieval tier at which the restore will be
processed. (default to `Standard`)
As restoration times can be lengthy, this implementation preemptively
restores selected packs to prevent incessant restore-delays during
downloads. This is slightly sub-optimal as we could process packs
out-of-order (as soon as they're transitioned), but this would really
add too much complexity for a marginal gain in speed.
To maintain simplicity and prevent resources exhautions with lots of
packs, no new concurrency mechanisms or goroutines were added. This just
hooks gracefully into the existing routines.
**Limitations:**
- Tests against the backend were not written due to the lack of cold
storage class support in MinIO. Testing was done manually on
Scaleway's S3-compatible object storage. If necessary, we could
explore testing with LocalStack or mocks, though this requires further
discussion.
- Currently, this feature only warms up before restores and repacks
(prune/copy), as those are the two main use-cases I came across.
Support for other commands may be added in future iterations, as long
as affected packs can be calculated in advance.
- The feature is gated behind a new alpha `s3-restore` feature flag to
make it explicit that the feature is still wet behind the ears.
- There is no explicit user notification for ongoing pack restorations.
While I think it is not necessary because of the opt-in flag, showing
some notice may improve usability (but would probably require major
refactoring in the progress bar which I didn't want to start). Another
possibility would be to add a flag to send restores requests and fail
early.
See https://github.com/restic/restic/issues/3202
* ui: warn user when files are warming up from cold storage
* refactor: remove the PacksWarmer struct
It's easier to handle multiple handles in the backend directly, and it
may open the door to reducing the number of requests made to the backend
in the future.
2025-02-01 19:26:27 +01:00
|
|
|
startWarmup startWarmupFn,
|
2022-10-28 17:44:34 +02:00
|
|
|
progress *restore.Progress) *fileRestorer {
|
2021-08-08 00:38:17 +02:00
|
|
|
|
|
|
|
// as packs are streamed the concurrency is limited by IO
|
|
|
|
workerCount := int(connections)
|
2019-11-27 13:22:38 +01:00
|
|
|
|
2018-04-08 14:02:30 +02:00
|
|
|
return &fileRestorer{
|
2024-06-29 20:23:28 +02:00
|
|
|
idx: idx,
|
|
|
|
blobsLoader: blobsLoader,
|
feat(backends/s3): add warmup support before repacks and restores (#5173)
* feat(backends/s3): add warmup support before repacks and restores
This commit introduces basic support for transitioning pack files stored
in cold storage to hot storage on S3 and S3-compatible providers.
To prevent unexpected behavior for existing users, the feature is gated
behind new flags:
- `s3.enable-restore`: opt-in flag (defaults to false)
- `s3.restore-days`: number of days for the restored objects to remain
in hot storage (defaults to `7`)
- `s3.restore-timeout`: maximum time to wait for a single restoration
(default to `1 day`)
- `s3.restore-tier`: retrieval tier at which the restore will be
processed. (default to `Standard`)
As restoration times can be lengthy, this implementation preemptively
restores selected packs to prevent incessant restore-delays during
downloads. This is slightly sub-optimal as we could process packs
out-of-order (as soon as they're transitioned), but this would really
add too much complexity for a marginal gain in speed.
To maintain simplicity and prevent resources exhautions with lots of
packs, no new concurrency mechanisms or goroutines were added. This just
hooks gracefully into the existing routines.
**Limitations:**
- Tests against the backend were not written due to the lack of cold
storage class support in MinIO. Testing was done manually on
Scaleway's S3-compatible object storage. If necessary, we could
explore testing with LocalStack or mocks, though this requires further
discussion.
- Currently, this feature only warms up before restores and repacks
(prune/copy), as those are the two main use-cases I came across.
Support for other commands may be added in future iterations, as long
as affected packs can be calculated in advance.
- The feature is gated behind a new alpha `s3-restore` feature flag to
make it explicit that the feature is still wet behind the ears.
- There is no explicit user notification for ongoing pack restorations.
While I think it is not necessary because of the opt-in flag, showing
some notice may improve usability (but would probably require major
refactoring in the progress bar which I didn't want to start). Another
possibility would be to add a flag to send restores requests and fail
early.
See https://github.com/restic/restic/issues/3202
* ui: warn user when files are warming up from cold storage
* refactor: remove the PacksWarmer struct
It's easier to handle multiple handles in the backend directly, and it
may open the door to reducing the number of requests made to the backend
in the future.
2025-02-01 19:26:27 +01:00
|
|
|
startWarmup: startWarmup,
|
2024-06-29 20:23:28 +02:00
|
|
|
filesWriter: newFilesWriter(workerCount, allowRecursiveDelete),
|
|
|
|
zeroChunk: repository.ZeroChunk(),
|
|
|
|
sparse: sparse,
|
|
|
|
progress: progress,
|
|
|
|
allowRecursiveDelete: allowRecursiveDelete,
|
|
|
|
workerCount: workerCount,
|
|
|
|
dst: dst,
|
|
|
|
Error: restorerAbortOnAllErrors,
|
feat(backends/s3): add warmup support before repacks and restores (#5173)
* feat(backends/s3): add warmup support before repacks and restores
This commit introduces basic support for transitioning pack files stored
in cold storage to hot storage on S3 and S3-compatible providers.
To prevent unexpected behavior for existing users, the feature is gated
behind new flags:
- `s3.enable-restore`: opt-in flag (defaults to false)
- `s3.restore-days`: number of days for the restored objects to remain
in hot storage (defaults to `7`)
- `s3.restore-timeout`: maximum time to wait for a single restoration
(default to `1 day`)
- `s3.restore-tier`: retrieval tier at which the restore will be
processed. (default to `Standard`)
As restoration times can be lengthy, this implementation preemptively
restores selected packs to prevent incessant restore-delays during
downloads. This is slightly sub-optimal as we could process packs
out-of-order (as soon as they're transitioned), but this would really
add too much complexity for a marginal gain in speed.
To maintain simplicity and prevent resources exhautions with lots of
packs, no new concurrency mechanisms or goroutines were added. This just
hooks gracefully into the existing routines.
**Limitations:**
- Tests against the backend were not written due to the lack of cold
storage class support in MinIO. Testing was done manually on
Scaleway's S3-compatible object storage. If necessary, we could
explore testing with LocalStack or mocks, though this requires further
discussion.
- Currently, this feature only warms up before restores and repacks
(prune/copy), as those are the two main use-cases I came across.
Support for other commands may be added in future iterations, as long
as affected packs can be calculated in advance.
- The feature is gated behind a new alpha `s3-restore` feature flag to
make it explicit that the feature is still wet behind the ears.
- There is no explicit user notification for ongoing pack restorations.
While I think it is not necessary because of the opt-in flag, showing
some notice may improve usability (but would probably require major
refactoring in the progress bar which I didn't want to start). Another
possibility would be to add a flag to send restores requests and fail
early.
See https://github.com/restic/restic/issues/3202
* ui: warn user when files are warming up from cold storage
* refactor: remove the PacksWarmer struct
It's easier to handle multiple handles in the backend directly, and it
may open the door to reducing the number of requests made to the backend
in the future.
2025-02-01 19:26:27 +01:00
|
|
|
Info: func(_ string) {},
|
2018-04-08 14:02:30 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-05-31 17:06:08 +02:00
|
|
|
func (r *fileRestorer) addFile(location string, content restic.IDs, size int64, state *fileState) {
|
|
|
|
r.files = append(r.files, &fileInfo{location: location, blobs: content, size: size, state: state})
|
2018-09-15 02:18:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func (r *fileRestorer) targetPath(location string) string {
|
|
|
|
return filepath.Join(r.dst, location)
|
2018-04-08 14:02:30 +02:00
|
|
|
}
|
|
|
|
|
2024-07-20 12:32:08 +02:00
|
|
|
func (r *fileRestorer) forEachBlob(blobIDs []restic.ID, fn func(packID restic.ID, packBlob restic.Blob, idx int, fileOffset int64)) error {
|
2019-11-27 13:22:38 +01:00
|
|
|
if len(blobIDs) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
2018-04-08 14:02:30 +02:00
|
|
|
|
2024-07-20 12:32:08 +02:00
|
|
|
fileOffset := int64(0)
|
2024-05-31 17:06:08 +02:00
|
|
|
for i, blobID := range blobIDs {
|
2024-05-19 14:56:17 +02:00
|
|
|
packs := r.idx(restic.DataBlob, blobID)
|
2020-06-14 13:26:10 +02:00
|
|
|
if len(packs) == 0 {
|
2019-11-27 13:22:38 +01:00
|
|
|
return errors.Errorf("Unknown blob %s", blobID.String())
|
2018-04-08 14:02:30 +02:00
|
|
|
}
|
2024-07-20 12:32:08 +02:00
|
|
|
pb := packs[0]
|
|
|
|
fn(pb.PackID, pb.Blob, i, fileOffset)
|
|
|
|
fileOffset += int64(pb.DataLength())
|
2018-04-08 14:02:30 +02:00
|
|
|
}
|
|
|
|
|
2019-11-27 13:22:38 +01:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r *fileRestorer) restoreFiles(ctx context.Context) error {
|
|
|
|
|
|
|
|
packs := make(map[restic.ID]*packInfo) // all packs
|
2020-08-15 17:41:55 +02:00
|
|
|
// Process packs in order of first access. While this cannot guarantee
|
|
|
|
// that file chunks are restored sequentially, it offers a good enough
|
|
|
|
// approximation to shorten restore times by up to 19% in some test.
|
|
|
|
var packOrder restic.IDs
|
2018-04-08 14:02:30 +02:00
|
|
|
|
2019-11-27 13:22:38 +01:00
|
|
|
// create packInfo from fileInfo
|
|
|
|
for _, file := range r.files {
|
2024-07-31 19:30:47 +02:00
|
|
|
if ctx.Err() != nil {
|
|
|
|
return ctx.Err()
|
|
|
|
}
|
|
|
|
|
2019-11-27 13:22:38 +01:00
|
|
|
fileBlobs := file.blobs.(restic.IDs)
|
|
|
|
largeFile := len(fileBlobs) > largeFileBlobCount
|
|
|
|
var packsMap map[restic.ID][]fileBlobInfo
|
|
|
|
if largeFile {
|
|
|
|
packsMap = make(map[restic.ID][]fileBlobInfo)
|
2024-07-20 12:32:08 +02:00
|
|
|
file.blobs = packsMap
|
2019-11-27 13:22:38 +01:00
|
|
|
}
|
2024-07-20 12:23:12 +02:00
|
|
|
restoredBlobs := false
|
2024-07-20 12:32:08 +02:00
|
|
|
err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob, idx int, fileOffset int64) {
|
2024-07-20 12:23:12 +02:00
|
|
|
if !file.state.HasMatchingBlob(idx) {
|
|
|
|
if largeFile {
|
2024-07-12 22:59:01 +02:00
|
|
|
packsMap[packID] = append(packsMap[packID], fileBlobInfo{id: blob.ID, offset: fileOffset})
|
|
|
|
}
|
2024-07-20 12:23:12 +02:00
|
|
|
restoredBlobs = true
|
|
|
|
} else {
|
|
|
|
r.reportBlobProgress(file, uint64(blob.DataLength()))
|
2024-07-21 12:03:28 +02:00
|
|
|
// completely ignore blob
|
|
|
|
return
|
2019-11-27 13:22:38 +01:00
|
|
|
}
|
|
|
|
pack, ok := packs[packID]
|
|
|
|
if !ok {
|
|
|
|
pack = &packInfo{
|
|
|
|
id: packID,
|
|
|
|
files: make(map[*fileInfo]struct{}),
|
|
|
|
}
|
|
|
|
packs[packID] = pack
|
2020-08-15 17:41:55 +02:00
|
|
|
packOrder = append(packOrder, packID)
|
2019-11-27 13:22:38 +01:00
|
|
|
}
|
|
|
|
pack.files[file] = struct{}{}
|
2022-08-07 17:26:46 +02:00
|
|
|
if blob.ID.Equal(r.zeroChunk) {
|
|
|
|
file.sparse = r.sparse
|
|
|
|
}
|
2019-11-27 13:22:38 +01:00
|
|
|
})
|
2024-07-20 12:32:08 +02:00
|
|
|
if err != nil {
|
|
|
|
// repository index is messed up, can't do anything
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2022-08-07 17:26:46 +02:00
|
|
|
if len(fileBlobs) == 1 {
|
|
|
|
// no need to preallocate files with a single block, thus we can always consider them to be sparse
|
|
|
|
// in addition, a short chunk will never match r.zeroChunk which would prevent sparseness for short files
|
|
|
|
file.sparse = r.sparse
|
|
|
|
}
|
2024-06-05 22:01:55 +02:00
|
|
|
if file.state != nil {
|
|
|
|
// The restorer currently cannot punch new holes into an existing files.
|
|
|
|
// Thus sections that contained data but should be sparse after restoring
|
|
|
|
// the snapshot would still contain the old data resulting in a corrupt restore.
|
|
|
|
file.sparse = false
|
|
|
|
}
|
2022-08-07 17:26:46 +02:00
|
|
|
|
2024-07-20 12:23:12 +02:00
|
|
|
// empty file or one with already uptodate content. Make sure that the file size is correct
|
|
|
|
if !restoredBlobs {
|
|
|
|
err := r.truncateFileToSize(file.location, file.size)
|
|
|
|
if errFile := r.sanitizeError(file, err); errFile != nil {
|
|
|
|
return errFile
|
|
|
|
}
|
|
|
|
|
|
|
|
// the progress events were already sent for non-zero size files
|
|
|
|
if file.size == 0 {
|
|
|
|
r.reportBlobProgress(file, 0)
|
|
|
|
}
|
|
|
|
}
|
2019-11-27 13:22:38 +01:00
|
|
|
}
|
2024-05-31 15:50:48 +02:00
|
|
|
// drop no longer necessary file list
|
|
|
|
r.files = nil
|
2018-04-08 14:02:30 +02:00
|
|
|
|
feat(backends/s3): add warmup support before repacks and restores (#5173)
* feat(backends/s3): add warmup support before repacks and restores
This commit introduces basic support for transitioning pack files stored
in cold storage to hot storage on S3 and S3-compatible providers.
To prevent unexpected behavior for existing users, the feature is gated
behind new flags:
- `s3.enable-restore`: opt-in flag (defaults to false)
- `s3.restore-days`: number of days for the restored objects to remain
in hot storage (defaults to `7`)
- `s3.restore-timeout`: maximum time to wait for a single restoration
(defaults to `1 day`)
- `s3.restore-tier`: retrieval tier at which the restore will be
processed. (defaults to `Standard`)
As restoration times can be lengthy, this implementation preemptively
restores selected packs to prevent incessant restore-delays during
downloads. This is slightly sub-optimal as we could process packs
out-of-order (as soon as they're transitioned), but this would really
add too much complexity for a marginal gain in speed.
To maintain simplicity and prevent resource exhaustion with lots of
packs, no new concurrency mechanisms or goroutines were added. This just
hooks gracefully into the existing routines.
**Limitations:**
- Tests against the backend were not written due to the lack of cold
storage class support in MinIO. Testing was done manually on
Scaleway's S3-compatible object storage. If necessary, we could
explore testing with LocalStack or mocks, though this requires further
discussion.
- Currently, this feature only warms up before restores and repacks
(prune/copy), as those are the two main use-cases I came across.
Support for other commands may be added in future iterations, as long
as affected packs can be calculated in advance.
- The feature is gated behind a new alpha `s3-restore` feature flag to
make it explicit that the feature is still wet behind the ears.
- There is no explicit user notification for ongoing pack restorations.
While I think it is not necessary because of the opt-in flag, showing
some notice may improve usability (but would probably require major
refactoring in the progress bar which I didn't want to start). Another
possibility would be to add a flag to send restores requests and fail
early.
See https://github.com/restic/restic/issues/3202
* ui: warn user when files are warming up from cold storage
* refactor: remove the PacksWarmer struct
It's easier to handle multiple handles in the backend directly, and it
may open the door to reducing the number of requests made to the backend
in the future.
2025-02-01 19:26:27 +01:00
|
|
|
if feature.Flag.Enabled(feature.S3Restore) {
|
|
|
|
warmupJob, err := r.startWarmup(ctx, restic.NewIDSet(packOrder...))
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if warmupJob.HandleCount() != 0 {
|
|
|
|
r.Info(fmt.Sprintf("warming up %d packs from cold storage, this may take a while...", warmupJob.HandleCount()))
|
|
|
|
if err := warmupJob.Wait(ctx); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-04 19:20:04 +01:00
|
|
|
wg, ctx := errgroup.WithContext(ctx)
|
2019-11-27 13:22:38 +01:00
|
|
|
downloadCh := make(chan *packInfo)
|
2021-01-04 19:20:04 +01:00
|
|
|
|
|
|
|
worker := func() error {
|
|
|
|
for pack := range downloadCh {
|
|
|
|
if err := r.downloadPack(ctx, pack); err != nil {
|
|
|
|
return err
|
2018-04-08 14:02:30 +02:00
|
|
|
}
|
|
|
|
}
|
2021-01-04 19:20:04 +01:00
|
|
|
return nil
|
2018-04-08 14:02:30 +02:00
|
|
|
}
|
2021-08-08 00:38:17 +02:00
|
|
|
for i := 0; i < r.workerCount; i++ {
|
2021-01-04 19:20:04 +01:00
|
|
|
wg.Go(worker)
|
2018-04-08 14:02:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// the main restore loop
|
2021-01-04 19:20:04 +01:00
|
|
|
wg.Go(func() error {
|
2024-07-12 22:24:35 +02:00
|
|
|
defer close(downloadCh)
|
2021-01-04 19:20:04 +01:00
|
|
|
for _, id := range packOrder {
|
|
|
|
pack := packs[id]
|
2023-04-23 11:33:21 +02:00
|
|
|
// allow garbage collection of packInfo
|
|
|
|
delete(packs, id)
|
2021-01-04 19:20:04 +01:00
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
case downloadCh <- pack:
|
|
|
|
debug.Log("Scheduled download pack %s", pack.id.Str())
|
|
|
|
}
|
2018-04-08 14:02:30 +02:00
|
|
|
}
|
2021-01-04 19:20:04 +01:00
|
|
|
return nil
|
|
|
|
})
|
2019-11-27 13:22:38 +01:00
|
|
|
|
2021-01-04 19:20:04 +01:00
|
|
|
return wg.Wait()
|
2018-04-08 14:02:30 +02:00
|
|
|
}
|
|
|
|
|
2024-07-20 12:23:12 +02:00
|
|
|
func (r *fileRestorer) truncateFileToSize(location string, size int64) error {
|
|
|
|
f, err := createFile(r.targetPath(location), size, false, r.allowRecursiveDelete)
|
2024-05-30 23:06:15 +02:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2024-07-20 12:23:12 +02:00
|
|
|
return f.Close()
|
2024-05-30 23:06:15 +02:00
|
|
|
}
|
|
|
|
|
2024-01-07 12:00:32 +01:00
|
|
|
// blobToFileOffsetsMapping maps a blob ID to the set of files that need the
// blob, together with every offset at which the blob must be written within
// each file. A single blob can occur multiple times in one file, hence the
// slice of offsets per file.
type blobToFileOffsetsMapping map[restic.ID]struct {
	files map[*fileInfo][]int64 // file -> offsets (plural!) of the blob in the file
	blob  restic.Blob // blob metadata as recorded in the repository index
}
|
2018-04-08 14:02:30 +02:00
|
|
|
|
2024-01-07 12:00:32 +01:00
|
|
|
func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
|
2022-02-13 11:43:09 +01:00
|
|
|
// calculate blob->[]files->[]offsets mappings
|
2024-01-07 12:00:32 +01:00
|
|
|
blobs := make(blobToFileOffsetsMapping)
|
2018-04-08 14:02:30 +02:00
|
|
|
for file := range pack.files {
|
2019-11-27 13:22:38 +01:00
|
|
|
addBlob := func(blob restic.Blob, fileOffset int64) {
|
|
|
|
blobInfo, ok := blobs[blob.ID]
|
|
|
|
if !ok {
|
|
|
|
blobInfo.files = make(map[*fileInfo][]int64)
|
2024-01-07 12:06:36 +01:00
|
|
|
blobInfo.blob = blob
|
2019-11-27 13:22:38 +01:00
|
|
|
blobs[blob.ID] = blobInfo
|
|
|
|
}
|
|
|
|
blobInfo.files[file] = append(blobInfo.files[file], fileOffset)
|
|
|
|
}
|
|
|
|
if fileBlobs, ok := file.blobs.(restic.IDs); ok {
|
2024-07-20 12:32:08 +02:00
|
|
|
err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob, idx int, fileOffset int64) {
|
2024-07-20 12:23:12 +02:00
|
|
|
if packID.Equal(pack.id) && !file.state.HasMatchingBlob(idx) {
|
|
|
|
addBlob(blob, fileOffset)
|
2019-11-27 13:22:38 +01:00
|
|
|
}
|
|
|
|
})
|
2021-01-31 18:04:45 +01:00
|
|
|
if err != nil {
|
|
|
|
// restoreFiles should have caught this error before
|
|
|
|
panic(err)
|
|
|
|
}
|
2019-11-27 13:22:38 +01:00
|
|
|
} else if packsMap, ok := file.blobs.(map[restic.ID][]fileBlobInfo); ok {
|
|
|
|
for _, blob := range packsMap[pack.id] {
|
2024-05-19 14:56:17 +02:00
|
|
|
idxPacks := r.idx(restic.DataBlob, blob.id)
|
2020-06-14 13:26:10 +02:00
|
|
|
for _, idxPack := range idxPacks {
|
|
|
|
if idxPack.PackID.Equal(pack.id) {
|
|
|
|
addBlob(idxPack.Blob, blob.offset)
|
|
|
|
break
|
2018-04-08 14:02:30 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-11-27 13:22:38 +01:00
|
|
|
}
|
2018-04-08 14:02:30 +02:00
|
|
|
}
|
|
|
|
|
2023-12-30 22:39:26 +01:00
|
|
|
// track already processed blobs for precise error reporting
|
|
|
|
processedBlobs := restic.NewBlobSet()
|
2024-01-07 12:17:35 +01:00
|
|
|
err := r.downloadBlobs(ctx, pack.id, blobs, processedBlobs)
|
2024-01-07 12:20:31 +01:00
|
|
|
return r.reportError(blobs, processedBlobs, err)
|
2018-04-08 14:02:30 +02:00
|
|
|
}
|
2024-01-07 12:00:32 +01:00
|
|
|
|
|
|
|
func (r *fileRestorer) sanitizeError(file *fileInfo, err error) error {
|
2024-07-12 22:27:32 +02:00
|
|
|
switch err {
|
|
|
|
case nil, context.Canceled, context.DeadlineExceeded:
|
|
|
|
// Context errors are permanent.
|
|
|
|
return err
|
|
|
|
default:
|
|
|
|
return r.Error(file.location, err)
|
2024-01-07 12:00:32 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-01-07 12:20:31 +01:00
|
|
|
func (r *fileRestorer) reportError(blobs blobToFileOffsetsMapping, processedBlobs restic.BlobSet, err error) error {
|
|
|
|
if err == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// only report error for not yet processed blobs
|
|
|
|
affectedFiles := make(map[*fileInfo]struct{})
|
|
|
|
for _, entry := range blobs {
|
|
|
|
if processedBlobs.Has(entry.blob.BlobHandle) {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
for file := range entry.files {
|
|
|
|
affectedFiles[file] = struct{}{}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for file := range affectedFiles {
|
|
|
|
if errFile := r.sanitizeError(file, err); errFile != nil {
|
|
|
|
return errFile
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2024-01-07 12:06:36 +01:00
|
|
|
// downloadBlobs streams the given blobs from the pack file identified by
// packID and writes each blob to every file/offset that needs it. Every blob
// handed to the callback is added to processedBlobs (even on error) so that
// reportError can later distinguish processed from unprocessed blobs. Errors
// are filtered per file through sanitizeError; a per-blob load error is
// reported for the affected files and then swallowed so the remaining blobs
// can still be restored.
func (r *fileRestorer) downloadBlobs(ctx context.Context, packID restic.ID,
	blobs blobToFileOffsetsMapping, processedBlobs restic.BlobSet) error {

	blobList := make([]restic.Blob, 0, len(blobs))
	for _, entry := range blobs {
		blobList = append(blobList, entry.blob)
	}
	return r.blobsLoader(ctx, packID, blobList,
		func(h restic.BlobHandle, blobData []byte, err error) error {
			// mark the blob as processed regardless of err, for precise
			// error attribution in reportError
			processedBlobs.Insert(h)
			blob := blobs[h.ID]
			if err != nil {
				// report the load error to each affected file; sanitizeError
				// may turn it into a warning, in which case restoring continues
				for file := range blob.files {
					if errFile := r.sanitizeError(file, err); errFile != nil {
						return errFile
					}
				}
				return nil
			}
			for file, offsets := range blob.files {
				for _, offset := range offsets {
					// avoid long cancelation delays for frequently used blobs
					if ctx.Err() != nil {
						return ctx.Err()
					}

					writeToFile := func() error {
						// this looks overly complicated and needs explanation
						// two competing requirements:
						// - must create the file once and only once
						// - should allow concurrent writes to the file
						// so write the first blob while holding file lock
						// write other blobs after releasing the lock
						// createSize >= 0 tells filesWriter to create/truncate
						// the file; -1 means "file already exists, just write"
						createSize := int64(-1)
						file.lock.Lock()
						if file.inProgress {
							// another goroutine already created the file;
							// release immediately to allow concurrent writes
							file.lock.Unlock()
						} else {
							// first blob for this file: keep the lock for the
							// whole write so the creation happens exactly once
							defer file.lock.Unlock()
							file.inProgress = true
							createSize = file.size
						}
						writeErr := r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, createSize, file.sparse)
						r.reportBlobProgress(file, uint64(len(blobData)))
						return writeErr
					}
					err := r.sanitizeError(file, writeToFile())
					if err != nil {
						return err
					}
				}
			}
			return nil
		})
}
|
2024-07-12 22:59:01 +02:00
|
|
|
|
|
|
|
func (r *fileRestorer) reportBlobProgress(file *fileInfo, blobSize uint64) {
|
|
|
|
action := restore.ActionFileUpdated
|
|
|
|
if file.state == nil {
|
|
|
|
action = restore.ActionFileRestored
|
|
|
|
}
|
|
|
|
r.progress.AddProgress(file.location, action, uint64(blobSize), uint64(file.size))
|
|
|
|
}
|