package repository_test

import (
	"context"
	"math/rand"
	"testing"
	"time"

	"github.com/restic/restic/internal/backend"
	"github.com/restic/restic/internal/repository"
	"github.com/restic/restic/internal/restic"
	rtest "github.com/restic/restic/internal/test"
	"github.com/restic/restic/internal/ui/progress"
	"golang.org/x/sync/errgroup"
)
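
// randomSize returns a random value in the half-open interval [min, max).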
func randomSize(random *rand.Rand, min, max int) int {
	return random.Intn(max-min) + min
}
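
// createRandomBlobs stores the given number of random blobs in repo. Each blob
// is a data blob with probability pData and a tree blob otherwise; with
// smallBlobs set, data blobs are limited to at most 20KiB. The repository is
// flushed at random points and once at the end, so the blobs end up spread
// over several pack files.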
func createRandomBlobs(t testing.TB, random *rand.Rand, repo restic.Repository, blobs int, pData float32, smallBlobs bool) {
	var wg errgroup.Group
	repo.StartPackUploader(context.TODO(), &wg)

	for i := 0; i < blobs; i++ {
		var (
			tpe    restic.BlobType
			length int
		)

		if random.Float32() < pData {
			tpe = restic.DataBlob
			if smallBlobs {
				length = randomSize(random, 1*1024, 20*1024) // 1KiB to 20KiB of data
			} else {
				length = randomSize(random, 10*1024, 1024*1024) // 10KiB to 1MiB of data
			}
		} else {
			tpe = restic.TreeBlob
			length = randomSize(random, 1*1024, 20*1024) // 1KiB to 20KiB
		}

		buf := make([]byte, length)
		random.Read(buf)

		id, exists, _, err := repo.SaveBlob(context.TODO(), tpe, buf, restic.ID{}, false)
		if err != nil {
			t.Fatalf("SaveBlob() error %v", err)
		}

		if exists {
			t.Errorf("duplicate blob %v/%v ignored", id, tpe)
			continue
		}

		// flush from time to time so the blobs are spread over multiple pack files
		if random.Float32() < 0.2 {
			if err = repo.Flush(context.Background()); err != nil {
				t.Fatalf("repo.Flush() returned error %v", err)
			}
			repo.StartPackUploader(context.TODO(), &wg)
		}
	}

	if err := repo.Flush(context.Background()); err != nil {
		t.Fatalf("repo.Flush() returned error %v", err)
	}
}
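
// createRandomWrongBlob saves a blob whose content does not match the ID it is
// stored under (the first byte is inverted after hashing), leaving the
// repository with a corrupted blob. It returns the handle of that blob.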
func createRandomWrongBlob(t testing.TB, random *rand.Rand, repo restic.Repository) restic.BlobHandle {
	length := randomSize(random, 10*1024, 1024*1024) // 10KiB to 1MiB of data
	buf := make([]byte, length)
	random.Read(buf)
	id := restic.Hash(buf)
	// invert first data byte
	buf[0] ^= 0xff

	var wg errgroup.Group
	repo.StartPackUploader(context.TODO(), &wg)

	_, _, _, err := repo.SaveBlob(context.TODO(), restic.DataBlob, buf, id, false)
	if err != nil {
		t.Fatalf("SaveBlob() error %v", err)
	}

	if err := repo.Flush(context.Background()); err != nil {
		t.Fatalf("repo.Flush() returned error %v", err)
	}

	return restic.BlobHandle{ID: id, Type: restic.DataBlob}
}

// selectBlobs splits the list of all blobs randomly into two lists. A blob
// will be contained in the first one with probability p.
func selectBlobs(t *testing.T, random *rand.Rand, repo restic.Repository, p float32) (list1, list2 restic.BlobSet) {
	list1 = restic.NewBlobSet()
	list2 = restic.NewBlobSet()

	blobs := restic.NewBlobSet()

	err := repo.List(context.TODO(), restic.PackFile, func(id restic.ID, size int64) error {
		entries, _, err := repo.ListPack(context.TODO(), id, size)
		if err != nil {
			t.Fatalf("error listing pack %v: %v", id, err)
		}

		for _, entry := range entries {
			h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
			if blobs.Has(h) {
				t.Errorf("ignoring duplicate blob %v", h)
				return nil
			}
			blobs.Insert(h)

			if random.Float32() <= p {
				list1.Insert(restic.BlobHandle{ID: entry.ID, Type: entry.Type})
			} else {
				list2.Insert(restic.BlobHandle{ID: entry.ID, Type: entry.Type})
			}
		}
		return nil
	})
	if err != nil {
		t.Fatal(err)
	}

	return list1, list2
}
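
// listPacks returns the IDs of all pack files in the repository.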
func listPacks(t *testing.T, repo restic.Lister) restic.IDSet {
	return listFiles(t, repo, restic.PackFile)
}
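
// listFiles returns the IDs of all files of the given type in the repository.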
func listFiles(t *testing.T, repo restic.Lister, tpe backend.FileType) restic.IDSet {
	list := restic.NewIDSet()
	err := repo.List(context.TODO(), tpe, func(id restic.ID, size int64) error {
		list.Insert(id)
		return nil
	})
	if err != nil {
		t.Fatal(err)
	}

	return list
}
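
// findPacksForBlobs returns the IDs of all pack files that contain at least
// one of the given blobs. It fails the test if a blob cannot be found in the
// index.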
func findPacksForBlobs(t *testing.T, repo restic.Repository, blobs restic.BlobSet) restic.IDSet {
	packs := restic.NewIDSet()

	for h := range blobs {
		list := repo.LookupBlob(h.Type, h.ID)
		if len(list) == 0 {
			t.Fatal("Failed to find blob", h.ID.Str(), "with type", h.Type)
		}

		for _, pb := range list {
			packs.Insert(pb.PackID)
		}
	}

	return packs
}
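
// repack rewrites the given packs, keeping only the listed blobs, and then
// removes the now-obsolete pack files from the backend.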
func repack(t *testing.T, repo restic.Repository, be backend.Backend, packs restic.IDSet, blobs restic.BlobSet) {
	repackedBlobs, err := repository.Repack(context.TODO(), repo, repo, packs, blobs, nil, nil)
	if err != nil {
		t.Fatal(err)
	}

	for id := range repackedBlobs {
		err = be.Remove(context.TODO(), backend.Handle{Type: restic.PackFile, Name: id.String()})
		if err != nil {
			t.Fatal(err)
		}
	}
}
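
// rebuildAndReloadIndex rebuilds the repository index from all pack files and
// reloads it.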
func rebuildAndReloadIndex(t *testing.T, repo *repository.Repository) {
	rtest.OK(t, repository.RepairIndex(context.TODO(), repo, repository.RepairIndexOptions{
		ReadAllPacks: true,
	}, &progress.NoopPrinter{}))

	rtest.OK(t, repo.LoadIndex(context.TODO(), nil))
}

func TestRepack(t *testing.T) {
	repository.TestAllVersions(t, testRepack)
}

func testRepack(t *testing.T, version uint) {
	repo, _, be := repository.TestRepositoryWithVersion(t, version)

	seed := time.Now().UnixNano()
	random := rand.New(rand.NewSource(seed))
	t.Logf("rand seed is %v", seed)

	// add a small amount of blobs twice to create multiple pack files
	createRandomBlobs(t, random, repo, 10, 0.7, false)
	createRandomBlobs(t, random, repo, 10, 0.7, false)

	packsBefore := listPacks(t, repo)

	// Running repack on empty ID sets should not do anything at all.
	repack(t, repo, be, nil, nil)

	packsAfter := listPacks(t, repo)

	if !packsAfter.Equals(packsBefore) {
		t.Fatalf("packs are not equal, Repack modified something. Before:\n %v\nAfter:\n %v",
			packsBefore, packsAfter)
	}

	// repack the packs containing the blobs to remove, keeping only keepBlobs
	removeBlobs, keepBlobs := selectBlobs(t, random, repo, 0.2)

	removePacks := findPacksForBlobs(t, repo, removeBlobs)

	repack(t, repo, be, removePacks, keepBlobs)
	rebuildAndReloadIndex(t, repo)

	packsAfter = listPacks(t, repo)
	for id := range removePacks {
		if packsAfter.Has(id) {
			t.Errorf("pack %v still present although it should have been repacked and removed", id.Str())
		}
	}

	// the kept blobs must now be found in exactly one pack that was not removed
	for h := range keepBlobs {
		list := repo.LookupBlob(h.Type, h.ID)
		if len(list) == 0 {
			t.Errorf("unable to find blob %v in repo", h.ID.Str())
			continue
		}

		if len(list) != 1 {
			t.Errorf("expected one pack in the list, got: %v", list)
			continue
		}

		pb := list[0]
		if removePacks.Has(pb.PackID) {
			t.Errorf("lookup returned pack ID %v that should've been removed", pb.PackID)
		}
	}

	// the removed blobs must no longer be in the repository
	for h := range removeBlobs {
		if _, found := repo.LookupBlobSize(h.Type, h.ID); found {
			t.Errorf("blob %v still contained in the repo", h)
		}
	}
}

func TestRepackCopy(t *testing.T) {
	repository.TestAllVersions(t, testRepackCopy)
}
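
// oneConnectionRepo wraps a repository and reports a single backend
// connection, to exercise repacking with the minimal possible connection count.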
type oneConnectionRepo struct {
	restic.Repository
}

func (r oneConnectionRepo) Connections() uint {
	return 1
}

func testRepackCopy(t *testing.T, version uint) {
	repo, _, _ := repository.TestRepositoryWithVersion(t, version)
	dstRepo, _, _ := repository.TestRepositoryWithVersion(t, version)

	// test with minimal possible connection count
	repoWrapped := &oneConnectionRepo{repo}
	dstRepoWrapped := &oneConnectionRepo{dstRepo}

	seed := time.Now().UnixNano()
	random := rand.New(rand.NewSource(seed))
	t.Logf("rand seed is %v", seed)

	// add a small amount of blobs twice to create multiple pack files
	createRandomBlobs(t, random, repo, 10, 0.7, false)
	createRandomBlobs(t, random, repo, 10, 0.7, false)

	_, keepBlobs := selectBlobs(t, random, repo, 0.2)
	copyPacks := findPacksForBlobs(t, repo, keepBlobs)

	_, err := repository.Repack(context.TODO(), repoWrapped, dstRepoWrapped, copyPacks, keepBlobs, nil, nil)
	if err != nil {
		t.Fatal(err)
	}
	rebuildAndReloadIndex(t, dstRepo)

	// all kept blobs must be present in the destination repository exactly once
	for h := range keepBlobs {
		list := dstRepo.LookupBlob(h.Type, h.ID)
		if len(list) == 0 {
			t.Errorf("unable to find blob %v in repo", h.ID.Str())
			continue
		}

		if len(list) != 1 {
			t.Errorf("expected one pack in the list, got: %v", list)
			continue
		}
	}
}

func TestRepackWrongBlob(t *testing.T) {
	repository.TestAllVersions(t, testRepackWrongBlob)
}

func testRepackWrongBlob(t *testing.T, version uint) {
	// disable verification to allow adding corrupted blobs to the repository
	repo, _ := repository.TestRepositoryWithBackend(t, nil, version, repository.Options{NoExtraVerify: true})

	seed := time.Now().UnixNano()
	random := rand.New(rand.NewSource(seed))
	t.Logf("rand seed is %v", seed)

	createRandomBlobs(t, random, repo, 5, 0.7, false)
	createRandomWrongBlob(t, random, repo)

	// just keep all blobs, but also rewrite every pack
	_, keepBlobs := selectBlobs(t, random, repo, 0)
	rewritePacks := findPacksForBlobs(t, repo, keepBlobs)

	// repacking must fail because the only copy of the corrupted blob is broken
	_, err := repository.Repack(context.TODO(), repo, repo, rewritePacks, keepBlobs, nil, nil)
	if err == nil {
		t.Fatal("expected repack to fail but got no error")
	}
	t.Logf("found expected error: %v", err)
}
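
// TestRepackBlobFallback checks that repacking falls back to an intact copy of
// a blob when the first copy it encounters is corrupted.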
func TestRepackBlobFallback(t *testing.T) {
	repository.TestAllVersions(t, testRepackBlobFallback)
}

func testRepackBlobFallback(t *testing.T, version uint) {
	// disable verification to allow adding corrupted blobs to the repository
	repo, _ := repository.TestRepositoryWithBackend(t, nil, version, repository.Options{NoExtraVerify: true})

	seed := time.Now().UnixNano()
	random := rand.New(rand.NewSource(seed))
	t.Logf("rand seed is %v", seed)

	length := randomSize(random, 10*1024, 1024*1024) // 10KiB to 1MiB of data
	buf := make([]byte, length)
	random.Read(buf)
	id := restic.Hash(buf)

	// corrupted copy
	modbuf := make([]byte, len(buf))
	copy(modbuf, buf)
	// invert first data byte
	modbuf[0] ^= 0xff

	// create pack with broken copy
	var wg errgroup.Group
	repo.StartPackUploader(context.TODO(), &wg)
	_, _, _, err := repo.SaveBlob(context.TODO(), restic.DataBlob, modbuf, id, false)
	rtest.OK(t, err)
	rtest.OK(t, repo.Flush(context.Background()))

	// find pack with damaged blob
	keepBlobs := restic.NewBlobSet(restic.BlobHandle{Type: restic.DataBlob, ID: id})
	rewritePacks := findPacksForBlobs(t, repo, keepBlobs)

	// create pack with valid copy
	repo.StartPackUploader(context.TODO(), &wg)
	_, _, _, err = repo.SaveBlob(context.TODO(), restic.DataBlob, buf, id, true)
	rtest.OK(t, err)
	rtest.OK(t, repo.Flush(context.Background()))

	// repack must fall back to the valid copy
	_, err = repository.Repack(context.TODO(), repo, repo, rewritePacks, keepBlobs, nil, nil)
	rtest.OK(t, err)

	// the blob must now exist in three packs: the broken copy, the valid copy,
	// and the newly repacked copy
	keepBlobs = restic.NewBlobSet(restic.BlobHandle{Type: restic.DataBlob, ID: id})
	packs := findPacksForBlobs(t, repo, keepBlobs)
	rtest.Assert(t, len(packs) == 3, "unexpected number of copies: %v", len(packs))
}
|