index: rewrite oversized indexes

This commit is contained in:
Michael Eischer 2025-02-16 17:03:14 +01:00
parent 2fd8a3865c
commit 3b8d15d651
3 changed files with 18 additions and 1 deletions

View File

@ -11,6 +11,7 @@ import (
"github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/repository/pack"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/debug"
@ -116,7 +117,18 @@ var IndexFull = func(idx *Index) bool {
debug.Log("index %p only has %d blobs and is too young (%v)", idx, blobs, age)
return false
}
// IndexOversized reports whether idx holds too many blobs. It sums the
// entry counts of every per-type map in idx.byType while holding the index's
// read lock, and returns true once that total reaches
// indexMaxBlobs+pack.MaxHeaderEntries.
var IndexOversized = func(idx *Index) bool {
	idx.m.RLock()
	defer idx.m.RUnlock()

	// Threshold at which an index counts as oversized.
	limit := indexMaxBlobs + pack.MaxHeaderEntries

	var total uint
	for i := range idx.byType {
		total += idx.byType[i].len()
	}

	return total >= limit
}
// StorePack remembers the ids of all blobs of a given pack

View File

@ -419,7 +419,7 @@ func (mi *MasterIndex) Rewrite(ctx context.Context, repo restic.Unpacked[restic.
newIndex := NewIndex()
for task := range rewriteCh {
// always rewrite indexes that include a pack that must be removed, that are not full, or that are oversized
if len(task.idx.Packs().Intersect(excludePacks)) == 0 && IndexFull(task.idx) {
if len(task.idx.Packs().Intersect(excludePacks)) == 0 && IndexFull(task.idx) && !IndexOversized(task.idx) {
// make sure that each pack is only stored exactly once in the index
excludePacks.Merge(task.idx.Packs())
// index is already up to date

View File

@ -215,6 +215,11 @@ const (
eagerEntries = 15
)
// MaxHeaderEntries is the maximum number of entries a pack file can contain.
// It is computed as (MaxHeaderSize - headerSize) / entrySize.
var MaxHeaderEntries = (MaxHeaderSize - headerSize) / entrySize
// readRecords reads up to bufsize bytes from the underlying ReaderAt, returning
// the raw header, the total number of bytes in the header, and any error.
// If the header contains fewer than bufsize bytes, the header is truncated to