- Added EntryUncertain to EntryType enum and put to use.

- Make Windows report all paths recursively when existing directory is moved in, for consistency with other platforms.
- Added de-duplication logic. Changes are notified only after at least 1/10 of a second has elapsed since the last change, in case of lengthy writes that can produce a sequence of many write events in quick succession.
This commit is contained in:
Lawrence Murray 2025-02-28 16:47:57 +07:00
parent 96ec03c24c
commit ac602eecc2
6 changed files with 95 additions and 147 deletions

View File

@ -271,6 +271,21 @@ func isMetadataFile(filename string) bool {
return strings.HasSuffix(filename, metaFileExt)
}
// Checks whether a file is a compressed file
func isCompressedFile(filename string) bool {
return strings.HasSuffix(filename, gzFileExt)
}
// Checks whether a file is an uncompressed file
func isUncompressedFile(filename string) bool {
return strings.HasSuffix(filename, uncompressedFileExt)
}
// Checks whether a path is a file, all of which must have one of the special file extensions
func isFile(filename string) bool {
return isMetadataFile(filename) || isCompressedFile(filename) || isUncompressedFile(filename)
}
// Checks whether a file is a metadata file and returns the original
// file name and a flag indicating whether it was a metadata file or
// not.
@ -1022,6 +1037,17 @@ func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryT
wrappedPath string
isMetadataFile bool
)
// Uncertain entry types can be resolved by the fact that all files in the compressed
// backend have a special file extension
if entryType == fs.EntryUncertain {
if isFile(path) {
entryType = fs.EntryObject
} else {
entryType = fs.EntryDirectory
}
}
switch entryType {
case fs.EntryDirectory:
wrappedPath = path

View File

@ -874,6 +874,17 @@ func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryT
decrypted, err = f.cipher.DecryptDirName(path)
case fs.EntryObject:
decrypted, err = f.cipher.DecryptFileName(path)
case fs.EntryUncertain:
if f.opt.FilenameEncryption == "on" && !f.opt.DirectoryNameEncryption {
// Uncertain entry types cannot be handled in this case, as not only is the entry
// type uncertain, but also whether or not the name is encrypted
fs.Errorf(path, "crypt ChangeNotify: ignoring EntryType %d as file names are encrypted but directory names are not", fs.EntryUncertain)
return
}
decrypted, err = f.cipher.DecryptFileName(path)
if err == ErrorNotAnEncryptedFile {
decrypted, err = f.cipher.DecryptDirName(path)
}
default:
fs.Errorf(path, "crypt ChangeNotify: ignoring unknown EntryType %d", entryType)
return

View File

@ -35,12 +35,9 @@ func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryT
return
}
// All known files and directories, used to call notifyFunc() with correct
// entry type even on remove and rename events.
known := make(map[string]fs.EntryType)
// Files and directories that have changed in the last poll window.
changed := make(map[string]fs.EntryType)
// Files and directories changed in the last poll window, mapped to the
// time at which notification of the change was received.
changed := make(map[string]time.Time)
// Channel to handle new paths. Buffered ensures filesystem events keep
// being consumed.
@ -81,11 +78,17 @@ func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryT
tickerC = ticker.C
}
case <-tickerC:
// notify for all changed paths since last tick
for entryPath, entryType := range changed {
notifyFunc(filepath.ToSlash(entryPath), entryType)
// Notify for all paths that have changed since the last sync, and
// which were changed at least 1/10 of a second (1e8 nanoseconds)
// ago. The lag is for de-duping purposes during long writes, which
// can consist of multiple write notifications in quick succession.
cutoff := time.Now().Add(-1e8)
for entryPath, entryTime := range changed {
if entryTime.Before(cutoff) {
notifyFunc(filepath.ToSlash(entryPath), fs.EntryUncertain)
delete(changed, entryPath)
}
}
changed = make(map[string]fs.EntryType)
case event, ok := <-watcher.Events:
if !ok {
break loop
@ -109,26 +112,10 @@ func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryT
if event.Has(fsnotify.Create) {
fs.Debugf(f, "Create: %s", event.Name)
watchChan <- event.Name
<-replyChan // implies mutex on 'known' and 'changed'
<-replyChan // implies mutex on 'changed'
} else {
// Determine the entry type (file or directory) using 'known'. This
// is instead of Stat(), say, which is both expensive (a system
// call) and does not work if the entry has been removed (including
// removed before a creation or write event is handled).
entryPath, _ := filepath.Rel(f.root, event.Name)
entryType, ok := known[entryPath]
if !ok {
// By the time the create event was handled for this entry, it was
// already deleted, and it could not be determined whether it was
// a file or directory. It is ignored, as it does not affect the
// state of the filesystem between the previous tick and the next
// tick.
} else {
changed[entryPath] = entryType
if event.Has(fsnotify.Remove) || event.Has(fsnotify.Rename) {
delete(known, entryPath)
}
}
changed[entryPath] = time.Now()
// Internally, fsnotify stops watching directories that are removed
// or renamed, so it is not necessary to make updates to the watch
@ -153,7 +140,7 @@ func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryT
}
}()
// Start goroutine to establish watchers and update 'known'
// Start goroutine to establish watchers
go func() {
for {
path, ok := <-watchChan
@ -189,16 +176,11 @@ func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryT
} else if info.IsDir() {
entryType = fs.EntryDirectory
}
}
// Record known and possibly changed
known[entryPath] = entryType
if !initial {
changed[entryPath] = entryType
changed[entryPath] = time.Now()
}
if entryType == fs.EntryDirectory {
// Recursively watch the directory and populate 'known'
// Recursively watch the directory
err := watcher.Add(path)
if err != nil {
fs.Errorf(f, "Failed to start watching %s, already removed? %s", path, err)
@ -215,20 +197,14 @@ func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryT
}
for _, d := range entries {
entryPath := d.Remote()
entryType := fs.EntryObject
path := filepath.Join(f.root, entryPath)
info, err := os.Lstat(path)
if err != nil {
fs.Errorf(f, "Failed to stat %s, already removed? %s", path, err)
continue
}
if info.IsDir() {
entryType = fs.EntryDirectory
}
known[entryPath] = entryType
if !initial {
changed[entryPath] = entryType
changed[entryPath] = time.Now()
}
if info.IsDir() {
// Watch the directory.

View File

@ -4,7 +4,6 @@ package local
import (
"context"
"os"
"path/filepath"
"time"
_ "unsafe" // use go:linkname
@ -46,36 +45,9 @@ func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryT
fs.Debugf(f, "Started watching %s", f.root)
}
// All known files and directories, used to call notifyFunc() with correct
// entry type even on remove and rename events.
known := make(map[string]fs.EntryType)
// Files and directories that have changed in the last poll window.
changed := make(map[string]fs.EntryType)
// Walk the root directory to populate 'known'
known[""] = fs.EntryDirectory
err = walk.Walk(ctx, f, "", false, -1, func(entryPath string, entries fs.DirEntries, err error) error {
if err != nil {
fs.Errorf(f, "Failed to walk %s, already removed? %s", entryPath, err)
} else {
entryType := fs.EntryObject
path := filepath.Join(f.root, entryPath)
info, err := os.Lstat(path)
if err != nil {
fs.Errorf(f, "Failed to stat %s, already removed? %s", path, err)
} else {
if info.IsDir() {
entryType = fs.EntryDirectory
}
known[entryPath] = entryType
}
}
return nil
})
if err != nil {
fs.Errorf(f, "Failed to walk root, already removed? %s", err)
}
// Files and directories changed in the last poll window, mapped to the
// time at which notification of the change was received.
changed := make(map[string]time.Time)
// Start goroutine to handle filesystem events
go func() {
@ -109,11 +81,17 @@ func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryT
tickerC = ticker.C
}
case <-tickerC:
// notify for all changed paths since last tick
for entryPath, entryType := range changed {
notifyFunc(filepath.ToSlash(entryPath), entryType)
// Notify for all paths that have changed since the last sync, and
// which were changed at least 1/10 of a second (1e8 nanoseconds)
// ago. The lag is for de-duping purposes during long writes, which
// can consist of multiple write notifications in quick succession.
cutoff := time.Now().Add(-1e8)
for entryPath, entryTime := range changed {
if entryTime.Before(cutoff) {
notifyFunc(filepath.ToSlash(entryPath), fs.EntryUncertain)
delete(changed, entryPath)
}
}
changed = make(map[string]fs.EntryType)
case event, ok := <-watcher.Events:
if !ok {
break loop
@ -133,77 +111,27 @@ func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryT
if event.Has(fsnotify.Chmod) {
fs.Debugf(f, "Chmod: %s", event.Name)
}
// Determine the entry type (file or directory) using 'known'. This
// is instead of Stat(), say, which is both expensive (a system
// call) and does not work if the entry has been removed (including
// removed before a creation or write event is handled).
entryPath, _ := filepath.Rel(f.root, event.Name)
entryType := fs.EntryObject
changed[entryPath] = time.Now()
if event.Has(fsnotify.Create) {
// Stat to determine whether entry is a file or directory
info, err := os.Lstat(event.Name)
err = walk.Walk(ctx, f, entryPath, false, -1, func(entryPath string, entries fs.DirEntries, err error) error {
if err != nil {
// Entry has already been deleted, so cannot determine whether it
// was a file or directory. It is ignored, as it does not affect
// The entry has already been removed, and we do not know what
// type it was. It can be ignored, as this means it has been both
// created and removed since the last tick, which will not change
// the diff at the next tick.
} else if info.IsDir() {
entryType = fs.EntryDirectory
known[entryPath] = entryType
changed[entryPath] = entryType
// TODO: Recursively add to 'known' and 'changed'
//
// The issue here is that the walk triggers errors, "The
// process cannot access the file because it is being
// used by another process."
//
// err = walk.Walk(ctx, f, entryPath, false, -1, func(entryPath string, entries fs.DirEntries, err error) error {
// if err != nil {
// fs.Errorf(f, "Failed to walk %s, already removed? %s", entryPath, err)
// } else {
// entryType := fs.EntryObject
// path := filepath.Join(f.root, entryPath)
// info, err := os.Lstat(path)
// if err != nil {
// fs.Errorf(f, "Failed to stat %s, already removed? %s", path, err)
// } else {
// if info.IsDir() {
// entryType = fs.EntryDirectory
// }
// known[entryPath] = entryType
// }
// }
// return nil
// })
// if err != nil {
// fs.Errorf(f, "Failed to walk %s, already removed? %s", entryPath, err)
// }
} else {
known[entryPath] = entryType
changed[entryPath] = entryType
fs.Errorf(f, "Failed to walk %s, already removed? %s", entryPath, err)
}
} else {
entryType, ok := known[entryPath]
if !ok {
// By the time the create event was handled for this
// entry, it was already removed, and it could not be
// determined whether it was a file or directory. It is
// ignored, as it does not affect the diff at the next
// tick.
} else {
changed[entryPath] = entryType
if event.Has(fsnotify.Remove) || event.Has(fsnotify.Rename) {
delete(known, entryPath)
// TODO: Recursively remove from 'known' and
// add to 'changed'.
for _, d := range entries {
entryPath := d.Remote()
changed[entryPath] = time.Now()
}
return nil
})
if err != nil {
fs.Errorf(f, "Failed to walk %s, already removed? %s", entryPath, err)
}
// Internally, fsnotify stops watching directories that are
// removed or renamed, so it is not necessary to make
// updates to the watch list.
}
case err, ok := <-watcher.Errors:
if !ok {

View File

@ -568,6 +568,9 @@ const (
EntryDirectory EntryType = iota // 0
// EntryObject should be used to classify remote paths in objects
EntryObject // 1
// EntryUncertain should be used when a remote path cannot reliably or
// efficiently be classified as EntryDirectory or EntryObject
EntryUncertain // 2
)
// UnWrapper is an optional interfaces for Fs

View File

@ -879,6 +879,7 @@ func Run(t *testing.T, opt *Opt) {
pollInterval := make(chan time.Duration)
dirChanges := map[string]struct{}{}
objChanges := map[string]struct{}{}
uncChanges := map[string]struct{}{}
var mutex sync.Mutex
doChangeNotify(ctx, func(x string, e fs.EntryType) {
fs.Debugf(nil, "doChangeNotify(%q, %+v)", x, e)
@ -891,6 +892,8 @@ func Run(t *testing.T, opt *Opt) {
dirChanges[x] = struct{}{}
} else if e == fs.EntryObject {
objChanges[x] = struct{}{}
} else if e == fs.EntryUncertain {
uncChanges[x] = struct{}{}
}
mutex.Unlock()
}, pollInterval)
@ -930,19 +933,20 @@ func Run(t *testing.T, opt *Opt) {
// Wait a little while for the changes to come in
wantDirChanges := []string{"dir/subdir1", "dir/subdir3", "dir/subdir2"}
wantObjChanges := []string{"dir/file2", "dir/file4", "dir/file3"}
wantUncChanges := append(wantDirChanges, wantObjChanges...)
ok := false
for tries := 1; tries < 10; tries++ {
mutex.Lock()
ok = contains(dirChanges, wantDirChanges) && contains(objChanges, wantObjChanges)
ok = (contains(dirChanges, wantDirChanges) && contains(objChanges, wantObjChanges)) || contains(uncChanges, wantUncChanges)
mutex.Unlock()
if ok {
break
}
t.Logf("Try %d/10 waiting for dirChanges and objChanges", tries)
t.Logf("Try %d/10 waiting for dirChanges, objChanges, and uncChanges", tries)
time.Sleep(3 * time.Second)
}
if !ok {
t.Errorf("%+v does not contain %+v or \n%+v does not contain %+v", dirChanges, wantDirChanges, objChanges, wantObjChanges)
t.Errorf("%+v does not contain %+v or \n%+v does not contain %+v, and %+v does not contain %+v", dirChanges, wantDirChanges, objChanges, wantObjChanges, uncChanges, wantUncChanges)
}
// tidy up afterwards