diff --git a/backend/dropbox/dropbox.go b/backend/dropbox/dropbox.go index 82eee8e69..b02ff6f4b 100644 --- a/backend/dropbox/dropbox.go +++ b/backend/dropbox/dropbox.go @@ -92,6 +92,9 @@ const ( maxFileNameLength = 255 ) +type exportAPIFormat string +type exportExtension string // dotless + var ( // Description of how to auth for this app dropboxConfig = &oauthutil.Config{ @@ -132,6 +135,16 @@ var ( DefaultTimeoutAsync: 10 * time.Second, DefaultBatchSizeAsync: 100, } + + exportKnownAPIFormats = map[exportAPIFormat]exportExtension{ + "markdown": "md", + "html": "html", + } + // Populated based on exportKnownAPIFormats + exportKnownExtensions = map[exportExtension]exportAPIFormat{} + + paperExtension = ".paper" + paperTemplateExtension = ".papert" ) // Gets an oauth config with the right scopes @@ -247,23 +260,61 @@ folders.`, Help: "Specify a different Dropbox namespace ID to use as the root for all paths.", Default: "", Advanced: true, - }}...), defaultBatcherOptions.FsOptions("For full info see [the main docs](https://rclone.org/dropbox/#batch-mode)\n\n")...), + }, { + Name: "export_formats", + Help: `Comma separated list of preferred formats for exporting files + +Certain Dropbox files can only be accessed by exporting them to another format. +These include Dropbox Paper documents. + +For each such file, rclone will choose the first format on this list that Dropbox +considers valid. If none is valid, it will choose Dropbox's default format. + +Known formats include: "html", "md" (markdown)`, + Default: fs.CommaSepList{"html", "md"}, + Advanced: true, + }, { + Name: "skip_exports", + Help: "Skip exportable files in all listings.\n\nIf given, exportable files practically become invisible to rclone.", + Default: false, + Advanced: true, + }, { + Name: "show_all_exports", + Default: false, + Help: `Show all exportable files in listings. + +Adding this flag will allow all exportable files to be server side copied. +Note that rclone doesn't add extensions to the exportable file names in this mode. + +Do **not** use this flag when trying to download exportable files - rclone +will fail to download them. +`, + Advanced: true, + }, + }...), defaultBatcherOptions.FsOptions("For full info see [the main docs](https://rclone.org/dropbox/#batch-mode)\n\n")...), }) + + for apiFormat, ext := range exportKnownAPIFormats { + exportKnownExtensions[ext] = apiFormat + } } // Options defines the configuration for this backend type Options struct { - ChunkSize fs.SizeSuffix `config:"chunk_size"` - Impersonate string `config:"impersonate"` - SharedFiles bool `config:"shared_files"` - SharedFolders bool `config:"shared_folders"` - BatchMode string `config:"batch_mode"` - BatchSize int `config:"batch_size"` - BatchTimeout fs.Duration `config:"batch_timeout"` - AsyncBatch bool `config:"async_batch"` - PacerMinSleep fs.Duration `config:"pacer_min_sleep"` - Enc encoder.MultiEncoder `config:"encoding"` - RootNsid string `config:"root_namespace"` + ChunkSize fs.SizeSuffix `config:"chunk_size"` + Impersonate string `config:"impersonate"` + SharedFiles bool `config:"shared_files"` + SharedFolders bool `config:"shared_folders"` + BatchMode string `config:"batch_mode"` + BatchSize int `config:"batch_size"` + BatchTimeout fs.Duration `config:"batch_timeout"` + AsyncBatch bool `config:"async_batch"` + PacerMinSleep fs.Duration `config:"pacer_min_sleep"` + Enc encoder.MultiEncoder `config:"encoding"` + RootNsid string `config:"root_namespace"` + ExportFormats fs.CommaSepList `config:"export_formats"` + SkipExports bool `config:"skip_exports"` + ShowAllExports bool `config:"show_all_exports"` } // Fs represents a remote dropbox server @@ -283,8 +334,18 @@ type Fs struct { pacer *fs.Pacer // To pace the API calls ns string // The namespace we are using or "" for none batcher *batcher.Batcher[*files.UploadSessionFinishArg, *files.FileMetadata] + exportExts []exportExtension } +type exportType int + +const ( + notExport exportType = iota // a regular file + exportHide // should be hidden + exportListOnly // listable, but can't export + exportExportable // can export +) + // Object describes a dropbox object // // Dropbox Objects always have full metadata @@ -296,6 +357,9 @@ type Object struct { bytes int64 // size of the object modTime time.Time // time it was last modified hash string // content_hash of the object + + exportType exportType + exportAPIFormat exportAPIFormat } // Name of the remote (as passed into NewFs) @@ -436,6 +500,14 @@ func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, e HeaderGenerator: f.headerGenerator, } + for _, e := range opt.ExportFormats { + ext := exportExtension(e) + if exportKnownExtensions[ext] == "" { + return nil, fmt.Errorf("dropbox: unknown export format '%s'", e) + } + f.exportExts = append(f.exportExts, ext) + } + // unauthorized config for endpoints that fail with auth ucfg := dropbox.Config{ LogLevel: dropbox.LogOff, // logging in the SDK: LogOff, LogDebug, LogInfo @@ -588,38 +660,126 @@ func (f *Fs) setRoot(root string) { } } +type getMetadataResult struct { + entry files.IsMetadata + notFound bool + err error +} + // getMetadata gets the metadata for a file or directory -func (f *Fs) getMetadata(ctx context.Context, objPath string) (entry files.IsMetadata, notFound bool, err error) { - err = f.pacer.Call(func() (bool, error) { - entry, err = f.srv.GetMetadata(&files.GetMetadataArg{ +func (f *Fs) getMetadata(ctx context.Context, objPath string) (res getMetadataResult) { + res.err = f.pacer.Call(func() (bool, error) { + res.entry, res.err = f.srv.GetMetadata(&files.GetMetadataArg{ Path: f.opt.Enc.FromStandardPath(objPath), }) - return shouldRetry(ctx, err) + return shouldRetry(ctx, res.err) }) - if err != nil { - switch e := err.(type) { + if res.err != nil { + switch e := res.err.(type) { case files.GetMetadataAPIError: if e.EndpointError != nil && e.EndpointError.Path != nil && e.EndpointError.Path.Tag == files.LookupErrorNotFound { - notFound = true - err = nil + res.notFound = true + res.err = nil } } } return } -// getFileMetadata gets the metadata for a file -func (f *Fs) getFileMetadata(ctx context.Context, filePath string) (fileInfo *files.FileMetadata, err error) { - entry, notFound, err := f.getMetadata(ctx, filePath) - if err != nil { - return nil, err +// Get metadata such that the result would be exported with the given extension +// Return a channel that will eventually receive the metadata +func (f *Fs) getMetadataForExt(ctx context.Context, filePath string, wantExportExtension exportExtension) chan getMetadataResult { + ch := make(chan getMetadataResult, 1) + wantDownloadable := (wantExportExtension == "") + go func() { + defer close(ch) + + res := f.getMetadata(ctx, filePath) + info, ok := res.entry.(*files.FileMetadata) + if !ok { // Can't check anything about file, just return what we have + ch <- res + return + } + + // Return notFound if downloadability or extension doesn't match + if wantDownloadable != info.IsDownloadable { + ch <- getMetadataResult{notFound: true} + return + } + if !info.IsDownloadable { + _, ext := f.chooseExportFormat(info) + if ext != wantExportExtension { + ch <- getMetadataResult{notFound: true} + return + } + } + + // Return our real result or error + ch <- res + }() + return ch +} + +// For a given rclone-path, figure out what the Dropbox-path may be, in order of preference. +// Multiple paths might be plausible, due to export path munging. +func (f *Fs) possibleMetadatas(ctx context.Context, filePath string) (ret []<-chan getMetadataResult) { + ret = []<-chan getMetadataResult{} + + // Prefer an exact match + ret = append(ret, f.getMetadataForExt(ctx, filePath, "")) + + // Check if we're plausibly an export path, otherwise we're done + if f.opt.SkipExports || f.opt.ShowAllExports { + return } - if notFound { + dotted := path.Ext(filePath) + if dotted == "" { + return + } + ext := exportExtension(dotted[1:]) + if exportKnownExtensions[ext] == "" { + return + } + + // We might be an export path! Try all possibilities + base := strings.TrimSuffix(filePath, dotted) + + // `foo.papert.md` will only come from `foo.papert`. Never check something like `foo.papert.paper` + if strings.HasSuffix(base, paperTemplateExtension) { + ret = append(ret, f.getMetadataForExt(ctx, base, ext)) + return + } + + // Otherwise, try both `foo.md` coming from `foo`, or from `foo.paper` + ret = append(ret, f.getMetadataForExt(ctx, base, ext)) + ret = append(ret, f.getMetadataForExt(ctx, base+paperExtension, ext)) + return +} + +// getFileMetadata gets the metadata for a file +func (f *Fs) getFileMetadata(ctx context.Context, filePath string) (*files.FileMetadata, error) { + var res getMetadataResult + + // Try all possible metadatas + possibleMetadatas := f.possibleMetadatas(ctx, filePath) + for _, ch := range possibleMetadatas { + res = <-ch + + if res.err != nil { + return nil, res.err + } + if !res.notFound { + break + } + } + + if res.notFound { return nil, fs.ErrorObjectNotFound } - fileInfo, ok := entry.(*files.FileMetadata) + + fileInfo, ok := res.entry.(*files.FileMetadata) if !ok { - if _, ok = entry.(*files.FolderMetadata); ok { + if _, ok = res.entry.(*files.FolderMetadata); ok { return nil, fs.ErrorIsDir } return nil, fs.ErrorNotAFile @@ -628,15 +788,15 @@ func (f *Fs) getFileMetadata(ctx context.Context, filePath string) (fileInfo *fi } // getDirMetadata gets the metadata for a directory -func (f *Fs) getDirMetadata(ctx context.Context, dirPath string) (dirInfo *files.FolderMetadata, err error) { - entry, notFound, err := f.getMetadata(ctx, dirPath) - if err != nil { - return nil, err +func (f *Fs) getDirMetadata(ctx context.Context, dirPath string) (*files.FolderMetadata, error) { + res := f.getMetadata(ctx, dirPath) + if res.err != nil { + return nil, res.err } - if notFound { + if res.notFound { return nil, fs.ErrorDirNotFound } - dirInfo, ok := entry.(*files.FolderMetadata) + dirInfo, ok := res.entry.(*files.FolderMetadata) if !ok { return nil, fs.ErrorIsFile } @@ -836,16 +996,15 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e var res *files.ListFolderResult for { if !started { - arg := files.ListFolderArg{ - Path: f.opt.Enc.FromStandardPath(root), - Recursive: false, - Limit: 1000, - } + arg := files.NewListFolderArg(f.opt.Enc.FromStandardPath(root)) + arg.Recursive = false + arg.Limit = 1000 + if root == "/" { arg.Path = "" // Specify root folder as empty string } err = f.pacer.Call(func() (bool, error) { - res, err = f.srv.ListFolder(&arg) + res, err = f.srv.ListFolder(arg) return shouldRetry(ctx, err) }) if err != nil { @@ -898,7 +1057,9 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e if err != nil { return nil, err } - entries = append(entries, o) + if o.(*Object).exportType.listable() { + entries = append(entries, o) + } } } if !res.HasMore { @@ -984,16 +1145,14 @@ func (f *Fs) purgeCheck(ctx context.Context, dir string, check bool) (err error) } // check directory empty - arg := files.ListFolderArg{ - Path: encRoot, - Recursive: false, - } + arg := files.NewListFolderArg(encRoot) + arg.Recursive = false if root == "/" { arg.Path = "" // Specify root folder as empty string } var res *files.ListFolderResult err = f.pacer.Call(func() (bool, error) { - res, err = f.srv.ListFolder(&arg) + res, err = f.srv.ListFolder(arg) return shouldRetry(ctx, err) }) if err != nil { @@ -1338,16 +1497,14 @@ func (f *Fs) changeNotifyCursor(ctx context.Context) (cursor string, err error) var startCursor *files.ListFolderGetLatestCursorResult err = f.pacer.Call(func() (bool, error) { - arg := files.ListFolderArg{ - Path: f.opt.Enc.FromStandardPath(f.slashRoot), - Recursive: true, - } + arg := files.NewListFolderArg(f.opt.Enc.FromStandardPath(f.slashRoot)) + arg.Recursive = true if arg.Path == "/" { arg.Path = "" } - startCursor, err = f.srv.ListFolderGetLatestCursor(&arg) + startCursor, err = f.srv.ListFolderGetLatestCursor(arg) return shouldRetry(ctx, err) }) @@ -1451,8 +1608,50 @@ func (f *Fs) Shutdown(ctx context.Context) error { return nil } +func (f *Fs) chooseExportFormat(info *files.FileMetadata) (exportAPIFormat, exportExtension) { + // Find API export formats Dropbox supports for this file + // Sometimes Dropbox lists a format in ExportAs but not ExportOptions, so check both + ei := info.ExportInfo + dropboxFormatStrings := append([]string{ei.ExportAs}, ei.ExportOptions...) + + // Find which extensions these correspond to + exportExtensions := map[exportExtension]exportAPIFormat{} + var dropboxPreferredAPIFormat exportAPIFormat + var dropboxPreferredExtension exportExtension + for _, format := range dropboxFormatStrings { + apiFormat := exportAPIFormat(format) + // Only consider formats we know about + if ext, ok := exportKnownAPIFormats[apiFormat]; ok { + if dropboxPreferredAPIFormat == "" { + dropboxPreferredAPIFormat = apiFormat + dropboxPreferredExtension = ext + } + exportExtensions[ext] = apiFormat + } + } + + // See if the user picked a valid extension + for _, ext := range f.exportExts { + if apiFormat, ok := exportExtensions[ext]; ok { + return apiFormat, ext + } + } + + // If no matches, prefer the first valid format Dropbox lists + return dropboxPreferredAPIFormat, dropboxPreferredExtension +} + // ------------------------------------------------------------ +func (et exportType) listable() bool { + return et != exportHide +} + +// something we should _try_ to export +func (et exportType) exportable() bool { + return et == exportExportable || et == exportListOnly +} + // Fs returns the parent Fs func (o *Object) Fs() fs.Info { return o.fs @@ -1496,6 +1695,32 @@ func (o *Object) Size() int64 { return o.bytes } +func (o *Object) setMetadataForExport(info *files.FileMetadata) { + o.bytes = -1 + o.hash = "" + + if o.fs.opt.SkipExports { + o.exportType = exportHide + return + } + if o.fs.opt.ShowAllExports { + o.exportType = exportListOnly + return + } + + var exportExt exportExtension + o.exportAPIFormat, exportExt = o.fs.chooseExportFormat(info) + if o.exportAPIFormat == "" { + o.exportType = exportHide + } else { + o.exportType = exportExportable + // get rid of any paper extension, if present + o.remote = strings.TrimSuffix(o.remote, paperExtension) + // add the export extension + o.remote += "." + string(exportExt) + } +} + // setMetadataFromEntry sets the fs data from a files.FileMetadata // // This isn't a complete set of metadata and has an inaccurate date @@ -1504,6 +1729,10 @@ func (o *Object) setMetadataFromEntry(info *files.FileMetadata) error { o.bytes = int64(info.Size) o.modTime = info.ClientModified o.hash = info.ContentHash + + if !info.IsDownloadable { + o.setMetadataForExport(info) + } return nil } @@ -1567,6 +1796,27 @@ func (o *Object) Storable() bool { return true } +func (o *Object) export(ctx context.Context) (in io.ReadCloser, err error) { + if o.exportType == exportListOnly || o.exportAPIFormat == "" { + fs.Debugf(o.remote, "No export format found") + return nil, fs.ErrorObjectNotFound + } + + arg := files.ExportArg{Path: o.id, ExportFormat: string(o.exportAPIFormat)} + var exportResult *files.ExportResult + err = o.fs.pacer.Call(func() (bool, error) { + exportResult, in, err = o.fs.srv.Export(&arg) + return shouldRetry(ctx, err) + }) + if err != nil { + return nil, err + } + + o.bytes = int64(exportResult.ExportMetadata.Size) + o.hash = exportResult.ExportMetadata.ExportHash + return +} + // Open an object for read func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.ReadCloser, err error) { if o.fs.opt.SharedFiles { @@ -1586,6 +1836,10 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.Read return } + if o.exportType.exportable() { + return o.export(ctx) + } + fs.FixRangeOption(options, o.bytes) headers := fs.OpenOptionHeaders(options) arg := files.DownloadArg{ diff --git a/backend/dropbox/dropbox_internal_test.go b/backend/dropbox/dropbox_internal_test.go index 75bbf4798..431660ab3 100644 --- a/backend/dropbox/dropbox_internal_test.go +++ b/backend/dropbox/dropbox_internal_test.go @@ -1,9 +1,16 @@ package dropbox import ( + "context" + "io" + "strings" "testing" + "github.com/dropbox/dropbox-sdk-go-unofficial/v6/dropbox" + "github.com/dropbox/dropbox-sdk-go-unofficial/v6/dropbox/files" + "github.com/rclone/rclone/fstest/fstests" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestInternalCheckPathLength(t *testing.T) { @@ -42,3 +49,54 @@ func TestInternalCheckPathLength(t *testing.T) { assert.Equal(t, test.ok, err == nil, test.in) } } + +func (f *Fs) importPaperForTest(t *testing.T) { + content := `# test doc + +Lorem ipsum __dolor__ sit amet +[link](http://google.com) +` + + arg := files.PaperCreateArg{ + Path: f.slashRootSlash + "export.paper", + ImportFormat: &files.ImportFormat{Tagged: dropbox.Tagged{Tag: files.ImportFormatMarkdown}}, + } + var err error + err = f.pacer.Call(func() (bool, error) { + reader := strings.NewReader(content) + _, err = f.srv.PaperCreate(&arg, reader) + return shouldRetry(context.Background(), err) + }) + require.NoError(t, err) +} + +func (f *Fs) InternalTestPaperExport(t *testing.T) { + ctx := context.Background() + f.importPaperForTest(t) + + f.exportExts = []exportExtension{"html"} + + obj, err := f.NewObject(ctx, "export.html") + require.NoError(t, err) + + rc, err := obj.Open(ctx) + require.NoError(t, err) + defer func() { require.NoError(t, rc.Close()) }() + + buf, err := io.ReadAll(rc) + require.NoError(t, err) + text := string(buf) + + for _, excerpt := range []string{ + "Lorem ipsum", + "dolor", + `href="http://google.com"`, + } { + require.Contains(t, text, excerpt) + } +} +func (f *Fs) InternalTest(t *testing.T) { + t.Run("PaperExport", f.InternalTestPaperExport) +} + +var _ fstests.InternalTester = (*Fs)(nil) diff --git a/docs/content/dropbox.md b/docs/content/dropbox.md index 219e16f43..6a9211299 100644 --- a/docs/content/dropbox.md +++ b/docs/content/dropbox.md @@ -190,6 +190,42 @@ with `--dropbox-batch-mode async` then do a final transfer with Note that there may be a pause when quitting rclone while rclone finishes up the last batch using this mode. +### Exporting files + +Certain files in Dropbox are "exportable", such as Dropbox Paper +documents. These files need to be converted to another format in +order to be downloaded. Often multiple formats are available for +conversion. + +When rclone downloads a exportable file, it chooses the format to +download based on the `--dropbox-export-formats` setting. By +default, the export formats are `html,md`, which are sensible +defaults for Dropbox Paper. + +Rclone chooses the first format ID in the export formats list that +Dropbox supports for a given file. If no format in the list is +usable, rclone will choose the default format that Dropbox suggests. + +Rclone will change the extension to correspond to the export format. +Here are some examples of how extensions are mapped: + +| File type | Filename in Dropbox | Filename in rclone | +|----------------|---------------------|--------------------| +| Paper | mydoc.paper | mydoc.html | +| Paper template | mydoc.papert | mydoc.papert.html | +| other | mydoc | mydoc.html | + +_Importing_ exportable files is not yet supported by rclone. + +Here are the supported export extensions known by rclone. Note that +rclone does not currently support other formats not on this list, +even if Dropbox supports them. Also, Dropbox could change the list +of supported formats at any time. + +| Format ID | Name | Description | +|-----------|----------|----------------------| +| html | HTML | HTML document | +| md | Markdown | Markdown text format | {{< rem autogenerated options start" - DO NOT EDIT - instead edit fs.RegInfo in backend/dropbox/dropbox.go then run make backenddocs" >}} ### Standard options @@ -522,6 +558,11 @@ non-personal account otherwise the visibility may not be correct. [forum discussion](https://forum.rclone.org/t/rclone-link-dropbox-permissions/23211) and the [dropbox SDK issue](https://github.com/dropbox/dropbox-sdk-go-unofficial/issues/75). +Modification times for Dropbox Paper documents are not exact, and +may not change for some period after the document is edited. +To make sure you get recent changes in a sync, either wait an hour +or so, or use `--ignore-times` to force a full sync. + ## Get your own Dropbox App ID When you use rclone with Dropbox in its default configuration you are using rclone's App ID. This is shared between all the rclone users.