Dropbox: Support Dropbox Paper

These files must be "exported" to be useful. The export process
is controlled by the --dropbox-export-formats flag and the ancillary flags
--dropbox-skip-exports and --dropbox-show-all-exports, modeled on the
Google Drive equivalents.
This commit is contained in:
Dave Vasilevsky 2025-02-17 13:20:37 -05:00 committed by GitHub
parent cdfd748241
commit 904c9b2e24
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 404 additions and 51 deletions

View File

@ -92,6 +92,9 @@ const (
maxFileNameLength = 255 maxFileNameLength = 255
) )
// Distinct string types used for export handling, so that API format names
// and file extensions cannot be mixed up by accident.
type (
	// exportAPIFormat is the name of an export format as given to the Dropbox API
	exportAPIFormat string

	// exportExtension is the file extension of an exported file, without the leading dot
	exportExtension string
)
var ( var (
// Description of how to auth for this app // Description of how to auth for this app
dropboxConfig = &oauthutil.Config{ dropboxConfig = &oauthutil.Config{
@ -132,6 +135,16 @@ var (
DefaultTimeoutAsync: 10 * time.Second, DefaultTimeoutAsync: 10 * time.Second,
DefaultBatchSizeAsync: 100, DefaultBatchSizeAsync: 100,
} }
// exportKnownAPIFormats maps each Dropbox API export format that rclone
// knows about to the (dotless) extension used for the exported file
exportKnownAPIFormats = map[exportAPIFormat]exportExtension{
"markdown": "md",
"html": "html",
}
// exportKnownExtensions is the reverse mapping, from extension to API format.
// It starts out empty and is filled in from exportKnownAPIFormats by the
// loop that follows the option registration.
exportKnownExtensions = map[exportExtension]exportAPIFormat{}
// File name suffixes (including the dot) which get special treatment when
// translating between Dropbox names and exported names
paperExtension = ".paper"
paperTemplateExtension = ".papert"
) )
// Gets an oauth config with the right scopes // Gets an oauth config with the right scopes
@ -247,8 +260,43 @@ folders.`,
Help: "Specify a different Dropbox namespace ID to use as the root for all paths.", Help: "Specify a different Dropbox namespace ID to use as the root for all paths.",
Default: "", Default: "",
Advanced: true, Advanced: true,
}}...), defaultBatcherOptions.FsOptions("For full info see [the main docs](https://rclone.org/dropbox/#batch-mode)\n\n")...), }, {
Name: "export_formats",
Help: `Comma separated list of preferred formats for exporting files
Certain Dropbox files can only be accessed by exporting them to another format.
These include Dropbox Paper documents.
For each such file, rclone will choose the first format on this list that Dropbox
considers valid. If none is valid, it will choose Dropbox's default format.
Known formats include: "html", "md" (markdown)`,
Default: fs.CommaSepList{"html", "md"},
Advanced: true,
}, {
Name: "skip_exports",
Help: "Skip exportable files in all listings.\n\nIf given, exportable files practically become invisible to rclone.",
Default: false,
Advanced: true,
}, {
Name: "show_all_exports",
Default: false,
Help: `Show all exportable files in listings.
Adding this flag will allow all exportable files to be server side copied.
Note that rclone doesn't add extensions to the exportable file names in this mode.
Do **not** use this flag when trying to download exportable files - rclone
will fail to download them.
`,
Advanced: true,
},
}...), defaultBatcherOptions.FsOptions("For full info see [the main docs](https://rclone.org/dropbox/#batch-mode)\n\n")...),
}) })
for apiFormat, ext := range exportKnownAPIFormats {
exportKnownExtensions[ext] = apiFormat
}
} }
// Options defines the configuration for this backend // Options defines the configuration for this backend
@ -264,6 +312,9 @@ type Options struct {
PacerMinSleep fs.Duration `config:"pacer_min_sleep"` PacerMinSleep fs.Duration `config:"pacer_min_sleep"`
Enc encoder.MultiEncoder `config:"encoding"` Enc encoder.MultiEncoder `config:"encoding"`
RootNsid string `config:"root_namespace"` RootNsid string `config:"root_namespace"`
ExportFormats fs.CommaSepList `config:"export_formats"`
SkipExports bool `config:"skip_exports"`
ShowAllExports bool `config:"show_all_exports"`
} }
// Fs represents a remote dropbox server // Fs represents a remote dropbox server
@ -283,8 +334,18 @@ type Fs struct {
pacer *fs.Pacer // To pace the API calls pacer *fs.Pacer // To pace the API calls
ns string // The namespace we are using or "" for none ns string // The namespace we are using or "" for none
batcher *batcher.Batcher[*files.UploadSessionFinishArg, *files.FileMetadata] batcher *batcher.Batcher[*files.UploadSessionFinishArg, *files.FileMetadata]
exportExts []exportExtension
} }
// exportType classifies an object according to how it relates to exporting
type exportType int

// The possible values of exportType
const (
	// notExport is a regular file
	notExport exportType = iota
	// exportHide is a file that should be hidden
	exportHide
	// exportListOnly is a file that is listable, but can't be exported
	exportListOnly
	// exportExportable is a file that can be exported
	exportExportable
)
// Object describes a dropbox object // Object describes a dropbox object
// //
// Dropbox Objects always have full metadata // Dropbox Objects always have full metadata
@ -296,6 +357,9 @@ type Object struct {
bytes int64 // size of the object bytes int64 // size of the object
modTime time.Time // time it was last modified modTime time.Time // time it was last modified
hash string // content_hash of the object hash string // content_hash of the object
exportType exportType
exportAPIFormat exportAPIFormat
} }
// Name of the remote (as passed into NewFs) // Name of the remote (as passed into NewFs)
@ -436,6 +500,14 @@ func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, e
HeaderGenerator: f.headerGenerator, HeaderGenerator: f.headerGenerator,
} }
for _, e := range opt.ExportFormats {
ext := exportExtension(e)
if exportKnownExtensions[ext] == "" {
return nil, fmt.Errorf("dropbox: unknown export format '%s'", e)
}
f.exportExts = append(f.exportExts, ext)
}
// unauthorized config for endpoints that fail with auth // unauthorized config for endpoints that fail with auth
ucfg := dropbox.Config{ ucfg := dropbox.Config{
LogLevel: dropbox.LogOff, // logging in the SDK: LogOff, LogDebug, LogInfo LogLevel: dropbox.LogOff, // logging in the SDK: LogOff, LogDebug, LogInfo
@ -588,38 +660,126 @@ func (f *Fs) setRoot(root string) {
} }
} }
// getMetadataResult bundles everything a metadata lookup produces into one
// value, so that the outcome can be sent over a channel
type getMetadataResult struct {
entry files.IsMetadata // the metadata returned by the lookup
notFound bool // set when the lookup should be treated as "no such path"
err error // any other error from the lookup
}
// getMetadata gets the metadata for a file or directory // getMetadata gets the metadata for a file or directory
func (f *Fs) getMetadata(ctx context.Context, objPath string) (entry files.IsMetadata, notFound bool, err error) { func (f *Fs) getMetadata(ctx context.Context, objPath string) (res getMetadataResult) {
err = f.pacer.Call(func() (bool, error) { res.err = f.pacer.Call(func() (bool, error) {
entry, err = f.srv.GetMetadata(&files.GetMetadataArg{ res.entry, res.err = f.srv.GetMetadata(&files.GetMetadataArg{
Path: f.opt.Enc.FromStandardPath(objPath), Path: f.opt.Enc.FromStandardPath(objPath),
}) })
return shouldRetry(ctx, err) return shouldRetry(ctx, res.err)
}) })
if err != nil { if res.err != nil {
switch e := err.(type) { switch e := res.err.(type) {
case files.GetMetadataAPIError: case files.GetMetadataAPIError:
if e.EndpointError != nil && e.EndpointError.Path != nil && e.EndpointError.Path.Tag == files.LookupErrorNotFound { if e.EndpointError != nil && e.EndpointError.Path != nil && e.EndpointError.Path.Tag == files.LookupErrorNotFound {
notFound = true res.notFound = true
err = nil res.err = nil
} }
} }
} }
return return
} }
// getFileMetadata gets the metadata for a file // Get metadata such that the result would be exported with the given extension
func (f *Fs) getFileMetadata(ctx context.Context, filePath string) (fileInfo *files.FileMetadata, err error) { // Return a channel that will eventually receive the metadata
entry, notFound, err := f.getMetadata(ctx, filePath) func (f *Fs) getMetadataForExt(ctx context.Context, filePath string, wantExportExtension exportExtension) chan getMetadataResult {
if err != nil { ch := make(chan getMetadataResult, 1)
return nil, err wantDownloadable := (wantExportExtension == "")
go func() {
defer close(ch)
res := f.getMetadata(ctx, filePath)
info, ok := res.entry.(*files.FileMetadata)
if !ok { // Can't check anything about file, just return what we have
ch <- res
return
} }
if notFound {
// Return notFound if downloadability or extension doesn't match
if wantDownloadable != info.IsDownloadable {
ch <- getMetadataResult{notFound: true}
return
}
if !info.IsDownloadable {
_, ext := f.chooseExportFormat(info)
if ext != wantExportExtension {
ch <- getMetadataResult{notFound: true}
return
}
}
// Return our real result or error
ch <- res
}()
return ch
}
// possibleMetadatas starts a metadata lookup for every Dropbox path that the
// rclone path filePath could refer to, and returns the result channels in
// order of preference.
//
// More than one Dropbox path can be plausible because exportable files are
// presented under a modified name.
func (f *Fs) possibleMetadatas(ctx context.Context, filePath string) (ret []<-chan getMetadataResult) {
	// lookup queues one candidate path; an empty ext asks for a downloadable file
	lookup := func(candidate string, ext exportExtension) {
		ret = append(ret, f.getMetadataForExt(ctx, candidate, ext))
	}

	// The path exactly as given is always the first choice
	lookup(filePath, "")

	// With either of these options set no export names are generated,
	// so there is nothing more to try
	if f.opt.SkipExports || f.opt.ShowAllExports {
		return ret
	}

	// Only a name ending in a known export extension can be an export path
	suffix := path.Ext(filePath)
	if suffix == "" {
		return ret
	}
	ext := exportExtension(suffix[1:])
	if exportKnownExtensions[ext] == "" {
		return ret
	}
	stem := filePath[:len(filePath)-len(suffix)]

	// `foo.md` may come from `foo` ...
	lookup(stem, ext)
	// ... or from `foo.paper`. However `foo.papert.md` will only come from
	// `foo.papert`, so never check something like `foo.papert.paper`
	if !strings.HasSuffix(stem, paperTemplateExtension) {
		lookup(stem+paperExtension, ext)
	}
	return ret
}
// getFileMetadata gets the metadata for a file
func (f *Fs) getFileMetadata(ctx context.Context, filePath string) (*files.FileMetadata, error) {
var res getMetadataResult
// Try all possible metadatas
possibleMetadatas := f.possibleMetadatas(ctx, filePath)
for _, ch := range possibleMetadatas {
res = <-ch
if res.err != nil {
return nil, res.err
}
if !res.notFound {
break
}
}
if res.notFound {
return nil, fs.ErrorObjectNotFound return nil, fs.ErrorObjectNotFound
} }
fileInfo, ok := entry.(*files.FileMetadata)
fileInfo, ok := res.entry.(*files.FileMetadata)
if !ok { if !ok {
if _, ok = entry.(*files.FolderMetadata); ok { if _, ok = res.entry.(*files.FolderMetadata); ok {
return nil, fs.ErrorIsDir return nil, fs.ErrorIsDir
} }
return nil, fs.ErrorNotAFile return nil, fs.ErrorNotAFile
@ -628,15 +788,15 @@ func (f *Fs) getFileMetadata(ctx context.Context, filePath string) (fileInfo *fi
} }
// getDirMetadata gets the metadata for a directory // getDirMetadata gets the metadata for a directory
func (f *Fs) getDirMetadata(ctx context.Context, dirPath string) (dirInfo *files.FolderMetadata, err error) { func (f *Fs) getDirMetadata(ctx context.Context, dirPath string) (*files.FolderMetadata, error) {
entry, notFound, err := f.getMetadata(ctx, dirPath) res := f.getMetadata(ctx, dirPath)
if err != nil { if res.err != nil {
return nil, err return nil, res.err
} }
if notFound { if res.notFound {
return nil, fs.ErrorDirNotFound return nil, fs.ErrorDirNotFound
} }
dirInfo, ok := entry.(*files.FolderMetadata) dirInfo, ok := res.entry.(*files.FolderMetadata)
if !ok { if !ok {
return nil, fs.ErrorIsFile return nil, fs.ErrorIsFile
} }
@ -836,16 +996,15 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e
var res *files.ListFolderResult var res *files.ListFolderResult
for { for {
if !started { if !started {
arg := files.ListFolderArg{ arg := files.NewListFolderArg(f.opt.Enc.FromStandardPath(root))
Path: f.opt.Enc.FromStandardPath(root), arg.Recursive = false
Recursive: false, arg.Limit = 1000
Limit: 1000,
}
if root == "/" { if root == "/" {
arg.Path = "" // Specify root folder as empty string arg.Path = "" // Specify root folder as empty string
} }
err = f.pacer.Call(func() (bool, error) { err = f.pacer.Call(func() (bool, error) {
res, err = f.srv.ListFolder(&arg) res, err = f.srv.ListFolder(arg)
return shouldRetry(ctx, err) return shouldRetry(ctx, err)
}) })
if err != nil { if err != nil {
@ -898,9 +1057,11 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e
if err != nil { if err != nil {
return nil, err return nil, err
} }
if o.(*Object).exportType.listable() {
entries = append(entries, o) entries = append(entries, o)
} }
} }
}
if !res.HasMore { if !res.HasMore {
break break
} }
@ -984,16 +1145,14 @@ func (f *Fs) purgeCheck(ctx context.Context, dir string, check bool) (err error)
} }
// check directory empty // check directory empty
arg := files.ListFolderArg{ arg := files.NewListFolderArg(encRoot)
Path: encRoot, arg.Recursive = false
Recursive: false,
}
if root == "/" { if root == "/" {
arg.Path = "" // Specify root folder as empty string arg.Path = "" // Specify root folder as empty string
} }
var res *files.ListFolderResult var res *files.ListFolderResult
err = f.pacer.Call(func() (bool, error) { err = f.pacer.Call(func() (bool, error) {
res, err = f.srv.ListFolder(&arg) res, err = f.srv.ListFolder(arg)
return shouldRetry(ctx, err) return shouldRetry(ctx, err)
}) })
if err != nil { if err != nil {
@ -1338,16 +1497,14 @@ func (f *Fs) changeNotifyCursor(ctx context.Context) (cursor string, err error)
var startCursor *files.ListFolderGetLatestCursorResult var startCursor *files.ListFolderGetLatestCursorResult
err = f.pacer.Call(func() (bool, error) { err = f.pacer.Call(func() (bool, error) {
arg := files.ListFolderArg{ arg := files.NewListFolderArg(f.opt.Enc.FromStandardPath(f.slashRoot))
Path: f.opt.Enc.FromStandardPath(f.slashRoot), arg.Recursive = true
Recursive: true,
}
if arg.Path == "/" { if arg.Path == "/" {
arg.Path = "" arg.Path = ""
} }
startCursor, err = f.srv.ListFolderGetLatestCursor(&arg) startCursor, err = f.srv.ListFolderGetLatestCursor(arg)
return shouldRetry(ctx, err) return shouldRetry(ctx, err)
}) })
@ -1451,8 +1608,50 @@ func (f *Fs) Shutdown(ctx context.Context) error {
return nil return nil
} }
// chooseExportFormat picks the format in which to export a file that can't
// be downloaded directly.
//
// It returns the Dropbox API format along with the matching (dotless)
// extension. The first extension in f.exportExts which Dropbox offers for
// this file wins. If none of them is offered, the first format listed by
// Dropbox that rclone knows about is used instead.
//
// Both results are empty when there is no usable format, which includes the
// case where the metadata carries no export information at all.
func (f *Fs) chooseExportFormat(info *files.FileMetadata) (exportAPIFormat, exportExtension) {
	// ExportInfo is a pointer and is nil when Dropbox sent no export
	// information. Treat that as "no format available" rather than
	// dereferencing it and panicking.
	if info == nil || info.ExportInfo == nil {
		return "", ""
	}
	ei := info.ExportInfo

	// Find API export formats Dropbox supports for this file
	// Sometimes Dropbox lists a format in ExportAs but not ExportOptions, so check both
	dropboxFormatStrings := make([]string, 0, 1+len(ei.ExportOptions))
	dropboxFormatStrings = append(dropboxFormatStrings, ei.ExportAs)
	dropboxFormatStrings = append(dropboxFormatStrings, ei.ExportOptions...)

	// Find which extensions these correspond to, remembering the first
	// known format as Dropbox's own preference
	exportExtensions := make(map[exportExtension]exportAPIFormat, len(dropboxFormatStrings))
	var dropboxPreferredAPIFormat exportAPIFormat
	var dropboxPreferredExtension exportExtension
	for _, format := range dropboxFormatStrings {
		apiFormat := exportAPIFormat(format)
		// Only consider formats we know about
		ext, ok := exportKnownAPIFormats[apiFormat]
		if !ok {
			continue
		}
		if dropboxPreferredAPIFormat == "" {
			dropboxPreferredAPIFormat = apiFormat
			dropboxPreferredExtension = ext
		}
		exportExtensions[ext] = apiFormat
	}

	// See if the user picked a valid extension
	for _, ext := range f.exportExts {
		if apiFormat, ok := exportExtensions[ext]; ok {
			return apiFormat, ext
		}
	}

	// If no matches, prefer the first valid format Dropbox lists
	return dropboxPreferredAPIFormat, dropboxPreferredExtension
}
// ------------------------------------------------------------ // ------------------------------------------------------------
// listable reports whether an object of this type should appear in
// listings - everything does except for hidden exports
func (et exportType) listable() bool {
	switch et {
	case exportHide:
		return false
	default:
		return true
	}
}
// exportable reports whether this is something we should _try_ to export.
//
// This includes list-only files as well as the ones which can be exported.
func (et exportType) exportable() bool {
	switch et {
	case exportExportable, exportListOnly:
		return true
	}
	return false
}
// Fs returns the parent Fs // Fs returns the parent Fs
func (o *Object) Fs() fs.Info { func (o *Object) Fs() fs.Info {
return o.fs return o.fs
@ -1496,6 +1695,32 @@ func (o *Object) Size() int64 {
return o.bytes return o.bytes
} }
// setMetadataForExport adjusts the object for a file which has to be
// exported rather than downloaded.
//
// It marks the size as unknown and clears the hash, then decides from the
// backend options and the export formats available for info whether the
// object is hidden, list only or exportable. For an exportable object the
// remote name is changed to carry the extension of the chosen format.
func (o *Object) setMetadataForExport(info *files.FileMetadata) {
	o.bytes, o.hash = -1, ""

	// The options override any per file decision
	switch {
	case o.fs.opt.SkipExports:
		o.exportType = exportHide
		return
	case o.fs.opt.ShowAllExports:
		o.exportType = exportListOnly
		return
	}

	apiFormat, ext := o.fs.chooseExportFormat(info)
	o.exportAPIFormat = apiFormat
	if apiFormat == "" {
		// nothing we can export this as
		o.exportType = exportHide
		return
	}
	o.exportType = exportExportable
	// get rid of any paper extension, if present, then add the export extension
	o.remote = strings.TrimSuffix(o.remote, paperExtension) + "." + string(ext)
}
// setMetadataFromEntry sets the fs data from a files.FileMetadata // setMetadataFromEntry sets the fs data from a files.FileMetadata
// //
// This isn't a complete set of metadata and has an inaccurate date // This isn't a complete set of metadata and has an inaccurate date
@ -1504,6 +1729,10 @@ func (o *Object) setMetadataFromEntry(info *files.FileMetadata) error {
o.bytes = int64(info.Size) o.bytes = int64(info.Size)
o.modTime = info.ClientModified o.modTime = info.ClientModified
o.hash = info.ContentHash o.hash = info.ContentHash
if !info.IsDownloadable {
o.setMetadataForExport(info)
}
return nil return nil
} }
@ -1567,6 +1796,27 @@ func (o *Object) Storable() bool {
return true return true
} }
// export opens the object for reading by asking Dropbox to export it in the
// format stored in o.exportAPIFormat.
//
// It returns fs.ErrorObjectNotFound if the object is list only or has no
// export format. On success the object's size and hash are updated from the
// metadata of the export.
func (o *Object) export(ctx context.Context) (in io.ReadCloser, err error) {
	canExport := o.exportType != exportListOnly && o.exportAPIFormat != ""
	if !canExport {
		fs.Debugf(o.remote, "No export format found")
		return nil, fs.ErrorObjectNotFound
	}

	var result *files.ExportResult
	exportArg := files.ExportArg{
		Path:         o.id,
		ExportFormat: string(o.exportAPIFormat),
	}
	err = o.fs.pacer.Call(func() (bool, error) {
		result, in, err = o.fs.srv.Export(&exportArg)
		return shouldRetry(ctx, err)
	})
	if err != nil {
		return nil, err
	}

	// The size and hash are those of the exported data
	meta := result.ExportMetadata
	o.bytes = int64(meta.Size)
	o.hash = meta.ExportHash
	return in, nil
}
// Open an object for read // Open an object for read
func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.ReadCloser, err error) { func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.ReadCloser, err error) {
if o.fs.opt.SharedFiles { if o.fs.opt.SharedFiles {
@ -1586,6 +1836,10 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.Read
return return
} }
if o.exportType.exportable() {
return o.export(ctx)
}
fs.FixRangeOption(options, o.bytes) fs.FixRangeOption(options, o.bytes)
headers := fs.OpenOptionHeaders(options) headers := fs.OpenOptionHeaders(options)
arg := files.DownloadArg{ arg := files.DownloadArg{

View File

@ -1,9 +1,16 @@
package dropbox package dropbox
import ( import (
"context"
"io"
"strings"
"testing" "testing"
"github.com/dropbox/dropbox-sdk-go-unofficial/v6/dropbox"
"github.com/dropbox/dropbox-sdk-go-unofficial/v6/dropbox/files"
"github.com/rclone/rclone/fstest/fstests"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
) )
func TestInternalCheckPathLength(t *testing.T) { func TestInternalCheckPathLength(t *testing.T) {
@ -42,3 +49,54 @@ func TestInternalCheckPathLength(t *testing.T) {
assert.Equal(t, test.ok, err == nil, test.in) assert.Equal(t, test.ok, err == nil, test.in)
} }
} }
// importPaperForTest creates the Paper document "export.paper" in the root
// of f by importing a short markdown document, failing the test on error
func (f *Fs) importPaperForTest(t *testing.T) {
	const content = `# test doc
Lorem ipsum __dolor__ sit amet
[link](http://google.com)
`
	importFormat := &files.ImportFormat{
		Tagged: dropbox.Tagged{Tag: files.ImportFormatMarkdown},
	}
	arg := &files.PaperCreateArg{
		Path:         f.slashRootSlash + "export.paper",
		ImportFormat: importFormat,
	}

	err := f.pacer.Call(func() (bool, error) {
		// A new reader is made for each attempt so a retry sends the whole document
		_, createErr := f.srv.PaperCreate(arg, strings.NewReader(content))
		return shouldRetry(context.Background(), createErr)
	})
	require.NoError(t, err)
}
// InternalTestPaperExport imports a Paper document, then checks that it can
// be found under its exported name and that reading it gives the expected HTML
func (f *Fs) InternalTestPaperExport(t *testing.T) {
	ctx := context.Background()
	f.importPaperForTest(t)

	// Export as html so the document is looked up as export.html
	f.exportExts = []exportExtension{"html"}
	obj, err := f.NewObject(ctx, "export.html")
	require.NoError(t, err)

	rc, err := obj.Open(ctx)
	require.NoError(t, err)
	defer func() { require.NoError(t, rc.Close()) }()

	body, err := io.ReadAll(rc)
	require.NoError(t, err)
	html := string(body)

	// Each part of the markdown source should show up converted to HTML
	wanted := []string{
		"Lorem ipsum",
		"<b>dolor</b>",
		`href="http://google.com"`,
	}
	for i := range wanted {
		require.Contains(t, html, wanted[i])
	}
}
// InternalTest runs the backend specific tests, each one as a named subtest of t
func (f *Fs) InternalTest(t *testing.T) {
t.Run("PaperExport", f.InternalTestPaperExport)
}
// Check at compile time that *Fs satisfies fstests.InternalTester
var _ fstests.InternalTester = (*Fs)(nil)

View File

@ -190,6 +190,42 @@ with `--dropbox-batch-mode async` then do a final transfer with
Note that there may be a pause when quitting rclone while rclone Note that there may be a pause when quitting rclone while rclone
finishes up the last batch using this mode. finishes up the last batch using this mode.
### Exporting files
Certain files in Dropbox are "exportable", such as Dropbox Paper
documents. These files need to be converted to another format in
order to be downloaded. Often multiple formats are available for
conversion.
When rclone downloads an exportable file, it chooses the format to
download based on the `--dropbox-export-formats` setting. By
default, the export formats are `html,md`, which are sensible
defaults for Dropbox Paper.
Rclone chooses the first format ID in the export formats list that
Dropbox supports for a given file. If no format in the list is
usable, rclone will choose the default format that Dropbox suggests.
Rclone will change the extension to correspond to the export format.
Here are some examples of how extensions are mapped:
| File type | Filename in Dropbox | Filename in rclone |
|----------------|---------------------|--------------------|
| Paper | mydoc.paper | mydoc.html |
| Paper template | mydoc.papert | mydoc.papert.html |
| other | mydoc | mydoc.html |
_Importing_ exportable files is not yet supported by rclone.
Here are the supported export extensions known by rclone. Note that
rclone does not currently support formats that are not on this list,
even if Dropbox supports them. Also, Dropbox could change the list
of supported formats at any time.
| Format ID | Name | Description |
|-----------|----------|----------------------|
| html | HTML | HTML document |
| md | Markdown | Markdown text format |
{{< rem autogenerated options start" - DO NOT EDIT - instead edit fs.RegInfo in backend/dropbox/dropbox.go then run make backenddocs" >}} {{< rem autogenerated options start" - DO NOT EDIT - instead edit fs.RegInfo in backend/dropbox/dropbox.go then run make backenddocs" >}}
### Standard options ### Standard options
@ -522,6 +558,11 @@ non-personal account otherwise the visibility may not be correct.
[forum discussion](https://forum.rclone.org/t/rclone-link-dropbox-permissions/23211) and the [forum discussion](https://forum.rclone.org/t/rclone-link-dropbox-permissions/23211) and the
[dropbox SDK issue](https://github.com/dropbox/dropbox-sdk-go-unofficial/issues/75). [dropbox SDK issue](https://github.com/dropbox/dropbox-sdk-go-unofficial/issues/75).
Modification times for Dropbox Paper documents are not exact, and
may not change for some period after the document is edited.
To make sure you get recent changes in a sync, either wait an hour
or so, or use `--ignore-times` to force a full sync.
## Get your own Dropbox App ID ## Get your own Dropbox App ID
When you use rclone with Dropbox in its default configuration you are using rclone's App ID. This is shared between all the rclone users. When you use rclone with Dropbox in its default configuration you are using rclone's App ID. This is shared between all the rclone users.