Dropbox: Support Dropbox Paper

These files must be "exported" to be useful. The export process
is controlled by the --dropbox-export-formats flag  and the ancilliary flags
--dropbox-skip-exports and --dropbox-show-all-exports modeled on the
Google drive equivalents
This commit is contained in:
Dave Vasilevsky 2025-02-17 13:20:37 -05:00 committed by GitHub
parent cdfd748241
commit 904c9b2e24
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 404 additions and 51 deletions

View File

@ -92,6 +92,9 @@ const (
maxFileNameLength = 255
)
type exportAPIFormat string
type exportExtension string // dotless
var (
// Description of how to auth for this app
dropboxConfig = &oauthutil.Config{
@ -132,6 +135,16 @@ var (
DefaultTimeoutAsync: 10 * time.Second,
DefaultBatchSizeAsync: 100,
}
exportKnownAPIFormats = map[exportAPIFormat]exportExtension{
"markdown": "md",
"html": "html",
}
// Populated based on exportKnownAPIFormats
exportKnownExtensions = map[exportExtension]exportAPIFormat{}
paperExtension = ".paper"
paperTemplateExtension = ".papert"
)
// Gets an oauth config with the right scopes
@ -247,23 +260,61 @@ folders.`,
Help: "Specify a different Dropbox namespace ID to use as the root for all paths.",
Default: "",
Advanced: true,
}}...), defaultBatcherOptions.FsOptions("For full info see [the main docs](https://rclone.org/dropbox/#batch-mode)\n\n")...),
}, {
Name: "export_formats",
Help: `Comma separated list of preferred formats for exporting files
Certain Dropbox files can only be accessed by exporting them to another format.
These include Dropbox Paper documents.
For each such file, rclone will choose the first format on this list that Dropbox
considers valid. If none is valid, it will choose Dropbox's default format.
Known formats include: "html", "md" (markdown)`,
Default: fs.CommaSepList{"html", "md"},
Advanced: true,
}, {
Name: "skip_exports",
Help: "Skip exportable files in all listings.\n\nIf given, exportable files practically become invisible to rclone.",
Default: false,
Advanced: true,
}, {
Name: "show_all_exports",
Default: false,
Help: `Show all exportable files in listings.
Adding this flag will allow all exportable files to be server side copied.
Note that rclone doesn't add extensions to the exportable file names in this mode.
Do **not** use this flag when trying to download exportable files - rclone
will fail to download them.
`,
Advanced: true,
},
}...), defaultBatcherOptions.FsOptions("For full info see [the main docs](https://rclone.org/dropbox/#batch-mode)\n\n")...),
})
for apiFormat, ext := range exportKnownAPIFormats {
exportKnownExtensions[ext] = apiFormat
}
}
// Options defines the configuration for this backend
type Options struct {
ChunkSize fs.SizeSuffix `config:"chunk_size"`
Impersonate string `config:"impersonate"`
SharedFiles bool `config:"shared_files"`
SharedFolders bool `config:"shared_folders"`
BatchMode string `config:"batch_mode"`
BatchSize int `config:"batch_size"`
BatchTimeout fs.Duration `config:"batch_timeout"`
AsyncBatch bool `config:"async_batch"`
PacerMinSleep fs.Duration `config:"pacer_min_sleep"`
Enc encoder.MultiEncoder `config:"encoding"`
RootNsid string `config:"root_namespace"`
ChunkSize fs.SizeSuffix `config:"chunk_size"`
Impersonate string `config:"impersonate"`
SharedFiles bool `config:"shared_files"`
SharedFolders bool `config:"shared_folders"`
BatchMode string `config:"batch_mode"`
BatchSize int `config:"batch_size"`
BatchTimeout fs.Duration `config:"batch_timeout"`
AsyncBatch bool `config:"async_batch"`
PacerMinSleep fs.Duration `config:"pacer_min_sleep"`
Enc encoder.MultiEncoder `config:"encoding"`
RootNsid string `config:"root_namespace"`
ExportFormats fs.CommaSepList `config:"export_formats"`
SkipExports bool `config:"skip_exports"`
ShowAllExports bool `config:"show_all_exports"`
}
// Fs represents a remote dropbox server
@ -283,8 +334,18 @@ type Fs struct {
pacer *fs.Pacer // To pace the API calls
ns string // The namespace we are using or "" for none
batcher *batcher.Batcher[*files.UploadSessionFinishArg, *files.FileMetadata]
exportExts []exportExtension
}
type exportType int
const (
notExport exportType = iota // a regular file
exportHide // should be hidden
exportListOnly // listable, but can't export
exportExportable // can export
)
// Object describes a dropbox object
//
// Dropbox Objects always have full metadata
@ -296,6 +357,9 @@ type Object struct {
bytes int64 // size of the object
modTime time.Time // time it was last modified
hash string // content_hash of the object
exportType exportType
exportAPIFormat exportAPIFormat
}
// Name of the remote (as passed into NewFs)
@ -436,6 +500,14 @@ func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, e
HeaderGenerator: f.headerGenerator,
}
for _, e := range opt.ExportFormats {
ext := exportExtension(e)
if exportKnownExtensions[ext] == "" {
return nil, fmt.Errorf("dropbox: unknown export format '%s'", e)
}
f.exportExts = append(f.exportExts, ext)
}
// unauthorized config for endpoints that fail with auth
ucfg := dropbox.Config{
LogLevel: dropbox.LogOff, // logging in the SDK: LogOff, LogDebug, LogInfo
@ -588,38 +660,126 @@ func (f *Fs) setRoot(root string) {
}
}
type getMetadataResult struct {
entry files.IsMetadata
notFound bool
err error
}
// getMetadata gets the metadata for a file or directory
func (f *Fs) getMetadata(ctx context.Context, objPath string) (entry files.IsMetadata, notFound bool, err error) {
err = f.pacer.Call(func() (bool, error) {
entry, err = f.srv.GetMetadata(&files.GetMetadataArg{
func (f *Fs) getMetadata(ctx context.Context, objPath string) (res getMetadataResult) {
res.err = f.pacer.Call(func() (bool, error) {
res.entry, res.err = f.srv.GetMetadata(&files.GetMetadataArg{
Path: f.opt.Enc.FromStandardPath(objPath),
})
return shouldRetry(ctx, err)
return shouldRetry(ctx, res.err)
})
if err != nil {
switch e := err.(type) {
if res.err != nil {
switch e := res.err.(type) {
case files.GetMetadataAPIError:
if e.EndpointError != nil && e.EndpointError.Path != nil && e.EndpointError.Path.Tag == files.LookupErrorNotFound {
notFound = true
err = nil
res.notFound = true
res.err = nil
}
}
}
return
}
// getFileMetadata gets the metadata for a file
func (f *Fs) getFileMetadata(ctx context.Context, filePath string) (fileInfo *files.FileMetadata, err error) {
entry, notFound, err := f.getMetadata(ctx, filePath)
if err != nil {
return nil, err
// Get metadata such that the result would be exported with the given extension
// Return a channel that will eventually receive the metadata
func (f *Fs) getMetadataForExt(ctx context.Context, filePath string, wantExportExtension exportExtension) chan getMetadataResult {
ch := make(chan getMetadataResult, 1)
wantDownloadable := (wantExportExtension == "")
go func() {
defer close(ch)
res := f.getMetadata(ctx, filePath)
info, ok := res.entry.(*files.FileMetadata)
if !ok { // Can't check anything about file, just return what we have
ch <- res
return
}
// Return notFound if downloadability or extension doesn't match
if wantDownloadable != info.IsDownloadable {
ch <- getMetadataResult{notFound: true}
return
}
if !info.IsDownloadable {
_, ext := f.chooseExportFormat(info)
if ext != wantExportExtension {
ch <- getMetadataResult{notFound: true}
return
}
}
// Return our real result or error
ch <- res
}()
return ch
}
// For a given rclone-path, figure out what the Dropbox-path may be, in order of preference.
// Multiple paths might be plausible, due to export path munging.
func (f *Fs) possibleMetadatas(ctx context.Context, filePath string) (ret []<-chan getMetadataResult) {
ret = []<-chan getMetadataResult{}
// Prefer an exact match
ret = append(ret, f.getMetadataForExt(ctx, filePath, ""))
// Check if we're plausibly an export path, otherwise we're done
if f.opt.SkipExports || f.opt.ShowAllExports {
return
}
if notFound {
dotted := path.Ext(filePath)
if dotted == "" {
return
}
ext := exportExtension(dotted[1:])
if exportKnownExtensions[ext] == "" {
return
}
// We might be an export path! Try all possibilities
base := strings.TrimSuffix(filePath, dotted)
// `foo.papert.md` will only come from `foo.papert`. Never check something like `foo.papert.paper`
if strings.HasSuffix(base, paperTemplateExtension) {
ret = append(ret, f.getMetadataForExt(ctx, base, ext))
return
}
// Otherwise, try both `foo.md` coming from `foo`, or from `foo.paper`
ret = append(ret, f.getMetadataForExt(ctx, base, ext))
ret = append(ret, f.getMetadataForExt(ctx, base+paperExtension, ext))
return
}
// getFileMetadata gets the metadata for a file
func (f *Fs) getFileMetadata(ctx context.Context, filePath string) (*files.FileMetadata, error) {
var res getMetadataResult
// Try all possible metadatas
possibleMetadatas := f.possibleMetadatas(ctx, filePath)
for _, ch := range possibleMetadatas {
res = <-ch
if res.err != nil {
return nil, res.err
}
if !res.notFound {
break
}
}
if res.notFound {
return nil, fs.ErrorObjectNotFound
}
fileInfo, ok := entry.(*files.FileMetadata)
fileInfo, ok := res.entry.(*files.FileMetadata)
if !ok {
if _, ok = entry.(*files.FolderMetadata); ok {
if _, ok = res.entry.(*files.FolderMetadata); ok {
return nil, fs.ErrorIsDir
}
return nil, fs.ErrorNotAFile
@ -628,15 +788,15 @@ func (f *Fs) getFileMetadata(ctx context.Context, filePath string) (fileInfo *fi
}
// getDirMetadata gets the metadata for a directory
func (f *Fs) getDirMetadata(ctx context.Context, dirPath string) (dirInfo *files.FolderMetadata, err error) {
entry, notFound, err := f.getMetadata(ctx, dirPath)
if err != nil {
return nil, err
func (f *Fs) getDirMetadata(ctx context.Context, dirPath string) (*files.FolderMetadata, error) {
res := f.getMetadata(ctx, dirPath)
if res.err != nil {
return nil, res.err
}
if notFound {
if res.notFound {
return nil, fs.ErrorDirNotFound
}
dirInfo, ok := entry.(*files.FolderMetadata)
dirInfo, ok := res.entry.(*files.FolderMetadata)
if !ok {
return nil, fs.ErrorIsFile
}
@ -836,16 +996,15 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e
var res *files.ListFolderResult
for {
if !started {
arg := files.ListFolderArg{
Path: f.opt.Enc.FromStandardPath(root),
Recursive: false,
Limit: 1000,
}
arg := files.NewListFolderArg(f.opt.Enc.FromStandardPath(root))
arg.Recursive = false
arg.Limit = 1000
if root == "/" {
arg.Path = "" // Specify root folder as empty string
}
err = f.pacer.Call(func() (bool, error) {
res, err = f.srv.ListFolder(&arg)
res, err = f.srv.ListFolder(arg)
return shouldRetry(ctx, err)
})
if err != nil {
@ -898,7 +1057,9 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e
if err != nil {
return nil, err
}
entries = append(entries, o)
if o.(*Object).exportType.listable() {
entries = append(entries, o)
}
}
}
if !res.HasMore {
@ -984,16 +1145,14 @@ func (f *Fs) purgeCheck(ctx context.Context, dir string, check bool) (err error)
}
// check directory empty
arg := files.ListFolderArg{
Path: encRoot,
Recursive: false,
}
arg := files.NewListFolderArg(encRoot)
arg.Recursive = false
if root == "/" {
arg.Path = "" // Specify root folder as empty string
}
var res *files.ListFolderResult
err = f.pacer.Call(func() (bool, error) {
res, err = f.srv.ListFolder(&arg)
res, err = f.srv.ListFolder(arg)
return shouldRetry(ctx, err)
})
if err != nil {
@ -1338,16 +1497,14 @@ func (f *Fs) changeNotifyCursor(ctx context.Context) (cursor string, err error)
var startCursor *files.ListFolderGetLatestCursorResult
err = f.pacer.Call(func() (bool, error) {
arg := files.ListFolderArg{
Path: f.opt.Enc.FromStandardPath(f.slashRoot),
Recursive: true,
}
arg := files.NewListFolderArg(f.opt.Enc.FromStandardPath(f.slashRoot))
arg.Recursive = true
if arg.Path == "/" {
arg.Path = ""
}
startCursor, err = f.srv.ListFolderGetLatestCursor(&arg)
startCursor, err = f.srv.ListFolderGetLatestCursor(arg)
return shouldRetry(ctx, err)
})
@ -1451,8 +1608,50 @@ func (f *Fs) Shutdown(ctx context.Context) error {
return nil
}
func (f *Fs) chooseExportFormat(info *files.FileMetadata) (exportAPIFormat, exportExtension) {
// Find API export formats Dropbox supports for this file
// Sometimes Dropbox lists a format in ExportAs but not ExportOptions, so check both
ei := info.ExportInfo
dropboxFormatStrings := append([]string{ei.ExportAs}, ei.ExportOptions...)
// Find which extensions these correspond to
exportExtensions := map[exportExtension]exportAPIFormat{}
var dropboxPreferredAPIFormat exportAPIFormat
var dropboxPreferredExtension exportExtension
for _, format := range dropboxFormatStrings {
apiFormat := exportAPIFormat(format)
// Only consider formats we know about
if ext, ok := exportKnownAPIFormats[apiFormat]; ok {
if dropboxPreferredAPIFormat == "" {
dropboxPreferredAPIFormat = apiFormat
dropboxPreferredExtension = ext
}
exportExtensions[ext] = apiFormat
}
}
// See if the user picked a valid extension
for _, ext := range f.exportExts {
if apiFormat, ok := exportExtensions[ext]; ok {
return apiFormat, ext
}
}
// If no matches, prefer the first valid format Dropbox lists
return dropboxPreferredAPIFormat, dropboxPreferredExtension
}
// ------------------------------------------------------------
func (et exportType) listable() bool {
return et != exportHide
}
// something we should _try_ to export
func (et exportType) exportable() bool {
return et == exportExportable || et == exportListOnly
}
// Fs returns the parent Fs
func (o *Object) Fs() fs.Info {
return o.fs
@ -1496,6 +1695,32 @@ func (o *Object) Size() int64 {
return o.bytes
}
func (o *Object) setMetadataForExport(info *files.FileMetadata) {
o.bytes = -1
o.hash = ""
if o.fs.opt.SkipExports {
o.exportType = exportHide
return
}
if o.fs.opt.ShowAllExports {
o.exportType = exportListOnly
return
}
var exportExt exportExtension
o.exportAPIFormat, exportExt = o.fs.chooseExportFormat(info)
if o.exportAPIFormat == "" {
o.exportType = exportHide
} else {
o.exportType = exportExportable
// get rid of any paper extension, if present
o.remote = strings.TrimSuffix(o.remote, paperExtension)
// add the export extension
o.remote += "." + string(exportExt)
}
}
// setMetadataFromEntry sets the fs data from a files.FileMetadata
//
// This isn't a complete set of metadata and has an inaccurate date
@ -1504,6 +1729,10 @@ func (o *Object) setMetadataFromEntry(info *files.FileMetadata) error {
o.bytes = int64(info.Size)
o.modTime = info.ClientModified
o.hash = info.ContentHash
if !info.IsDownloadable {
o.setMetadataForExport(info)
}
return nil
}
@ -1567,6 +1796,27 @@ func (o *Object) Storable() bool {
return true
}
func (o *Object) export(ctx context.Context) (in io.ReadCloser, err error) {
if o.exportType == exportListOnly || o.exportAPIFormat == "" {
fs.Debugf(o.remote, "No export format found")
return nil, fs.ErrorObjectNotFound
}
arg := files.ExportArg{Path: o.id, ExportFormat: string(o.exportAPIFormat)}
var exportResult *files.ExportResult
err = o.fs.pacer.Call(func() (bool, error) {
exportResult, in, err = o.fs.srv.Export(&arg)
return shouldRetry(ctx, err)
})
if err != nil {
return nil, err
}
o.bytes = int64(exportResult.ExportMetadata.Size)
o.hash = exportResult.ExportMetadata.ExportHash
return
}
// Open an object for read
func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.ReadCloser, err error) {
if o.fs.opt.SharedFiles {
@ -1586,6 +1836,10 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.Read
return
}
if o.exportType.exportable() {
return o.export(ctx)
}
fs.FixRangeOption(options, o.bytes)
headers := fs.OpenOptionHeaders(options)
arg := files.DownloadArg{

View File

@ -1,9 +1,16 @@
package dropbox
import (
"context"
"io"
"strings"
"testing"
"github.com/dropbox/dropbox-sdk-go-unofficial/v6/dropbox"
"github.com/dropbox/dropbox-sdk-go-unofficial/v6/dropbox/files"
"github.com/rclone/rclone/fstest/fstests"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestInternalCheckPathLength(t *testing.T) {
@ -42,3 +49,54 @@ func TestInternalCheckPathLength(t *testing.T) {
assert.Equal(t, test.ok, err == nil, test.in)
}
}
func (f *Fs) importPaperForTest(t *testing.T) {
content := `# test doc
Lorem ipsum __dolor__ sit amet
[link](http://google.com)
`
arg := files.PaperCreateArg{
Path: f.slashRootSlash + "export.paper",
ImportFormat: &files.ImportFormat{Tagged: dropbox.Tagged{Tag: files.ImportFormatMarkdown}},
}
var err error
err = f.pacer.Call(func() (bool, error) {
reader := strings.NewReader(content)
_, err = f.srv.PaperCreate(&arg, reader)
return shouldRetry(context.Background(), err)
})
require.NoError(t, err)
}
func (f *Fs) InternalTestPaperExport(t *testing.T) {
ctx := context.Background()
f.importPaperForTest(t)
f.exportExts = []exportExtension{"html"}
obj, err := f.NewObject(ctx, "export.html")
require.NoError(t, err)
rc, err := obj.Open(ctx)
require.NoError(t, err)
defer func() { require.NoError(t, rc.Close()) }()
buf, err := io.ReadAll(rc)
require.NoError(t, err)
text := string(buf)
for _, excerpt := range []string{
"Lorem ipsum",
"<b>dolor</b>",
`href="http://google.com"`,
} {
require.Contains(t, text, excerpt)
}
}
func (f *Fs) InternalTest(t *testing.T) {
t.Run("PaperExport", f.InternalTestPaperExport)
}
var _ fstests.InternalTester = (*Fs)(nil)

View File

@ -190,6 +190,42 @@ with `--dropbox-batch-mode async` then do a final transfer with
Note that there may be a pause when quitting rclone while rclone
finishes up the last batch using this mode.
### Exporting files
Certain files in Dropbox are "exportable", such as Dropbox Paper
documents. These files need to be converted to another format in
order to be downloaded. Often multiple formats are available for
conversion.
When rclone downloads a exportable file, it chooses the format to
download based on the `--dropbox-export-formats` setting. By
default, the export formats are `html,md`, which are sensible
defaults for Dropbox Paper.
Rclone chooses the first format ID in the export formats list that
Dropbox supports for a given file. If no format in the list is
usable, rclone will choose the default format that Dropbox suggests.
Rclone will change the extension to correspond to the export format.
Here are some examples of how extensions are mapped:
| File type | Filename in Dropbox | Filename in rclone |
|----------------|---------------------|--------------------|
| Paper | mydoc.paper | mydoc.html |
| Paper template | mydoc.papert | mydoc.papert.html |
| other | mydoc | mydoc.html |
_Importing_ exportable files is not yet supported by rclone.
Here are the supported export extensions known by rclone. Note that
rclone does not currently support other formats not on this list,
even if Dropbox supports them. Also, Dropbox could change the list
of supported formats at any time.
| Format ID | Name | Description |
|-----------|----------|----------------------|
| html | HTML | HTML document |
| md | Markdown | Markdown text format |
{{< rem autogenerated options start" - DO NOT EDIT - instead edit fs.RegInfo in backend/dropbox/dropbox.go then run make backenddocs" >}}
### Standard options
@ -522,6 +558,11 @@ non-personal account otherwise the visibility may not be correct.
[forum discussion](https://forum.rclone.org/t/rclone-link-dropbox-permissions/23211) and the
[dropbox SDK issue](https://github.com/dropbox/dropbox-sdk-go-unofficial/issues/75).
Modification times for Dropbox Paper documents are not exact, and
may not change for some period after the document is edited.
To make sure you get recent changes in a sync, either wait an hour
or so, or use `--ignore-times` to force a full sync.
## Get your own Dropbox App ID
When you use rclone with Dropbox in its default configuration you are using rclone's App ID. This is shared between all the rclone users.