From 0855d7e230d87a82d4a4987e6b77286f4e7a2f02 Mon Sep 17 00:00:00 2001 From: Amos Ng Date: Sun, 5 May 2024 18:36:31 +0800 Subject: [PATCH] h5ai: prepared new h5ai http backend --- backend/all/all.go | 1 + backend/h5ai/h5ai.go | 787 +++++++++++++++++++++++++++++++++++++++++++ docs/content/h5ai.md | 265 +++++++++++++++ 3 files changed, 1053 insertions(+) create mode 100644 backend/h5ai/h5ai.go create mode 100644 docs/content/h5ai.md diff --git a/backend/all/all.go b/backend/all/all.go index e90877eda..c47dd6b53 100644 --- a/backend/all/all.go +++ b/backend/all/all.go @@ -20,6 +20,7 @@ import ( _ "github.com/rclone/rclone/backend/ftp" _ "github.com/rclone/rclone/backend/googlecloudstorage" _ "github.com/rclone/rclone/backend/googlephotos" + _ "github.com/rclone/rclone/backend/h5ai" _ "github.com/rclone/rclone/backend/hasher" _ "github.com/rclone/rclone/backend/hdfs" _ "github.com/rclone/rclone/backend/hidrive" diff --git a/backend/h5ai/h5ai.go b/backend/h5ai/h5ai.go new file mode 100644 index 000000000..037ad48c6 --- /dev/null +++ b/backend/h5ai/h5ai.go @@ -0,0 +1,787 @@ +// Package h5ai provides a read only filesystem interface to h5ai servers using net/http +// +// It requests directory listings from the h5ai JSON API served at the +// endpoint, and exposes the returned items as files and directories. 
+package h5ai + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "mime" + "net/http" + "net/url" + "path" + "strings" + "sync" + "time" + + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/config/configmap" + "github.com/rclone/rclone/fs/config/configstruct" + "github.com/rclone/rclone/fs/fshttp" + "github.com/rclone/rclone/fs/hash" + "github.com/rclone/rclone/lib/rest" + "golang.org/x/net/html" +) + +var ( + errorReadOnly = errors.New("h5ai remotes are read only") + timeUnset = time.Unix(0, 0) +) + +func init() { + fsi := &fs.RegInfo{ + Name: "h5ai", + Description: "H5AI", + NewFs: NewFs, + CommandHelp: commandHelp, + Options: []fs.Option{{ + Name: "url", + Help: "URL of HTTP host to connect to.\n\nE.g. \"https://example.com\", or \"https://user:pass@example.com\" to use a username and password.", + Required: true, + }, { + Name: "headers", + Help: `Set HTTP headers for all transactions. + +Use this to set additional HTTP headers for all transactions. + +The input format is comma separated list of key,value pairs. Standard +[CSV encoding](https://godoc.org/encoding/csv) may be used. + +For example, to set a Cookie use 'Cookie,name=value', or '"Cookie","name=value"'. + +You can set multiple headers, e.g. '"Cookie","name=value","Authorization","xxx"'.`, + Default: fs.CommaSepList{}, + Advanced: true, + }, { + Name: "no_slash", + Help: `Set this if the site doesn't end directories with /. + +Use this if your target website does not use / on the end of +directories. + +A / on the end of a path is how rclone normally tells the difference +between files and directories. If this flag is set, then rclone will +treat all files with Content-Type: text/html as directories and read +URLs from them rather than downloading them. + +Note that this may cause rclone to confuse genuine HTML files with +directories.`, + Default: false, + Advanced: true, + }, { + Name: "no_head", + Help: `Don't use HEAD requests. 
+ +HEAD requests are mainly used to find file sizes in dir listing. +If your site is being very slow to load then you can try this option. +Normally rclone does a HEAD request for each potential file in a +directory listing to: + +- find its size +- check it really exists +- check to see if it is a directory + +If you set this option, rclone will not do the HEAD request. This will mean +that directory listings are much quicker, but rclone won't have the times or +sizes of any files, and some files that don't exist may be in the listing.`, + Default: false, + Advanced: true, + }}, + } + fs.Register(fsi) +} + +// Options defines the configuration for this backend +type Options struct { + Endpoint string `config:"url"` + NoSlash bool `config:"no_slash"` + NoHead bool `config:"no_head"` + Headers fs.CommaSepList `config:"headers"` +} + +// Fs stores the interface to the remote HTTP files +type Fs struct { + name string + root string + features *fs.Features // optional features + opt Options // options for this backend + ci *fs.ConfigInfo // global config + endpoint *url.URL + endpointURL string // endpoint as a string + httpClient *http.Client +} + +// Object is a remote object that has been stat'd (so it exists, but is not necessarily open for reading) +type Object struct { + fs *Fs + remote string + size int64 + modTime time.Time + contentType string +} + +// statusError returns an error if the res contained an error +func statusError(res *http.Response, err error) error { + if err != nil { + return err + } + if res.StatusCode < 200 || res.StatusCode > 299 { + _ = res.Body.Close() + return fmt.Errorf("HTTP Error: %s", res.Status) + } + return nil +} + +// getFsEndpoint decides if url is to be considered a file or directory, +// and returns a proper endpoint url to use for the fs. 
+func getFsEndpoint(ctx context.Context, client *http.Client, url string, opt *Options) (string, bool) { + // If url ends with '/' it is already a proper url always assumed to be a directory. + if url[len(url)-1] == '/' { + return url, false + } + + // If url does not end with '/' we send a HEAD request to decide + // if it is directory or file, and if directory appends the missing + // '/', or if file returns the directory url to parent instead. + createFileResult := func() (string, bool) { + fs.Debugf(nil, "If path is a directory you must add a trailing '/'") + parent, _ := path.Split(url) + return parent, true + } + createDirResult := func() (string, bool) { + fs.Debugf(nil, "To avoid the initial HEAD request add a trailing '/' to the path") + return url + "/", false + } + + // If HEAD requests are not allowed we just have to assume it is a file. + if opt.NoHead { + fs.Debugf(nil, "Assuming path is a file as --http-no-head is set") + return createFileResult() + } + + // Use a client which doesn't follow redirects so the server + // doesn't redirect http://host/dir to http://host/dir/ + noRedir := *client + noRedir.CheckRedirect = func(req *http.Request, via []*http.Request) error { + return http.ErrUseLastResponse + } + req, err := http.NewRequestWithContext(ctx, "HEAD", url, nil) + if err != nil { + fs.Debugf(nil, "Assuming path is a file as HEAD request could not be created: %v", err) + return createFileResult() + } + addHeaders(req, opt) + res, err := noRedir.Do(req) + + if err != nil { + fs.Debugf(nil, "Assuming path is a file as HEAD request could not be sent: %v", err) + return createFileResult() + } + if res.StatusCode == http.StatusNotFound { + fs.Debugf(nil, "Assuming path is a directory as HEAD response is it does not exist as a file (%s)", res.Status) + return createDirResult() + } + if res.StatusCode == http.StatusMovedPermanently || + res.StatusCode == http.StatusFound || + res.StatusCode == http.StatusSeeOther || + res.StatusCode == 
http.StatusTemporaryRedirect || + res.StatusCode == http.StatusPermanentRedirect { + redir := res.Header.Get("Location") + if redir != "" { + if redir[len(redir)-1] == '/' { + fs.Debugf(nil, "Assuming path is a directory as HEAD response is redirect (%s) to a path that ends with '/': %s", res.Status, redir) + return createDirResult() + } + fs.Debugf(nil, "Assuming path is a file as HEAD response is redirect (%s) to a path that does not end with '/': %s", res.Status, redir) + return createFileResult() + } + fs.Debugf(nil, "Assuming path is a file as HEAD response is redirect (%s) but no location header", res.Status) + return createFileResult() + } + if res.StatusCode < 200 || res.StatusCode > 299 { + // Example is 403 (http.StatusForbidden) for servers not allowing HEAD requests. + fs.Debugf(nil, "Assuming path is a file as HEAD response is an error (%s)", res.Status) + return createFileResult() + } + + fs.Debugf(nil, "Assuming path is a file as HEAD response is success (%s)", res.Status) + return createFileResult() +} + +// Make the http connection with opt +func (f *Fs) httpConnection(ctx context.Context, opt *Options) (isFile bool, err error) { + if len(opt.Headers)%2 != 0 { + return false, errors.New("odd number of headers supplied") + } + + if !strings.HasSuffix(opt.Endpoint, "/") { + opt.Endpoint += "/" + } + + // Parse the endpoint and stick the root onto it + base, err := url.Parse(opt.Endpoint) + if err != nil { + return false, err + } + u, err := rest.URLJoin(base, rest.URLPathEscape(f.root)) + if err != nil { + return false, err + } + + client := fshttp.NewClient(ctx) + + endpoint, isFile := getFsEndpoint(ctx, client, u.String(), opt) + fs.Debugf(nil, "Root: %s", endpoint) + u, err = url.Parse(endpoint) + if err != nil { + return false, err + } + + // Update f with the new parameters + f.httpClient = client + f.endpoint = u + f.endpointURL = u.String() + return isFile, nil +} + +// NewFs creates a new Fs object from the name and root. 
It connects to +// the host specified in the config file. +func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) { + // Parse config into Options struct + opt := new(Options) + err := configstruct.Set(m, opt) + if err != nil { + return nil, err + } + + ci := fs.GetConfig(ctx) + f := &Fs{ + name: name, + root: root, + opt: *opt, + ci: ci, + } + f.features = (&fs.Features{ + CanHaveEmptyDirectories: true, + }).Fill(ctx, f) + + // Make the http connection + isFile, err := f.httpConnection(ctx, opt) + if err != nil { + return nil, err + } + + if isFile { + // return an error with an fs which points to the parent + return f, fs.ErrorIsFile + } + + if !strings.HasSuffix(f.endpointURL, "/") { + return nil, errors.New("internal error: url doesn't end with /") + } + + return f, nil +} + +// Name returns the configured name of the file system +func (f *Fs) Name() string { + return f.name +} + +// Root returns the root for the filesystem +func (f *Fs) Root() string { + return f.root +} + +// String returns the URL for the filesystem +func (f *Fs) String() string { + return f.endpointURL +} + +// Features returns the optional features of this Fs +func (f *Fs) Features() *fs.Features { + return f.features +} + +// Precision is the remote http file system's modtime precision, which we have no way of knowing. We estimate at 1s +func (f *Fs) Precision() time.Duration { + return time.Second +} + +// NewObject creates a new remote http file object +func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { + o := &Object{ + fs: f, + remote: remote, + } + err := o.head(ctx) + if err != nil { + return nil, err + } + return o, nil +} + +// Join's the remote onto the base URL +func (f *Fs) url(remote string) string { + return f.endpointURL + rest.URLPathEscape(remote) +} + +// Errors returned by parseName +var ( + errURLJoinFailed = errors.New("URLJoin failed") + errFoundQuestionMark = errors.New("found ? 
in URL") + errHostMismatch = errors.New("host mismatch") + errSchemeMismatch = errors.New("scheme mismatch") + errNotUnderRoot = errors.New("not under root") + errNameIsEmpty = errors.New("name is empty") + errNameContainsSlash = errors.New("name contains /") +) + +// parseName turns a name as found in the page into a remote path or returns an error +func parseName(base *url.URL, name string) (string, error) { + // make URL absolute + u, err := rest.URLJoin(base, name) + if err != nil { + return "", errURLJoinFailed + } + // check it doesn't have URL parameters + uStr := u.String() + if strings.Contains(uStr, "?") { + return "", errFoundQuestionMark + } + // check that this is going back to the same host and scheme + if base.Host != u.Host { + return "", errHostMismatch + } + if base.Scheme != u.Scheme { + return "", errSchemeMismatch + } + // check has path prefix + if !strings.HasPrefix(u.Path, base.Path) { + return "", errNotUnderRoot + } + // calculate the name relative to the base + name = u.Path[len(base.Path):] + // mustn't be empty + if name == "" { + return "", errNameIsEmpty + } + // mustn't contain a / - we are looking for a single level directory + slash := strings.Index(name, "/") + if slash >= 0 && slash != len(name)-1 { + return "", errNameContainsSlash + } + return name, nil +} + +// Parse turns HTML for a directory into names +// base should be the base URL to resolve any relative names from +func parse(base *url.URL, in io.Reader) (names []string, err error) { + doc, err := html.Parse(in) + if err != nil { + return nil, err + } + var ( + walk func(*html.Node) + seen = make(map[string]struct{}) + ) + walk = func(n *html.Node) { + if n.Type == html.ElementNode && n.Data == "a" { + for _, a := range n.Attr { + if a.Key == "href" { + name, err := parseName(base, a.Val) + if err == nil { + if _, found := seen[name]; !found { + names = append(names, name) + seen[name] = struct{}{} + } + } + break + } + } + } + for c := n.FirstChild; c != nil; c = 
c.NextSibling { + walk(c) + } + } + walk(doc) + return names, nil +} + +// Adds the configured headers to the request if any +func addHeaders(req *http.Request, opt *Options) { + for i := 0; i < len(opt.Headers); i += 2 { + key := opt.Headers[i] + value := opt.Headers[i+1] + req.Header.Add(key, value) + } +} + +// Adds the configured headers to the request if any +func (f *Fs) addHeaders(req *http.Request) { + addHeaders(req, &f.opt) +} + +// Read the directory passed in +func (f *Fs) readDir(ctx context.Context, dir string) (names []string, err error) { + URL := f.url(dir) + u, err := url.Parse(URL) + if err != nil { + return nil, fmt.Errorf("failed to readDir: %w", err) + } + if !strings.HasSuffix(URL, "/") { + return nil, fmt.Errorf("internal error: readDir URL %q didn't end in /", URL) + } + // Do the request + payload := &struct { + Action string `json:"action"` + Items map[string]string `json:"items"` + }{ + Action: "get", + Items: map[string]string{ + "href": u.Path, + "what": "1", + }, + } + buf, err := json.Marshal(payload) + if err != nil { + return nil, fmt.Errorf("readDir failed: %w", err) + } + req, err := http.NewRequestWithContext(ctx, "POST", URL, bytes.NewReader(buf)) + if err != nil { + return nil, fmt.Errorf("readDir failed: %w", err) + } + f.addHeaders(req) + res, err := f.httpClient.Do(req) + if err == nil { + defer fs.CheckClose(res.Body, &err) + if res.StatusCode == http.StatusNotFound { + return nil, fs.ErrorDirNotFound + } + } + err = statusError(res, err) + if err != nil { + return nil, fmt.Errorf("failed to readDir: %w", err) + } + + body, err := io.ReadAll(res.Body) + type Item struct { + Href string `json:"href"` + } + type Items struct { + Items []Item `json:"items"` + } + items := Items{} + err = json.Unmarshal(body, &items) + if err != nil { + return nil, fmt.Errorf("failed to readDir: %w", err) + } + for _, v := range items.Items { + href, _ := url.QueryUnescape(v.Href) + if href != u.Path && strings.HasPrefix(href, u.Path) { + 
names = append(names, strings.TrimPrefix(href, u.Path)) + } + } + return names, nil +} + +// List the objects and directories in dir into entries. The +// entries can be returned in any order but should be for a +// complete directory. +// +// dir should be "" to list the root, and should not have +// trailing slashes. +// +// This should return ErrDirNotFound if the directory isn't +// found. +func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) { + if !strings.HasSuffix(dir, "/") && dir != "" { + dir += "/" + } + names, err := f.readDir(ctx, dir) + if err != nil { + return nil, fmt.Errorf("error listing %q: %w", dir, err) + } + var ( + entriesMu sync.Mutex // to protect entries + wg sync.WaitGroup + checkers = f.ci.Checkers + in = make(chan string, checkers) + ) + add := func(entry fs.DirEntry) { + entriesMu.Lock() + entries = append(entries, entry) + entriesMu.Unlock() + } + for i := 0; i < checkers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for remote := range in { + file := &Object{ + fs: f, + remote: remote, + } + switch err := file.head(ctx); err { + case nil: + add(file) + case fs.ErrorNotAFile: + // ...found a directory not a file + add(fs.NewDir(remote, time.Time{})) + default: + fs.Debugf(remote, "skipping because of error: %v", err) + } + } + }() + } + for _, name := range names { + isDir := name[len(name)-1] == '/' + name = strings.TrimRight(name, "/") + remote := path.Join(dir, name) + if isDir { + add(fs.NewDir(remote, time.Time{})) + } else { + in <- remote + } + } + close(in) + wg.Wait() + return entries, nil +} + +// Put in to the remote path with the modTime given of the given size +// +// May create the object even if it returns an error - if so +// will return the object and the error, otherwise will return +// nil and the error +func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { + return nil, errorReadOnly +} + +// PutStream uploads to 
the remote path with the modTime given of indeterminate size +func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { + return nil, errorReadOnly +} + +// Fs is the filesystem this remote http file object is located within +func (o *Object) Fs() fs.Info { + return o.fs +} + +// String returns the URL to the remote HTTP file +func (o *Object) String() string { + if o == nil { + return "" + } + return o.remote +} + +// Remote the name of the remote HTTP file, relative to the fs root +func (o *Object) Remote() string { + return o.remote +} + +// Hash returns "" since HTTP (in Go or OpenSSH) doesn't support remote calculation of hashes +func (o *Object) Hash(ctx context.Context, r hash.Type) (string, error) { + return "", hash.ErrUnsupported +} + +// Size returns the size in bytes of the remote http file +func (o *Object) Size() int64 { + return o.size +} + +// ModTime returns the modification time of the remote http file +func (o *Object) ModTime(ctx context.Context) time.Time { + return o.modTime +} + +// url returns the native url of the object +func (o *Object) url() string { + return o.fs.url(o.remote) +} + +// head sends a HEAD request to update info fields in the Object +func (o *Object) head(ctx context.Context) error { + if o.fs.opt.NoHead { + o.size = -1 + o.modTime = timeUnset + o.contentType = fs.MimeType(ctx, o) + return nil + } + url := o.url() + req, err := http.NewRequestWithContext(ctx, "HEAD", url, nil) + if err != nil { + return fmt.Errorf("stat failed: %w", err) + } + o.fs.addHeaders(req) + res, err := o.fs.httpClient.Do(req) + if err == nil && res.StatusCode == http.StatusNotFound { + return fs.ErrorObjectNotFound + } + err = statusError(res, err) + if err != nil { + return fmt.Errorf("failed to stat: %w", err) + } + return o.decodeMetadata(ctx, res) +} + +// decodeMetadata updates info fields in the Object according to HTTP response headers +func (o *Object) decodeMetadata(ctx 
context.Context, res *http.Response) error { + t, err := http.ParseTime(res.Header.Get("Last-Modified")) + if err != nil { + t = timeUnset + } + o.modTime = t + o.contentType = res.Header.Get("Content-Type") + o.size = rest.ParseSizeFromHeaders(res.Header) + + // If NoSlash is set then check ContentType to see if it is a directory + if o.fs.opt.NoSlash { + mediaType, _, err := mime.ParseMediaType(o.contentType) + if err != nil { + return fmt.Errorf("failed to parse Content-Type: %q: %w", o.contentType, err) + } + if mediaType == "text/html" { + return fs.ErrorNotAFile + } + } + return nil +} + +// SetModTime sets the modification and access time to the specified time +// +// it also updates the info field +func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error { + return errorReadOnly +} + +// Storable returns whether the remote http file is a regular file (not a directory, symbolic link, block device, character device, named pipe, etc.) +func (o *Object) Storable() bool { + return true +} + +// Open a remote http file object for reading. 
Seek is supported +func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.ReadCloser, err error) { + url := o.url() + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) + if err != nil { + return nil, fmt.Errorf("Open failed: %w", err) + } + + // Add optional headers + for k, v := range fs.OpenOptionHeaders(options) { + req.Header.Add(k, v) + } + o.fs.addHeaders(req) + + // Do the request + res, err := o.fs.httpClient.Do(req) + err = statusError(res, err) + if err != nil { + return nil, fmt.Errorf("Open failed: %w", err) + } + if err = o.decodeMetadata(ctx, res); err != nil { + return nil, fmt.Errorf("decodeMetadata failed: %w", err) + } + return res.Body, nil +} + +// Hashes returns hash.HashNone to indicate remote hashing is unavailable +func (f *Fs) Hashes() hash.Set { + return hash.Set(hash.None) +} + +// Mkdir makes the root directory of the Fs object +func (f *Fs) Mkdir(ctx context.Context, dir string) error { + return errorReadOnly +} + +// Remove a remote http file object +func (o *Object) Remove(ctx context.Context) error { + return errorReadOnly +} + +// Rmdir removes the root directory of the Fs object +func (f *Fs) Rmdir(ctx context.Context, dir string) error { + return errorReadOnly +} + +// Update in to the object with the modTime given of the given size +func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error { + return errorReadOnly +} + +// MimeType of an Object if known, "" otherwise +func (o *Object) MimeType(ctx context.Context) string { + return o.contentType +} + +var commandHelp = []fs.CommandHelp{{ + Name: "set", + Short: "Set command for updating the config parameters.", + Long: `This set command can be used to update the config parameters +for a running http backend. 
+ +Usage Examples: + + rclone backend set remote: [-o opt_name=opt_value] [-o opt_name2=opt_value2] + rclone rc backend/command command=set fs=remote: [-o opt_name=opt_value] [-o opt_name2=opt_value2] + rclone rc backend/command command=set fs=remote: -o url=https://example.com + +The option keys are named as they are in the config file. + +This rebuilds the connection to the http backend when it is called with +the new parameters. Only new parameters need be passed as the values +will default to those currently in use. + +It doesn't return anything. +`, +}} + +// Command the backend to run a named command +// +// The command run is name +// args may be used to read arguments from +// opts may be used to read optional arguments from +// +// The result should be capable of being JSON encoded +// If it is a string or a []string it will be shown to the user +// otherwise it will be JSON encoded and shown to the user like that +func (f *Fs) Command(ctx context.Context, name string, arg []string, opt map[string]string) (out interface{}, err error) { + switch name { + case "set": + newOpt := f.opt + err := configstruct.Set(configmap.Simple(opt), &newOpt) + if err != nil { + return nil, fmt.Errorf("reading config: %w", err) + } + _, err = f.httpConnection(ctx, &newOpt) + if err != nil { + return nil, fmt.Errorf("updating session: %w", err) + } + f.opt = newOpt + keys := []string{} + for k := range opt { + keys = append(keys, k) + } + fs.Logf(f, "Updated config values: %s", strings.Join(keys, ", ")) + return nil, nil + default: + return nil, fs.ErrorCommandNotFound + } +} + +// Check the interfaces are satisfied +var ( + _ fs.Fs = &Fs{} + _ fs.PutStreamer = &Fs{} + _ fs.Object = &Object{} + _ fs.MimeTyper = &Object{} + _ fs.Commander = &Fs{} +) diff --git a/docs/content/h5ai.md b/docs/content/h5ai.md new file mode 100644 index 000000000..34f5b246d --- /dev/null +++ b/docs/content/h5ai.md @@ -0,0 +1,265 @@ +--- +title: "H5AI Remote" +description: "Read only remote for H5AI 
servers" +versionIntroduced: "v1.65" +--- + +# {{< icon "fa fa-globe" >}} H5AI + +The H5AI remote is a read only remote for reading files from a +webserver. The webserver should provide file listings which rclone +will read and turn into a remote. This has been tested with common +webservers such as Apache/Nginx/Caddy and will likely work with file +listings from most web servers. (If it doesn't then please file an +issue, or send a pull request!) + +Paths are specified as `remote:` or `remote:path`. + +The `remote:` represents the configured [url](#http-url), and any path following +it will be resolved relative to this url, according to the URL standard. This +means with remote url `https://beta.rclone.org/branch` and path `fix`, the +resolved URL will be `https://beta.rclone.org/branch/fix`, while with path +`/fix` the resolved URL will be `https://beta.rclone.org/fix` as the absolute +path is resolved from the root of the domain. + +If the path following the `remote:` ends with `/` it will be assumed to point +to a directory. If the path does not end with `/`, then a HEAD request is sent +and the response used to decide if it is treated as a file or a directory +(run with `-vv` to see details). When [--http-no-head](#http-no-head) is +specified, a path without ending `/` is always assumed to be a file. If rclone +incorrectly assumes the path is a file, the solution is to specify the path with +ending `/`. When you know the path is a directory, ending it with `/` is always +better as it avoids the initial HEAD request. + +To just download a single file it is easier to use +[copyurl](/commands/rclone_copyurl/). + +## Configuration + +Here is an example of how to make a remote called `remote`. First +run: + + rclone config + +This will guide you through an interactive setup process: + +``` +No remotes found, make a new one? +n) New remote +s) Set configuration password +q) Quit config +n/s/q> n +name> remote +Type of storage to configure. 
+Choose a number from below, or type in your own value +[snip] +XX / H5Ai + \ "h5ai" +[snip] +Storage> h5ai +URL of http host to connect to +Choose a number from below, or type in your own value + 1 / Connect to example.com + \ "https://example.com" +url> https://beta.rclone.org +Remote config +-------------------- +[remote] +url = https://beta.rclone.org +-------------------- +y) Yes this is OK +e) Edit this remote +d) Delete this remote +y/e/d> y +Current remotes: + +Name Type +==== ==== +remote h5ai + +e) Edit existing remote +n) New remote +d) Delete remote +r) Rename remote +c) Copy remote +s) Set configuration password +q) Quit config +e/n/d/r/c/s/q> q +``` + +This remote is called `remote` and can now be used like this + +See all the top level directories + + rclone lsd remote: + +List the contents of a directory + + rclone ls remote:directory + +Sync the remote `directory` to `/home/local/directory`, deleting any excess files. + + rclone sync --interactive remote:directory /home/local/directory + +### Read only + +This remote is read only - you can't upload files to an HTTP server. + +### Modification times + +Most HTTP servers store time accurate to 1 second. + +### Checksum + +No checksums are stored. + +### Usage without a config file + +Since the h5ai remote only has one config parameter it is easy to use +without a config file: + + rclone lsd --h5ai-url https://beta.rclone.org :h5ai: + +or: + + rclone lsd :h5ai,url='https://beta.rclone.org': + +{{< rem autogenerated options start" - DO NOT EDIT - instead edit fs.RegInfo in backend/h5ai/h5ai.go then run make backenddocs" >}} +### Standard options + +Here are the Standard options specific to http (HTTP). + +#### --http-url + +URL of HTTP host to connect to. + +E.g. "https://example.com", or "https://user:pass@example.com" to use a username and password. 
+ +Properties: + +- Config: url +- Env Var: RCLONE_HTTP_URL +- Type: string +- Required: true + +### Advanced options + +Here are the Advanced options specific to http (HTTP). + +#### --http-headers + +Set HTTP headers for all transactions. + +Use this to set additional HTTP headers for all transactions. + +The input format is comma separated list of key,value pairs. Standard +[CSV encoding](https://godoc.org/encoding/csv) may be used. + +For example, to set a Cookie use 'Cookie,name=value', or '"Cookie","name=value"'. + +You can set multiple headers, e.g. '"Cookie","name=value","Authorization","xxx"'. + +Properties: + +- Config: headers +- Env Var: RCLONE_HTTP_HEADERS +- Type: CommaSepList +- Default: + +#### --http-no-slash + +Set this if the site doesn't end directories with /. + +Use this if your target website does not use / on the end of +directories. + +A / on the end of a path is how rclone normally tells the difference +between files and directories. If this flag is set, then rclone will +treat all files with Content-Type: text/html as directories and read +URLs from them rather than downloading them. + +Note that this may cause rclone to confuse genuine HTML files with +directories. + +Properties: + +- Config: no_slash +- Env Var: RCLONE_HTTP_NO_SLASH +- Type: bool +- Default: false + +#### --http-no-head + +Don't use HEAD requests. + +HEAD requests are mainly used to find file sizes in dir listing. +If your site is being very slow to load then you can try this option. +Normally rclone does a HEAD request for each potential file in a +directory listing to: + +- find its size +- check it really exists +- check to see if it is a directory + +If you set this option, rclone will not do the HEAD request. This will mean +that directory listings are much quicker, but rclone won't have the times or +sizes of any files, and some files that don't exist may be in the listing. 
+ +Properties: + +- Config: no_head +- Env Var: RCLONE_HTTP_NO_HEAD +- Type: bool +- Default: false + +## Backend commands + +Here are the commands specific to the http backend. + +Run them with + + rclone backend COMMAND remote: + +The help below will explain what arguments each command takes. + +See the [backend](/commands/rclone_backend/) command for more +info on how to pass options and arguments. + +These can be run on a running backend using the rc command +[backend/command](/rc/#backend-command). + +### set + +Set command for updating the config parameters. + + rclone backend set remote: [options] [+] + +This set command can be used to update the config parameters +for a running http backend. + +Usage Examples: + + rclone backend set remote: [-o opt_name=opt_value] [-o opt_name2=opt_value2] + rclone rc backend/command command=set fs=remote: [-o opt_name=opt_value] [-o opt_name2=opt_value2] + rclone rc backend/command command=set fs=remote: -o url=https://example.com + +The option keys are named as they are in the config file. + +This rebuilds the connection to the http backend when it is called with +the new parameters. Only new parameters need be passed as the values +will default to those currently in use. + +It doesn't return anything. + + +{{< rem autogenerated options stop >}} + +## Limitations + +`rclone about` is not supported by the HTTP backend. Backends without +this capability cannot determine free space for an rclone mount or +use policy `mfs` (most free space) as a member of an rclone union +remote. + +See [List of backends that do not support rclone about](https://rclone.org/overview/#optional-features) and [rclone about](https://rclone.org/commands/rclone_about/) +