// restic/internal/backend/sftp/sftp.go

package sftp
import (
"bufio"
"context"
"crypto/rand"
"encoding/hex"
"fmt"
"hash"
"io"
"os"
"os/exec"
"path"
"time"

"github.com/restic/restic/internal/backend"
"github.com/restic/restic/internal/backend/layout"
"github.com/restic/restic/internal/backend/limiter"
"github.com/restic/restic/internal/backend/location"
"github.com/restic/restic/internal/backend/util"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/feature"

"github.com/cenkalti/backoff/v4"
"github.com/pkg/sftp"
"golang.org/x/sync/errgroup"
)

// SFTP is a backend in a directory accessed via SFTP.
type SFTP struct {
c *sftp.Client
p string

cmd *exec.Cmd
result <-chan error

posixRename bool

layout.Layout
Config
util.Modes
}

var _ backend.Backend = &SFTP{}
var errTooShort = fmt.Errorf("file is too short")
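// NewFactory returns the location.Factory for the "sftp" scheme. Create and
// Open are wrapped by the limiter so that configured bandwidth limits apply
// to this backend.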
func NewFactory() location.Factory {
return location.NewLimitedBackendFactory("sftp", ParseConfig, location.NoPassword, limiter.WrapBackendConstructor(Create), limiter.WrapBackendConstructor(Open))
}
func startClient(cfg Config) (*SFTP, error) {
program, args, err := buildSSHCommand(cfg)
if err != nil {
return nil, err
}
debug.Log("start client %v %v", program, args)
// Connect to a remote host and request the sftp subsystem via the 'ssh'
// command. This assumes that passwordless login is correctly configured.
cmd := exec.Command(program, args...)
// prefix the errors with the program name
stderr, err := cmd.StderrPipe()
if err != nil {
return nil, errors.Wrap(err, "cmd.StderrPipe")
}
go func() {
sc := bufio.NewScanner(stderr)
for sc.Scan() {
fmt.Fprintf(os.Stderr, "subprocess %v: %v\n", program, sc.Text())
}
}()
// get stdin and stdout
wr, err := cmd.StdinPipe()
if err != nil {
return nil, errors.Wrap(err, "cmd.StdinPipe")
}
rd, err := cmd.StdoutPipe()
if err != nil {
return nil, errors.Wrap(err, "cmd.StdoutPipe")
}
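// Run the command as the terminal's foreground process so that ssh can
// prompt for a password or host key confirmation; the returned bg function
// moves it back to the background once the connection is established.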
bg, err := util.StartForeground(cmd)
if err != nil {
if errors.Is(err, exec.ErrDot) {
return nil, errors.Errorf("cannot implicitly run relative executable %v found in current directory, use -o sftp.command=./<command> to override", cmd.Path)
}
return nil, err
}
// wait in a different goroutine
ch := make(chan error, 1)
go func() {
err := cmd.Wait()
debug.Log("ssh command exited, err %v", err)
for {
ch <- errors.Wrap(err, "ssh command exited")
}
}()
// open the SFTP session
client, err := sftp.NewClientPipe(rd, wr,
// write multiple packets (32kb) in parallel per file
// not strictly necessary as we use ReadFromWithConcurrency
sftp.UseConcurrentWrites(true),
// increase send buffer per file to 4MB
sftp.MaxConcurrentRequestsPerFile(128))
if err != nil {
return nil, errors.Errorf("unable to start the sftp session, error: %v", err)
}
err = bg()
if err != nil {
return nil, errors.Wrap(err, "bg")
}
_, posixRename := client.HasExtension("posix-rename@openssh.com")
return &SFTP{
c: client,
cmd: cmd,
result: ch,
posixRename: posixRename,
Layout: layout.NewDefaultLayout(cfg.Path, path.Join),
}, nil
}
// clientError returns an error if the client has exited. Otherwise, nil is
// returned immediately.
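// A reported error is wrapped as backoff.Permanent so that the retry layer
// does not keep retrying requests on a dead connection.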
func (r *SFTP) clientError() error {
select {
case err := <-r.result:
debug.Log("client has exited with err %v", err)
return backoff.Permanent(err)
default:
}
return nil
}
// Open opens an sftp backend as described by the config by running
// "ssh" with the appropriate arguments (or cfg.Command, if set).
func Open(_ context.Context, cfg Config) (*SFTP, error) {
debug.Log("open backend with config %#v", cfg)
sftp, err := startClient(cfg)
if err != nil {
debug.Log("unable to start program: %v", err)
return nil, err
}
return open(sftp, cfg)
}
func open(sftp *SFTP, cfg Config) (*SFTP, error) {
fi, err := sftp.c.Stat(sftp.Layout.Filename(backend.Handle{Type: backend.ConfigFile}))
m := util.DeriveModesFromFileInfo(fi, err)
debug.Log("using (%03O file, %03O dir) permissions", m.File, m.Dir)
sftp.Config = cfg
sftp.p = cfg.Path
sftp.Modes = m
return sftp, nil
}
func (r *SFTP) mkdirAllDataSubdirs(ctx context.Context, nconn uint) error {
// Run multiple MkdirAll calls concurrently. These involve multiple
// round-trips and we do a lot of them, so this whole operation can be slow
// on high-latency links.
g, _ := errgroup.WithContext(ctx)
// Use errgroup's built-in semaphore, because r.sem is not initialized yet.
g.SetLimit(int(nconn))
for _, d := range r.Paths() {
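// capture the loop variable for the closure (required before Go 1.22)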
d := d
g.Go(func() error {
// First try Mkdir. For most directories in Paths, this takes one
// round trip, not counting duplicate parent creations caused by
// concurrency. MkdirAll first does Stat, then recursive MkdirAll
// on the parent, so calls typically take three round trips.
if err := r.c.Mkdir(d); err == nil {
return nil
}
return r.c.MkdirAll(d)
})
}
return g.Wait()
}
// IsNotExist returns true if the error is caused by a non-existent file.
func (r *SFTP) IsNotExist(err error) bool {
return errors.Is(err, os.ErrNotExist)
}
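// IsPermanentError returns true for errors that will not go away by retrying:
// the file does not exist, is shorter than requested, or access is denied.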
func (r *SFTP) IsPermanentError(err error) bool {
return r.IsNotExist(err) || errors.Is(err, errTooShort) || errors.Is(err, os.ErrPermission)
}
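// buildSSHCommand assembles the ssh invocation from the config. For example
// (illustrative), Host "example.com", Port "2222" and User "alice" yield:
//
//	ssh example.com -p 2222 -l alice -s sftp
//
// If sftp.command is set, it is split shell-style and used as-is instead.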
func buildSSHCommand(cfg Config) (cmd string, args []string, err error) {
if cfg.Command != "" {
args, err := backend.SplitShellStrings(cfg.Command)
if err != nil {
return "", nil, err
}
if cfg.Args != "" {
return "", nil, errors.New("cannot specify both sftp.command and sftp.args options")
}
return args[0], args[1:], nil
}
cmd = "ssh"
host, port := cfg.Host, cfg.Port
args = []string{host}
if port != "" {
args = append(args, "-p", port)
}
if cfg.User != "" {
args = append(args, "-l", cfg.User)
}
if cfg.Args != "" {
a, err := backend.SplitShellStrings(cfg.Args)
if err != nil {
return "", nil, err
}
args = append(args, a...)
}
args = append(args, "-s", "sftp")
return cmd, args, nil
}
// Create creates an sftp backend as described by the config by running "ssh"
// with the appropriate arguments (or cfg.Command, if set).
func Create(ctx context.Context, cfg Config) (*SFTP, error) {
sftp, err := startClient(cfg)
if err != nil {
debug.Log("unable to start program: %v", err)
return nil, err
}
sftp.Modes = util.DefaultModes
// test if config file already exists
_, err = sftp.c.Lstat(sftp.Layout.Filename(backend.Handle{Type: backend.ConfigFile}))
if err == nil {
return nil, errors.New("config file already exists")
}
// create paths for data and refs
if err = sftp.mkdirAllDataSubdirs(ctx, cfg.Connections); err != nil {
return nil, err
}
// repurpose existing connection
return open(sftp, cfg)
}
func (r *SFTP) Connections() uint {
return r.Config.Connections
}
// Hasher may return a hash function for calculating a content hash for the backend
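// For SFTP there is no server-side checksum to verify against, so nil is
// returned and no content hash is computed.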
func (r *SFTP) Hasher() hash.Hash {
return nil
}
// HasAtomicReplace returns whether Save() can atomically replace files
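// Plain SFTP rename is not guaranteed to overwrite an existing target, so
// this only reports true when the server advertises posix-rename@openssh.com.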
func (r *SFTP) HasAtomicReplace() bool {
return r.posixRename
}
// tempSuffix generates a random string suffix that should be sufficiently long
// to avoid accidental conflicts
func tempSuffix() string {
var nonce [16]byte
_, err := rand.Read(nonce[:])
if err != nil {
panic(err)
}
return hex.EncodeToString(nonce[:])
}
// Save stores data in the backend at the handle.
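// The data is first written to a "-restic-temp-<nonce>" file and only renamed
// to its final name after the upload completed and the file was closed, so an
// interrupted upload never leaves a truncated file under the final name.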
func (r *SFTP) Save(_ context.Context, h backend.Handle, rd backend.RewindReader) error {
if err := r.clientError(); err != nil {
return err
}
filename := r.Filename(h)
tmpFilename := filename + "-restic-temp-" + tempSuffix()
dirname := r.Dirname(h)
// create new file
f, err := r.c.OpenFile(tmpFilename, os.O_CREATE|os.O_EXCL|os.O_WRONLY)
if r.IsNotExist(err) {
// error is caused by a missing directory, try to create it
mkdirErr := r.c.MkdirAll(r.Dirname(h))
if mkdirErr != nil {
debug.Log("error creating dir %v: %v", r.Dirname(h), mkdirErr)
} else {
// try again
f, err = r.c.OpenFile(tmpFilename, os.O_CREATE|os.O_EXCL|os.O_WRONLY)
}
}
// pkg/sftp doesn't allow creating with a mode.
// Chmod while the file is still empty.
if err == nil {
err = f.Chmod(r.Modes.File)
}
if err != nil {
return errors.Wrap(err, "OpenFile")
}
defer func() {
if err == nil {
return
}
// Try not to leave a partial file behind.
rmErr := r.c.Remove(f.Name())
if rmErr != nil {
debug.Log("sftp: failed to remove broken file %v: %v",
f.Name(), rmErr)
}
}()
// save data, make sure to use the optimized sftp upload method
wbytes, err := f.ReadFromWithConcurrency(rd, 0)
if err != nil {
_ = f.Close()
err = r.checkNoSpace(dirname, rd.Length(), err)
return errors.Wrap(err, "Write")
}
// sanity check
if wbytes != rd.Length() {
_ = f.Close()
return errors.Errorf("wrote %d bytes instead of the expected %d bytes", wbytes, rd.Length())
}
err = f.Close()
if err != nil {
return errors.Wrap(err, "Close")
}
// Prefer POSIX atomic rename if available.
if r.posixRename {
err = r.c.PosixRename(tmpFilename, filename)
} else {
err = r.c.Rename(tmpFilename, filename)
}
return errors.Wrap(err, "Rename")
}
// checkNoSpace checks if err was likely caused by lack of available space
// on the remote, and if so, makes it permanent.
func (r *SFTP) checkNoSpace(dir string, size int64, origErr error) error {
// The SFTP protocol has a message for ENOSPC,
// but pkg/sftp doesn't export it and OpenSSH's sftp-server
// sends FX_FAILURE instead.
e, ok := origErr.(*sftp.StatusError)
_, hasExt := r.c.HasExtension("statvfs@openssh.com")
if !ok || e.FxCode() != sftp.ErrSSHFxFailure || !hasExt {
return origErr
}
fsinfo, err := r.c.StatVFS(dir)
if err != nil {
debug.Log("sftp: StatVFS returned %v", err)
return origErr
}
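// Report ENOSPC if there are no free inodes or less free space (in bytes,
// Frsize*Bavail) than the amount of data we tried to write.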
if fsinfo.Favail == 0 || fsinfo.Frsize*fsinfo.Bavail < uint64(size) {
err := errors.New("sftp: no space left on device")
return backoff.Permanent(err)
}
return origErr
}
// Load runs fn with a reader that yields the contents of the file at h at the
// given offset.
func (r *SFTP) Load(ctx context.Context, h backend.Handle, length int, offset int64, fn func(rd io.Reader) error) error {
if err := r.clientError(); err != nil {
return err
}
return util.DefaultLoad(ctx, h, length, offset, r.openReader, func(rd io.Reader) error {
if length == 0 || !feature.Flag.Enabled(feature.BackendErrorRedesign) {
return fn(rd)
}
// there is no direct way to efficiently check whether the file is too short
// rd is already a LimitedReader which can be used to track the number of bytes read
err := fn(rd)
// check the underlying reader to be agnostic to however fn() handles the returned error
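// if the extra read hits io.EOF while the limited reader still expects
// bytes (N != 0), the file ended before offset+length bytes were available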
_, rderr := rd.Read([]byte{0})
if rderr == io.EOF && rd.(*util.LimitedReadCloser).N != 0 {
// file is too short
return fmt.Errorf("%w: %v", errTooShort, err)
}
return err
})
}
func (r *SFTP) openReader(_ context.Context, h backend.Handle, length int, offset int64) (io.ReadCloser, error) {
f, err := r.c.Open(r.Filename(h))
if err != nil {
return nil, err
}
if offset > 0 {
_, err = f.Seek(offset, 0)
if err != nil {
_ = f.Close()
return nil, err
}
}
if length > 0 {
// unlimited reads usually use io.Copy which needs WriteTo support at the underlying reader
// limited reads are usually combined with io.ReadFull which reads all required bytes into a buffer in one go
return util.LimitReadCloser(f, int64(length)), nil
}
return f, nil
}
// Stat returns information about a blob.
func (r *SFTP) Stat(_ context.Context, h backend.Handle) (backend.FileInfo, error) {
if err := r.clientError(); err != nil {
return backend.FileInfo{}, err
}
fi, err := r.c.Lstat(r.Filename(h))
if err != nil {
return backend.FileInfo{}, errors.Wrap(err, "Lstat")
}
return backend.FileInfo{Size: fi.Size(), Name: h.Name}, nil
}
// Remove removes the content stored at the given handle.
func (r *SFTP) Remove(_ context.Context, h backend.Handle) error {
if err := r.clientError(); err != nil {
return err
}
return r.c.Remove(r.Filename(h))
}
// List runs fn for each file in the backend which has the type t. When an
// error occurs (or fn returns an error), List stops and returns it.
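// With the default layout only the data/ directory uses subdirectories, so
// the walker descends into subdirs only for that file type.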
func (r *SFTP) List(ctx context.Context, t backend.FileType, fn func(backend.FileInfo) error) error {
if err := r.clientError(); err != nil {
return err
}
basedir, subdirs := r.Basedir(t)
walker := r.c.Walk(basedir)
for {
ok := walker.Step()
if !ok {
break
}
if walker.Err() != nil {
if r.IsNotExist(walker.Err()) {
debug.Log("ignoring non-existing directory")
return nil
}
return walker.Err()
}
if walker.Path() == basedir {
continue
}
if walker.Stat().IsDir() && !subdirs {
walker.SkipDir()
continue
}
fi := walker.Stat()
if !fi.Mode().IsRegular() {
continue
}
debug.Log("send %v\n", path.Base(walker.Path()))
rfi := backend.FileInfo{
Name: path.Base(walker.Path()),
Size: fi.Size(),
}
if ctx.Err() != nil {
return ctx.Err()
}
err := fn(rfi)
if err != nil {
return err
}
if ctx.Err() != nil {
return ctx.Err()
}
}
return ctx.Err()
}
var closeTimeout = 2 * time.Second
// Close closes the sftp connection and terminates the underlying command.
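// It waits up to closeTimeout for the ssh subprocess to exit on its own and
// kills the process if it does not.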
func (r *SFTP) Close() error {
if r == nil {
return nil
}
err := r.c.Close()
debug.Log("Close returned error %v", err)
// wait for closeTimeout before killing the process
select {
case err := <-r.result:
return err
case <-time.After(closeTimeout):
}
if err := r.cmd.Process.Kill(); err != nil {
return err
}
// get the error, but ignore it
<-r.result
return nil
}
func (r *SFTP) deleteRecursive(ctx context.Context, name string) error {
entries, err := r.c.ReadDir(name)
if err != nil {
return errors.Wrapf(err, "ReadDir(%v)", name)
}
for _, fi := range entries {
if ctx.Err() != nil {
return ctx.Err()
}
itemName := path.Join(name, fi.Name())
if fi.IsDir() {
err := r.deleteRecursive(ctx, itemName)
if err != nil {
return err
}
err = r.c.RemoveDirectory(itemName)
if err != nil {
return errors.Wrap(err, "RemoveDirectory")
}
continue
}
err := r.c.Remove(itemName)
if err != nil {
return errors.Wrap(err, "Remove")
}
}
return nil
}
// Delete removes all data in the backend.
func (r *SFTP) Delete(ctx context.Context) error {
return r.deleteRecursive(ctx, r.p)
}
// Warmup not implemented
func (r *SFTP) Warmup(_ context.Context, _ []backend.Handle) ([]backend.Handle, error) {
return []backend.Handle{}, nil
}
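// WarmupWait not implemented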
func (r *SFTP) WarmupWait(_ context.Context, _ []backend.Handle) error { return nil }