Add PutMetadata function to storage backends (#171)

* Add PutMetadata function to storage backends

This function is not currently used, but it will be useful for helper
scripts that need to regenerate metadata on the fly, especially scripts
to migrate between storage backends. In the future, we can also use it
to automatically regenerate metadata if it is found to be missing or
corrupted.

* Add PutMetadata function to storage backend interface and
  implementations
* Rework metadata generation to be more efficient and work better with
  the PutMetadata function
* Add a basic test for metadata generation

* Change PutMetadata to take a Metadata type instead

It's unlikely that this function is useful if it always regenerates the
metadata. Instead, the caller should do that if it needs to.
This commit is contained in:
mutantmonkey 2019-04-09 13:28:18 -07:00 committed by Andrei Marcu
parent 2c0b2b2e79
commit 8098b7e39e
5 changed files with 119 additions and 45 deletions

View File

@ -126,11 +126,16 @@ func (b LocalfsBackend) Put(key string, r io.Reader, expiry time.Time, deleteKey
return m, err return m, err
} }
dst.Seek(0 ,0)
m, err = helpers.GenerateMetadata(dst)
if err != nil {
os.Remove(filePath)
return
}
dst.Seek(0 ,0)
m.Expiry = expiry m.Expiry = expiry
m.DeleteKey = deleteKey m.DeleteKey = deleteKey
m.Size = bytes
m.Mimetype, _ = helpers.DetectMime(dst)
m.Sha256sum, _ = helpers.Sha256sum(dst)
m.ArchiveFiles, _ = helpers.ListArchiveFiles(m.Mimetype, m.Size, dst) m.ArchiveFiles, _ = helpers.ListArchiveFiles(m.Mimetype, m.Size, dst)
err = b.writeMetadata(key, m) err = b.writeMetadata(key, m)
@ -142,6 +147,15 @@ func (b LocalfsBackend) Put(key string, r io.Reader, expiry time.Time, deleteKey
return return
} }
// PutMetadata overwrites the stored metadata for an existing file identified
// by key, without touching the file's contents. It is intended for helper
// scripts that regenerate metadata (e.g. when migrating between storage
// backends) or repair missing/corrupted metadata.
func (b LocalfsBackend) PutMetadata(key string, m backends.Metadata) error {
	// writeMetadata already returns a descriptive error; no extra wrapping needed.
	return b.writeMetadata(key, m)
}
func (b LocalfsBackend) Size(key string) (int64, error) { func (b LocalfsBackend) Size(key string) (int64, error) {
fileInfo, err := os.Stat(path.Join(b.filesPath, key)) fileInfo, err := os.Stat(path.Join(b.filesPath, key))
if err != nil { if err != nil {

View File

@ -18,13 +18,13 @@ import (
type S3Backend struct { type S3Backend struct {
bucket string bucket string
svc *s3.S3 svc *s3.S3
} }
func (b S3Backend) Delete(key string) error { func (b S3Backend) Delete(key string) error {
_, err := b.svc.DeleteObject(&s3.DeleteObjectInput{ _, err := b.svc.DeleteObject(&s3.DeleteObjectInput{
Bucket: aws.String(b.bucket), Bucket: aws.String(b.bucket),
Key: aws.String(key), Key: aws.String(key),
}) })
if err != nil { if err != nil {
return err return err
@ -35,7 +35,7 @@ func (b S3Backend) Delete(key string) error {
func (b S3Backend) Exists(key string) (bool, error) { func (b S3Backend) Exists(key string) (bool, error) {
_, err := b.svc.HeadObject(&s3.HeadObjectInput{ _, err := b.svc.HeadObject(&s3.HeadObjectInput{
Bucket: aws.String(b.bucket), Bucket: aws.String(b.bucket),
Key: aws.String(key), Key: aws.String(key),
}) })
return err == nil, err return err == nil, err
} }
@ -44,7 +44,7 @@ func (b S3Backend) Head(key string) (metadata backends.Metadata, err error) {
var result *s3.HeadObjectOutput var result *s3.HeadObjectOutput
result, err = b.svc.HeadObject(&s3.HeadObjectInput{ result, err = b.svc.HeadObject(&s3.HeadObjectInput{
Bucket: aws.String(b.bucket), Bucket: aws.String(b.bucket),
Key: aws.String(key), Key: aws.String(key),
}) })
if err != nil { if err != nil {
if aerr, ok := err.(awserr.Error); ok { if aerr, ok := err.(awserr.Error); ok {
@ -63,7 +63,7 @@ func (b S3Backend) Get(key string) (metadata backends.Metadata, r io.ReadCloser,
var result *s3.GetObjectOutput var result *s3.GetObjectOutput
result, err = b.svc.GetObject(&s3.GetObjectInput{ result, err = b.svc.GetObject(&s3.GetObjectInput{
Bucket: aws.String(b.bucket), Bucket: aws.String(b.bucket),
Key: aws.String(key), Key: aws.String(key),
}) })
if err != nil { if err != nil {
if aerr, ok := err.(awserr.Error); ok { if aerr, ok := err.(awserr.Error); ok {
@ -81,11 +81,11 @@ func (b S3Backend) Get(key string) (metadata backends.Metadata, r io.ReadCloser,
func mapMetadata(m backends.Metadata) map[string]*string { func mapMetadata(m backends.Metadata) map[string]*string {
return map[string]*string{ return map[string]*string{
"Expiry": aws.String(strconv.FormatInt(m.Expiry.Unix(), 10)), "Expiry": aws.String(strconv.FormatInt(m.Expiry.Unix(), 10)),
"Delete_key": aws.String(m.DeleteKey), "Delete_key": aws.String(m.DeleteKey),
"Size": aws.String(strconv.FormatInt(m.Size, 10)), "Size": aws.String(strconv.FormatInt(m.Size, 10)),
"Mimetype": aws.String(m.Mimetype), "Mimetype": aws.String(m.Mimetype),
"Sha256sum": aws.String(m.Sha256sum), "Sha256sum": aws.String(m.Sha256sum),
} }
} }
@ -122,19 +122,20 @@ func (b S3Backend) Put(key string, r io.Reader, expiry time.Time, deleteKey stri
return m, err return m, err
} }
m, err = helpers.GenerateMetadata(r)
if err != nil {
return
}
m.Expiry = expiry m.Expiry = expiry
m.DeleteKey = deleteKey m.DeleteKey = deleteKey
m.Size = bytes
m.Mimetype, _ = helpers.DetectMime(tmpDst)
m.Sha256sum, _ = helpers.Sha256sum(tmpDst)
// XXX: we may not be able to write this to AWS easily // XXX: we may not be able to write this to AWS easily
//m.ArchiveFiles, _ = helpers.ListArchiveFiles(m.Mimetype, m.Size, tmpDst) //m.ArchiveFiles, _ = helpers.ListArchiveFiles(m.Mimetype, m.Size, tmpDst)
uploader := s3manager.NewUploaderWithClient(b.svc) uploader := s3manager.NewUploaderWithClient(b.svc)
input := &s3manager.UploadInput{ input := &s3manager.UploadInput{
Bucket: aws.String(b.bucket), Bucket: aws.String(b.bucket),
Key: aws.String(key), Key: aws.String(key),
Body: tmpDst, Body: tmpDst,
Metadata: mapMetadata(m), Metadata: mapMetadata(m),
} }
_, err = uploader.Upload(input) _, err = uploader.Upload(input)
@ -145,10 +146,24 @@ func (b S3Backend) Put(key string, r io.Reader, expiry time.Time, deleteKey stri
return return
} }
// PutMetadata replaces the S3 object metadata for an existing object
// identified by key, leaving the object body unchanged. This is done with a
// same-key server-side copy, which is the only way S3 allows metadata to be
// rewritten in place.
func (b S3Backend) PutMetadata(key string, m backends.Metadata) error {
	_, err := b.svc.CopyObject(&s3.CopyObjectInput{
		Bucket:     aws.String(b.bucket),
		Key:        aws.String(key),
		CopySource: aws.String("/" + b.bucket + "/" + key),
		Metadata:   mapMetadata(m),
		// REPLACE is required: with the default directive (COPY), S3 rejects
		// a copy of an object onto itself that only changes metadata.
		MetadataDirective: aws.String("REPLACE"),
	})
	return err
}
func (b S3Backend) Size(key string) (int64, error) { func (b S3Backend) Size(key string) (int64, error) {
input := &s3.HeadObjectInput{ input := &s3.HeadObjectInput{
Bucket: aws.String(b.bucket), Bucket: aws.String(b.bucket),
Key: aws.String(key), Key: aws.String(key),
} }
result, err := b.svc.HeadObject(input) result, err := b.svc.HeadObject(input)
if err != nil { if err != nil {
@ -169,7 +184,6 @@ func (b S3Backend) List() ([]string, error) {
return nil, err return nil, err
} }
for _, object := range results.Contents { for _, object := range results.Contents {
output = append(output, *object.Key) output = append(output, *object.Key)
} }

View File

@ -12,6 +12,7 @@ type StorageBackend interface {
Head(key string) (Metadata, error) Head(key string) (Metadata, error)
Get(key string) (Metadata, io.ReadCloser, error) Get(key string) (Metadata, io.ReadCloser, error)
Put(key string, r io.Reader, expiry time.Time, deleteKey string) (Metadata, error) Put(key string, r io.Reader, expiry time.Time, deleteKey string) (Metadata, error)
PutMetadata(key string, m Metadata) error
Size(key string) (int64, error) Size(key string) (int64, error)
} }

View File

@ -1,49 +1,65 @@
package helpers package helpers
import ( import (
"bytes"
"encoding/hex" "encoding/hex"
"io" "io"
"unicode" "unicode"
"github.com/andreimarcu/linx-server/backends"
"github.com/minio/sha256-simd" "github.com/minio/sha256-simd"
"gopkg.in/h2non/filetype.v1" "gopkg.in/h2non/filetype.v1"
) )
func DetectMime(r io.ReadSeeker) (string, error) { func GenerateMetadata(r io.Reader) (m backends.Metadata, err error) {
// Since we don't have the ability to seek within a file, we can use a
// Buffer in combination with a TeeReader to keep a copy of the bytes
// we read when detecting the file type. These bytes are still needed
// to hash the file and determine its size and cannot be discarded.
var buf bytes.Buffer
teeReader := io.TeeReader(r, &buf)
// Get first 512 bytes for mimetype detection // Get first 512 bytes for mimetype detection
header := make([]byte, 512) header := make([]byte, 512)
_, err = teeReader.Read(header)
if err != nil {
return
}
r.Seek(0, 0) // Create a Hash and a MultiReader that includes the Buffer we created
r.Read(header) // above along with the original Reader, which will have the rest of
r.Seek(0, 0) // the file.
hasher := sha256.New()
multiReader := io.MultiReader(&buf, r)
// Copy everything into the Hash, then use the number of bytes written
// as the file size.
var readLen int64
readLen, err = io.Copy(hasher, multiReader)
if err != nil {
return
} else {
m.Size += readLen
}
// Get the hex-encoded string version of the Hash checksum
m.Sha256sum = hex.EncodeToString(hasher.Sum(nil))
// Use the bytes we extracted earlier and attempt to determine the file
// type
kind, err := filetype.Match(header) kind, err := filetype.Match(header)
if err != nil { if err != nil {
return "application/octet-stream", err m.Mimetype = "application/octet-stream"
return m, err
} else if kind.MIME.Value != "" { } else if kind.MIME.Value != "" {
return kind.MIME.Value, nil m.Mimetype = kind.MIME.Value
} } else if printable(header) {
m.Mimetype = "text/plain"
// Check if the file seems anything like text
if printable(header) {
return "text/plain", nil
} else { } else {
return "application/octet-stream", nil m.Mimetype = "application/octet-stream"
}
}
func Sha256sum(r io.ReadSeeker) (string, error) {
hasher := sha256.New()
r.Seek(0, 0)
_, err := io.Copy(hasher, r)
if err != nil {
return "", err
} }
r.Seek(0, 0) return
return hex.EncodeToString(hasher.Sum(nil)), nil
} }
func printable(data []byte) bool { func printable(data []byte) bool {

29
helpers/helpers_test.go Normal file
View File

@ -0,0 +1,29 @@
package helpers
import (
"strings"
"testing"
)
// TestGenerateMetadata checks that GenerateMetadata derives the expected
// checksum, MIME type, and size from a simple in-memory text reader.
func TestGenerateMetadata(t *testing.T) {
	const content = "This is my test content"

	meta, err := GenerateMetadata(strings.NewReader(content))
	if err != nil {
		t.Fatal(err)
	}

	const wantSum = "966152d20a77e739716a625373ee15af16e8f4aec631a329a27da41c204b0171"
	if meta.Sha256sum != wantSum {
		t.Fatalf("Sha256sum was %q instead of expected value of %q", meta.Sha256sum, wantSum)
	}

	const wantMime = "text/plain"
	if meta.Mimetype != wantMime {
		t.Fatalf("Mimetype was %q instead of expected value of %q", meta.Mimetype, wantMime)
	}

	// The expected size is just the byte length of the test content (23).
	wantSize := int64(len(content))
	if meta.Size != wantSize {
		t.Fatalf("Size was %d instead of expected value of %d", meta.Size, wantSize)
	}
}