From fe60db404acc581dcadf400a9fb8a41cecb4bb09 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Tue, 23 Jun 2020 09:12:02 -0700 Subject: [PATCH] prepare for zstd --- weed/operation/upload_content.go | 8 ++--- weed/server/volume_server_handlers_read.go | 22 ++++++------- weed/storage/needle/needle_parse_upload.go | 5 ++- weed/util/compression.go | 38 ++++++++++++---------- 4 files changed, 38 insertions(+), 35 deletions(-) diff --git a/weed/operation/upload_content.go b/weed/operation/upload_content.go index 1ed240836..05b016457 100644 --- a/weed/operation/upload_content.go +++ b/weed/operation/upload_content.go @@ -98,7 +98,7 @@ func doUploadData(uploadUrl string, filename string, cipher bool, data []byte, i mtype = "" } } - if shouldBeZipped, iAmSure := util.IsGzippableFileType(filepath.Base(filename), mtype); iAmSure && shouldBeZipped { + if shouldBeCompressed, iAmSure := util.IsCompressableFileType(filepath.Base(filename), mtype); iAmSure && shouldBeCompressed { shouldGzipNow = true } else if !iAmSure && mtype == "" && len(data) > 128 { var compressed []byte @@ -142,7 +142,7 @@ func doUploadData(uploadUrl string, filename string, cipher bool, data []byte, i uploadResult, err = upload_content(uploadUrl, func(w io.Writer) (err error) { _, err = w.Write(encryptedData) return - }, "", false, "", nil, jwt) + }, "", false, len(encryptedData), "", nil, jwt) if uploadResult != nil { uploadResult.Name = filename uploadResult.Mime = mtype @@ -153,7 +153,7 @@ func doUploadData(uploadUrl string, filename string, cipher bool, data []byte, i uploadResult, err = upload_content(uploadUrl, func(w io.Writer) (err error) { _, err = w.Write(data) return - }, filename, contentIsGzipped, mtype, pairMap, jwt) + }, filename, contentIsGzipped, 0, mtype, pairMap, jwt) } if uploadResult == nil { @@ -168,7 +168,7 @@ func doUploadData(uploadUrl string, filename string, cipher bool, data []byte, i return uploadResult, err } -func upload_content(uploadUrl string, fillBufferFunction func(w io.Writer) error, filename string, isGzipped bool, mtype string, pairMap map[string]string, jwt security.EncodedJwt) (*UploadResult, error) { +func upload_content(uploadUrl string, fillBufferFunction func(w io.Writer) error, filename string, isGzipped bool, originalDataSize int, mtype string, pairMap map[string]string, jwt security.EncodedJwt) (*UploadResult, error) { body_buf := bytes.NewBufferString("") body_writer := multipart.NewWriter(body_buf) h := make(textproto.MIMEHeader) diff --git a/weed/server/volume_server_handlers_read.go b/weed/server/volume_server_handlers_read.go index aff1eab8d..7ef1170b3 100644 --- a/weed/server/volume_server_handlers_read.go +++ b/weed/server/volume_server_handlers_read.go @@ -142,22 +142,20 @@ func (vs *VolumeServer) GetOrHeadHandler(w http.ResponseWriter, r *http.Request) } } - if ext != ".gz" { + if ext != ".gz" && ext != ".zst" { if n.IsCompressed() { - if strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") { - if _, _, _, shouldResize := shouldResizeImages(ext, r); shouldResize { - if n.Data, err = util.DecompressData(n.Data); err != nil { - glog.V(0).Infoln("ungzip error:", err, r.URL.Path) - } - } else { - if util.IsGzippedContent(n.Data) { - w.Header().Set("Content-Encoding", "gzip") - } - } - } else { + if _, _, _, shouldResize := shouldResizeImages(ext, r); shouldResize { if n.Data, err = util.DecompressData(n.Data); err != nil { glog.V(0).Infoln("ungzip error:", err, r.URL.Path) } + } else if strings.Contains(r.Header.Get("Accept-Encoding"), "zstd") && util.IsZstdContent(n.Data) { + w.Header().Set("Content-Encoding", "zstd") + } else if strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") && util.IsGzippedContent(n.Data) { + w.Header().Set("Content-Encoding", "gzip") + } else { + if n.Data, err = util.DecompressData(n.Data); err != nil { + glog.V(0).Infoln("uncompress error:", err, r.URL.Path) + } } } } diff --git a/weed/storage/needle/needle_parse_upload.go b/weed/storage/needle/needle_parse_upload.go index 67b798821..dcbfd3819 100644 --- a/weed/storage/needle/needle_parse_upload.go +++ b/weed/storage/needle/needle_parse_upload.go @@ -21,6 +21,7 @@ type ParsedUpload struct { MimeType string PairMap map[string]string IsGzipped bool + IsZstd bool OriginalDataSize int ModifiedTime uint64 Ttl *TTL @@ -67,7 +68,7 @@ func ParseUpload(r *http.Request, sizeLimit int64) (pu *ParsedUpload, e error) { } } else { ext := filepath.Base(pu.FileName) - if shouldGzip, iAmSure := util.IsGzippableFileType(ext, pu.MimeType); pu.MimeType == "" && !iAmSure || shouldGzip && iAmSure { + if shouldBeCompressed, iAmSure := util.IsCompressableFileType(ext, pu.MimeType); pu.MimeType == "" && !iAmSure || shouldBeCompressed && iAmSure { // println("ext", ext, "iAmSure", iAmSure, "shouldGzip", shouldGzip, "mimeType", pu.MimeType) if compressedData, err := util.GzipData(pu.Data); err == nil { if len(compressedData)*10 < len(pu.Data)*9 { @@ -83,6 +84,7 @@ func ParseUpload(r *http.Request, sizeLimit int64) (pu *ParsedUpload, e error) { func parsePut(r *http.Request, sizeLimit int64, pu *ParsedUpload) (e error) { pu.IsGzipped = r.Header.Get("Content-Encoding") == "gzip" + pu.IsZstd = r.Header.Get("Content-Encoding") == "zstd" pu.MimeType = r.Header.Get("Content-Type") pu.FileName = "" pu.Data, e = ioutil.ReadAll(io.LimitReader(r.Body, sizeLimit+1)) @@ -176,6 +178,7 @@ func parseMultipart(r *http.Request, sizeLimit int64, pu *ParsedUpload) (e error } pu.IsGzipped = part.Header.Get("Content-Encoding") == "gzip" + pu.IsZstd = part.Header.Get("Content-Encoding") == "zstd" } return diff --git a/weed/util/compression.go b/weed/util/compression.go index 003f06cbf..de6bf0800 100644 --- a/weed/util/compression.go +++ b/weed/util/compression.go @@ -8,9 +8,8 @@ import ( "io/ioutil" "strings" - "golang.org/x/tools/godoc/util" - "github.com/chrislusf/seaweedfs/weed/glog" + "github.com/klauspost/compress/zstd" ) func GzipData(input []byte) ([]byte, error) { @@ -30,6 +29,9 @@ func DecompressData(input []byte) ([]byte, error) { if IsGzippedContent(input) { return ungzipData(input) } + if IsZstdContent(input) { + return unzstdData(input) + } return nil, fmt.Errorf("unsupported compression") } @@ -44,19 +46,10 @@ func ungzipData(input []byte) ([]byte, error) { return output, err } -/* -* Default more not to gzip since gzip can be done on client side. - */ -func IsGzippable(ext, mtype string, data []byte) bool { +var zstdEncoder, _ = zstd.NewWriter(nil) - shouldBeZipped, iAmSure := IsGzippableFileType(ext, mtype) - if iAmSure { - return shouldBeZipped - } - - isMostlyText := util.IsText(data) - - return isMostlyText +func unzstdData(input []byte) ([]byte, error) { + return zstdEncoder.EncodeAll(input, nil), nil } func IsGzippedContent(data []byte) bool { @@ -66,9 +59,16 @@ func IsGzippedContent(data []byte) bool { return data[0] == 31 && data[1] == 139 } +func IsZstdContent(data []byte) bool { + if len(data) < 4 { + return false + } + return data[0] == 0xFD && data[1] == 0x2F && data[2] == 0xB5 && data[3] == 0x28 +} + /* -* Default more not to gzip since gzip can be done on client side. - */func IsGzippableFileType(ext, mtype string) (shouldBeZipped, iAmSure bool) { +* Default not to compressed since compression can be done on client side. + */func IsCompressableFileType(ext, mtype string) (shouldBeCompressed, iAmSure bool) { // text if strings.HasPrefix(mtype, "text/") { @@ -86,7 +86,7 @@ func IsGzippedContent(data []byte) bool { // by file name extension switch ext { - case ".zip", ".rar", ".gz", ".bz2", ".xz": + case ".zip", ".rar", ".gz", ".bz2", ".xz", ".zst": return false, true case ".pdf", ".txt", ".html", ".htm", ".css", ".js", ".json": return true, true @@ -98,13 +98,15 @@ func IsGzippedContent(data []byte) bool { // by mime type if strings.HasPrefix(mtype, "application/") { + if strings.HasSuffix(mtype, "zstd") { + return false, true + } if strings.HasSuffix(mtype, "xml") { return true, true } if strings.HasSuffix(mtype, "script") { return true, true } - } if strings.HasPrefix(mtype, "audio/") {