1
mirror of https://github.com/rclone/rclone synced 2025-01-13 17:06:24 +01:00

pikpak: implement custom hash to replace wrong sha1

This improves PikPak's file integrity verification by implementing a custom 
hash function named gcid and replacing the previously used SHA-1 hash.
This commit is contained in:
wiserain 2024-06-20 00:57:21 +09:00 committed by GitHub
parent cbccad9491
commit 300851e8bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 63 additions and 44 deletions

View File

@ -176,7 +176,7 @@ type File struct {
FileCategory string `json:"file_category,omitempty"` // "AUDIO", "VIDEO"
FileExtension string `json:"file_extension,omitempty"`
FolderType string `json:"folder_type,omitempty"`
Hash string `json:"hash,omitempty"` // sha1 but NOT a valid file hash. looks like a torrent hash
Hash string `json:"hash,omitempty"` // custom hash with a form of sha1sum
IconLink string `json:"icon_link,omitempty"`
ID string `json:"id,omitempty"`
Kind string `json:"kind,omitempty"` // "drive#file"
@ -486,7 +486,7 @@ type RequestNewFile struct {
ParentID string `json:"parent_id"`
FolderType string `json:"folder_type"`
// only when uploading a new file
Hash string `json:"hash,omitempty"` // sha1sum
Hash string `json:"hash,omitempty"` // gcid
Resumable map[string]string `json:"resumable,omitempty"` // {"provider": "PROVIDER_ALIYUN"}
Size int64 `json:"size,omitempty"`
UploadType string `json:"upload_type,omitempty"` // "UPLOAD_TYPE_FORM" or "UPLOAD_TYPE_RESUMABLE"

View File

@ -20,7 +20,7 @@ import (
// Globals
const (
cachePrefix = "rclone-pikpak-sha1sum-"
cachePrefix = "rclone-pikpak-gcid-"
)
// requestDecompress requests decompress of compressed files
@ -151,6 +151,9 @@ func (f *Fs) getFile(ctx context.Context, ID string) (info *api.File, err error)
}
return f.shouldRetry(ctx, resp, err)
})
if err == nil {
info.Name = f.opt.Enc.ToStandardName(info.Name)
}
return
}
@ -250,16 +253,11 @@ func (f *Fs) requestShare(ctx context.Context, req *api.RequestShare) (info *api
return
}
// Read the sha1 of in returning a reader which will read the same contents
// Read the gcid of in returning a reader which will read the same contents
//
// The cleanup function should be called when out is finished with
// regardless of whether this function returned an error or not.
func readSHA1(in io.Reader, size, threshold int64) (sha1sum string, out io.Reader, cleanup func(), err error) {
// we need an SHA1
hash := sha1.New()
// use the teeReader to write to the local file AND calculate the SHA1 while doing so
teeReader := io.TeeReader(in, hash)
func readGcid(in io.Reader, size, threshold int64) (gcid string, out io.Reader, cleanup func(), err error) {
// nothing to clean up by default
cleanup = func() {}
@ -282,8 +280,11 @@ func readSHA1(in io.Reader, size, threshold int64) (sha1sum string, out io.Reade
_ = os.Remove(tempFile.Name()) // delete the cache file after we are done - may be deleted already
}
// copy the ENTIRE file to disc and calculate the SHA1 in the process
if _, err = io.Copy(tempFile, teeReader); err != nil {
// use the teeReader to write to the local file AND calculate the gcid while doing so
teeReader := io.TeeReader(in, tempFile)
// copy the ENTIRE file to disk and calculate the gcid in the process
if gcid, err = calcGcid(teeReader, size); err != nil {
return
}
// jump to the start of the local file so we can pass it along
@ -294,15 +295,38 @@ func readSHA1(in io.Reader, size, threshold int64) (sha1sum string, out io.Reade
// replace the already read source with a reader of our cached file
out = tempFile
} else {
// that's a small file, just read it into memory
var inData []byte
inData, err = io.ReadAll(teeReader)
if err != nil {
buf := &bytes.Buffer{}
teeReader := io.TeeReader(in, buf)
if gcid, err = calcGcid(teeReader, size); err != nil {
return
}
// set the reader to our read memory block
out = bytes.NewReader(inData)
out = buf
}
return hex.EncodeToString(hash.Sum(nil)), out, cleanup, nil
return
}
func calcGcid(r io.Reader, size int64) (string, error) {
calcBlockSize := func(j int64) int64 {
var psize int64 = 0x40000
for float64(j)/float64(psize) > 0x200 && psize < 0x200000 {
psize = psize << 1
}
return psize
}
totalHash := sha1.New()
blockHash := sha1.New()
readSize := calcBlockSize(size)
for {
blockHash.Reset()
if n, err := io.CopyN(blockHash, r, readSize); err != nil && n == 0 {
if err != io.EOF {
return "", err
}
break
}
totalHash.Write(blockHash.Sum(nil))
}
return hex.EncodeToString(totalHash.Sum(nil)), nil
}

View File

@ -7,8 +7,6 @@ package pikpak
// md5sum is not always available, sometimes given empty.
// sha1sum used for upload differs from the one with official apps.
// Trashed files are not restored to the original location when using `batchUntrash`
// Can't stream without `--vfs-cache-mode=full`
@ -291,6 +289,7 @@ type Object struct {
modTime time.Time // modification time of the object
mimeType string // The object MIME type
parent string // ID of the parent directories
gcid string // custom hash of the object
md5sum string // md5sum of the object
link *api.Link // link to download the object
linkMu *sync.Mutex
@ -1224,7 +1223,7 @@ func (f *Fs) uploadByResumable(ctx context.Context, in io.Reader, name string, s
return
}
func (f *Fs) upload(ctx context.Context, in io.Reader, leaf, dirID, sha1Str string, size int64, options ...fs.OpenOption) (info *api.File, err error) {
func (f *Fs) upload(ctx context.Context, in io.Reader, leaf, dirID, gcid string, size int64, options ...fs.OpenOption) (info *api.File, err error) {
// determine upload type
uploadType := api.UploadTypeResumable
// if size >= 0 && size < int64(5*fs.Mebi) {
@ -1239,7 +1238,7 @@ func (f *Fs) upload(ctx context.Context, in io.Reader, leaf, dirID, sha1Str stri
ParentID: parentIDForRequest(dirID),
FolderType: "NORMAL",
Size: size,
Hash: strings.ToUpper(sha1Str),
Hash: strings.ToUpper(gcid),
UploadType: uploadType,
}
if uploadType == api.UploadTypeResumable {
@ -1503,6 +1502,7 @@ func (o *Object) setMetaData(info *api.File) (err error) {
} else {
o.parent = info.ParentID
}
o.gcid = info.Hash
o.md5sum = info.Md5Checksum
if info.Links.ApplicationOctetStream != nil {
o.link = info.Links.ApplicationOctetStream
@ -1576,9 +1576,6 @@ func (o *Object) Hash(ctx context.Context, t hash.Type) (string, error) {
if t != hash.MD5 {
return "", hash.ErrUnsupported
}
if o.md5sum == "" {
return "", nil
}
return strings.ToLower(o.md5sum), nil
}
@ -1702,25 +1699,23 @@ func (o *Object) upload(ctx context.Context, in io.Reader, src fs.ObjectInfo, wi
return err
}
// Calculate sha1sum; grabbed from package jottacloud
hashStr, err := src.Hash(ctx, hash.SHA1)
if err != nil || hashStr == "" {
// unwrap the accounting from the input, we use wrap to put it
// back on after the buffering
var wrap accounting.WrapFn
in, wrap = accounting.UnWrap(in)
var cleanup func()
hashStr, in, cleanup, err = readSHA1(in, size, int64(o.fs.opt.HashMemoryThreshold))
defer cleanup()
if err != nil {
return fmt.Errorf("failed to calculate SHA1: %w", err)
}
// Wrap the accounting back onto the stream
in = wrap(in)
// Calculate gcid; grabbed from package jottacloud
var gcid string
// unwrap the accounting from the input, we use wrap to put it
// back on after the buffering
var wrap accounting.WrapFn
in, wrap = accounting.UnWrap(in)
var cleanup func()
gcid, in, cleanup, err = readGcid(in, size, int64(o.fs.opt.HashMemoryThreshold))
defer cleanup()
if err != nil {
return fmt.Errorf("failed to calculate gcid: %w", err)
}
// Wrap the accounting back onto the stream
in = wrap(in)
if !withTemp {
info, err := o.fs.upload(ctx, in, leaf, dirID, hashStr, size, options...)
info, err := o.fs.upload(ctx, in, leaf, dirID, gcid, size, options...)
if err != nil {
return err
}
@ -1729,7 +1724,7 @@ func (o *Object) upload(ctx context.Context, in io.Reader, src fs.ObjectInfo, wi
// We have to fall back to upload + rename
tempName := "rcloneTemp" + random.String(8)
info, err := o.fs.upload(ctx, in, tempName, dirID, hashStr, size, options...)
info, err := o.fs.upload(ctx, in, tempName, dirID, gcid, size, options...)
if err != nil {
return err
}