1
0
Fork 0
mirror of https://github.com/chrislusf/seaweedfs synced 2024-07-03 07:36:45 +02:00

Fix hanging reads in chunk cacher (#3473)

Sometimes, when an unexpected error occurs, the cacher would set an
error and return. However, it would not broadcast the condition
signal in that case, thereby leaving the goroutine that runs
readChunkAt stuck forever.
I figured that the condition is unnecessary because readChunkAt is
acquiring a lock that is still held by the cacher goroutine anyway.
Callers of startCaching have to wait on a WaitGroup, which makes sure
that readChunkAt can't acquire the lock before startCaching does.
This way readChunkAt can execute normally and check for the error.
This commit is contained in:
Patrick Schmidt 2022-08-21 20:54:02 +02:00 committed by GitHub
parent 388f82f322
commit f49a9297c2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -2,11 +2,12 @@ package filer
import ( import (
"fmt" "fmt"
"sync"
"time"
"github.com/seaweedfs/seaweedfs/weed/util/chunk_cache" "github.com/seaweedfs/seaweedfs/weed/util/chunk_cache"
"github.com/seaweedfs/seaweedfs/weed/util/mem" "github.com/seaweedfs/seaweedfs/weed/util/mem"
"github.com/seaweedfs/seaweedfs/weed/wdclient" "github.com/seaweedfs/seaweedfs/weed/wdclient"
"sync"
"time"
) )
type ReaderCache struct { type ReaderCache struct {
@ -19,17 +20,17 @@ type ReaderCache struct {
type SingleChunkCacher struct { type SingleChunkCacher struct {
sync.Mutex sync.Mutex
cond *sync.Cond parent *ReaderCache
parent *ReaderCache chunkFileId string
chunkFileId string data []byte
data []byte err error
err error cipherKey []byte
cipherKey []byte isGzipped bool
isGzipped bool chunkSize int
chunkSize int shouldCache bool
shouldCache bool wg sync.WaitGroup
wg sync.WaitGroup cacheStartedCh chan struct{}
completedTime time.Time completedTime time.Time
} }
func newReaderCache(limit int, chunkCache chunk_cache.ChunkCache, lookupFileIdFn wdclient.LookupFileIdFunctionType) *ReaderCache { func newReaderCache(limit int, chunkCache chunk_cache.ChunkCache, lookupFileIdFn wdclient.LookupFileIdFunctionType) *ReaderCache {
@ -62,9 +63,8 @@ func (rc *ReaderCache) MaybeCache(chunkViews []*ChunkView) {
// glog.V(4).Infof("prefetch %s offset %d", chunkView.FileId, chunkView.LogicOffset) // glog.V(4).Infof("prefetch %s offset %d", chunkView.FileId, chunkView.LogicOffset)
// cache this chunk if not yet // cache this chunk if not yet
cacher := newSingleChunkCacher(rc, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped, int(chunkView.ChunkSize), false) cacher := newSingleChunkCacher(rc, chunkView.FileId, chunkView.CipherKey, chunkView.IsGzipped, int(chunkView.ChunkSize), false)
cacher.wg.Add(1)
go cacher.startCaching() go cacher.startCaching()
cacher.wg.Wait() <-cacher.cacheStartedCh
rc.downloaders[chunkView.FileId] = cacher rc.downloaders[chunkView.FileId] = cacher
} }
@ -87,6 +87,7 @@ func (rc *ReaderCache) ReadChunkAt(buffer []byte, fileId string, cipherKey []byt
} }
} }
// clean up old downloaders
if len(rc.downloaders) >= rc.limit { if len(rc.downloaders) >= rc.limit {
oldestFid, oldestTime := "", time.Now() oldestFid, oldestTime := "", time.Now()
for fid, downloader := range rc.downloaders { for fid, downloader := range rc.downloaders {
@ -106,9 +107,8 @@ func (rc *ReaderCache) ReadChunkAt(buffer []byte, fileId string, cipherKey []byt
// glog.V(4).Infof("cache1 %s", fileId) // glog.V(4).Infof("cache1 %s", fileId)
cacher := newSingleChunkCacher(rc, fileId, cipherKey, isGzipped, chunkSize, shouldCache) cacher := newSingleChunkCacher(rc, fileId, cipherKey, isGzipped, chunkSize, shouldCache)
cacher.wg.Add(1)
go cacher.startCaching() go cacher.startCaching()
cacher.wg.Wait() <-cacher.cacheStartedCh
rc.downloaders[fileId] = cacher rc.downloaders[fileId] = cacher
return cacher.readChunkAt(buffer, offset) return cacher.readChunkAt(buffer, offset)
@ -135,23 +135,24 @@ func (rc *ReaderCache) destroy() {
} }
func newSingleChunkCacher(parent *ReaderCache, fileId string, cipherKey []byte, isGzipped bool, chunkSize int, shouldCache bool) *SingleChunkCacher { func newSingleChunkCacher(parent *ReaderCache, fileId string, cipherKey []byte, isGzipped bool, chunkSize int, shouldCache bool) *SingleChunkCacher {
t := &SingleChunkCacher{ return &SingleChunkCacher{
parent: parent, parent: parent,
chunkFileId: fileId, chunkFileId: fileId,
cipherKey: cipherKey, cipherKey: cipherKey,
isGzipped: isGzipped, isGzipped: isGzipped,
chunkSize: chunkSize, chunkSize: chunkSize,
shouldCache: shouldCache, shouldCache: shouldCache,
cacheStartedCh: make(chan struct{}),
} }
t.cond = sync.NewCond(t)
return t
} }
func (s *SingleChunkCacher) startCaching() { func (s *SingleChunkCacher) startCaching() {
s.wg.Add(1)
defer s.wg.Done()
s.Lock() s.Lock()
defer s.Unlock() defer s.Unlock()
s.wg.Done() // means this has been started s.cacheStartedCh <- struct{}{} // means this has been started
urlStrings, err := s.parent.lookupFileIdFn(s.chunkFileId) urlStrings, err := s.parent.lookupFileIdFn(s.chunkFileId)
if err != nil { if err != nil {
@ -168,16 +169,17 @@ func (s *SingleChunkCacher) startCaching() {
return return
} }
s.completedTime = time.Now()
if s.shouldCache { if s.shouldCache {
s.parent.chunkCache.SetChunk(s.chunkFileId, s.data) s.parent.chunkCache.SetChunk(s.chunkFileId, s.data)
} }
s.cond.Broadcast() s.completedTime = time.Now()
return return
} }
func (s *SingleChunkCacher) destroy() { func (s *SingleChunkCacher) destroy() {
// wait for all reads to finish before destroying the data
s.wg.Wait()
s.Lock() s.Lock()
defer s.Unlock() defer s.Unlock()
@ -185,16 +187,15 @@ func (s *SingleChunkCacher) destroy() {
mem.Free(s.data) mem.Free(s.data)
s.data = nil s.data = nil
} }
close(s.cacheStartedCh)
} }
func (s *SingleChunkCacher) readChunkAt(buf []byte, offset int64) (int, error) { func (s *SingleChunkCacher) readChunkAt(buf []byte, offset int64) (int, error) {
s.wg.Add(1)
defer s.wg.Done()
s.Lock() s.Lock()
defer s.Unlock() defer s.Unlock()
for s.completedTime.IsZero() {
s.cond.Wait()
}
if s.err != nil { if s.err != nil {
return 0, s.err return 0, s.err
} }