From 9405eaefdbd8ee94dac9bd12e324e81afaa474f5 Mon Sep 17 00:00:00 2001 From: chrislu Date: Mon, 7 Feb 2022 03:46:28 -0800 Subject: [PATCH] filer.sync: fix replicating partially updated file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Run two servers with volumes and fillers: server -dir=Server1alpha -master.port=11000 -filer -filer.port=11001 -volume.port=11002 server -dir=Server1sigma -master.port=11006 -filer -filer.port=11007 -volume.port=11008 Run Active-Passive filler.sync: filer.sync -a localhost:11007 -b localhost:11001 -isActivePassive Upload file to 11007 port: curl -F file=@/Desktop/9.xml "http://localhost:11007/testFacebook/" If we request a file on two servers now, everything will be correct, even if we add data to the file and upload it again: curl "http://localhost:11007/testFacebook/9.xml" EQUALS curl "http://localhost:11001/testFacebook/9.xml" However, if we change the already existing data in the file (for example, we change the first line in the file, reducing its length), then this file on the second server will not be valid and will not be equivalent to the first file Снимок экрана 2022-02-07 в 14 21 11 This problem occurs on line 202 in the filer_sink.go file. In particular, this is due to incorrect mapping of chunk names in the DoMinusChunks function. The names of deletedChunks do not match the chunks of existingEntry.Chunks, since the first chunks come from another server and have a different addressing (name) compared to the addressing on the server where the file is being overwritten. Deleted chunks are not actually deleted on the server to which the file is replicated. --- weed/filer/filechunks.go | 15 +++++++++++++++ weed/replication/sink/filersink/filer_sink.go | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/weed/filer/filechunks.go b/weed/filer/filechunks.go index be18d45ac..d18d06f2c 100644 --- a/weed/filer/filechunks.go +++ b/weed/filer/filechunks.go @@ -101,6 +101,21 @@ func DoMinusChunks(as, bs []*filer_pb.FileChunk) (delta []*filer_pb.FileChunk) { return } +func DoMinusChunksBySourceFileId(as, bs []*filer_pb.FileChunk) (delta []*filer_pb.FileChunk) { + + fileIds := make(map[string]bool) + for _, interval := range bs { + fileIds[interval.GetFileIdString()] = true + } + for _, chunk := range as { + if _, found := fileIds[chunk.GetSourceFileId()]; !found { + delta = append(delta, chunk) + } + } + + return +} + type ChunkView struct { FileId string Offset int64 diff --git a/weed/replication/sink/filersink/filer_sink.go b/weed/replication/sink/filersink/filer_sink.go index c48ab2368..345c7f13b 100644 --- a/weed/replication/sink/filersink/filer_sink.go +++ b/weed/replication/sink/filersink/filer_sink.go @@ -199,7 +199,7 @@ func (fs *FilerSink) UpdateEntry(key string, oldEntry *filer_pb.Entry, newParent // delete the chunks that are deleted from the source if deleteIncludeChunks { // remove the deleted chunks. Actual data deletion happens in filer UpdateEntry FindUnusedFileChunks - existingEntry.Chunks = filer.DoMinusChunks(existingEntry.Chunks, deletedChunks) + existingEntry.Chunks = filer.DoMinusChunksBySourceFileId(existingEntry.Chunks, deletedChunks) } // replicate the chunks that are new in the source