From f7a0a0e62cad5d625cf1a138c5da3f361c29bdf1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ingard=20Mev=C3=A5g?=
Date: Fri, 30 Aug 2019 11:26:50 +0200
Subject: [PATCH] new tool based on see_dat to remove duplicate fids.
 Duplicate fids could happen if a volume server was shut down before it could
 complete vacuuming

---
 .../remove_duplicate_fids.go | 92 +++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100644 unmaintained/remove_duplicate_fids/remove_duplicate_fids.go

diff --git a/unmaintained/remove_duplicate_fids/remove_duplicate_fids.go b/unmaintained/remove_duplicate_fids/remove_duplicate_fids.go
new file mode 100644
index 000000000..5716ffa90
--- /dev/null
+++ b/unmaintained/remove_duplicate_fids/remove_duplicate_fids.go
@@ -0,0 +1,92 @@
+package main
+
+import (
+	"flag"
+
+	"github.com/chrislusf/seaweedfs/weed/glog"
+	"github.com/chrislusf/seaweedfs/weed/storage"
+	"github.com/chrislusf/seaweedfs/weed/storage/needle"
+	"os"
+	"path/filepath"
+
+	"fmt"
+)
+
+var (
+	volumePath       = flag.String("dir", "/tmp", "data directory to store files")
+	volumeCollection = flag.String("collection", "", "the volume collection name")
+	volumeId         = flag.Int("volumeId", -1, "a volume id. The volume should already exist in the dir. The volume index file should not exist.")
+)
+
+func Checksum(n *needle.Needle) string {
+	return fmt.Sprintf("%s%x", n.Id, n.Cookie)
+}
+
+type VolumeFileScanner4SeeDat struct {
+	version needle.Version
+	block   storage.SuperBlock
+
+	dir    string
+	hashes map[string]bool
+	dat    *os.File
+}
+
+func (scanner *VolumeFileScanner4SeeDat) VisitSuperBlock(superBlock storage.SuperBlock) error {
+	scanner.version = superBlock.Version()
+	scanner.block = superBlock
+	return nil
+}
+
+func (scanner *VolumeFileScanner4SeeDat) ReadNeedleBody() bool {
+	return true
+}
+
+func (scanner *VolumeFileScanner4SeeDat) VisitNeedle(n *needle.Needle, offset int64) error {
+
+	if scanner.dat == nil {
+		newDatFile, err := os.Create(filepath.Join(*volumePath, "dat_fixed"))
+		if err != nil {
+			glog.Fatalf("Write New Volume Data %v", err)
+		}
+		scanner.dat = newDatFile
+		scanner.dat.Write(scanner.block.Bytes())
+	}
+
+	checksum := Checksum(n)
+
+	if scanner.hashes[checksum] {
+		glog.V(0).Infof("duplicate checksum:%s fid:%d,%s%x @ offset:%d", checksum, *volumeId, n.Id, n.Cookie, offset)
+		return nil
+	}
+	scanner.hashes[checksum] = true
+
+	_, s, _, e := n.Append(scanner.dat, scanner.version)
+	fmt.Printf("size %d error %v\n", s, e)
+
+	return nil
+}
+
+func main() {
+	flag.Parse()
+
+	vid := needle.VolumeId(*volumeId)
+
+	outpath, _ := filepath.Abs(filepath.Dir(os.Args[0]))
+
+	scanner := &VolumeFileScanner4SeeDat{
+		dir:    filepath.Join(outpath, "out"),
+		hashes: map[string]bool{},
+	}
+
+	if _, err := os.Stat(scanner.dir); err != nil {
+		if err := os.MkdirAll(scanner.dir, os.ModePerm); err != nil {
+			glog.Fatalf("could not create output dir : %s", err)
+		}
+	}
+
+	err := storage.ScanVolumeFile(*volumePath, *volumeCollection, vid, storage.NeedleMapInMemory, scanner)
+	if err != nil {
+		glog.Fatalf("Reading Volume File [ERROR] %s\n", err)
+	}
+
+}
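
Usage sketch (not part of the patch): assuming the tool is run straight from
this new file with go run, it could be pointed at an existing volume
directory roughly like this, using the -dir, -collection and -volumeId flags
defined above; the path and volume id shown are illustrative only:

    go run unmaintained/remove_duplicate_fids/remove_duplicate_fids.go \
        -dir /data/weed/vol -collection "" -volumeId 3

The deduplicated copy of the volume data is written to <dir>/dat_fixed, so
the original .dat file is left untouched.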