diff options
author | Roland Shoemaker <roland@golang.org> | 2021-11-01 10:03:36 -0700 |
---|---|---|
committer | Roland Shoemaker <roland@golang.org> | 2022-01-27 16:07:55 +0000 |
commit | b2dc66c64db933120c34d2223e670e8594543bd9 (patch) | |
tree | e7db8fd85eec9a074819b381f9db8c0549891b6b | |
parent | a991d9dc27bda23018e23488806c8f8d027e4f7b (diff) | |
download | go-b2dc66c64db933120c34d2223e670e8594543bd9.tar.gz go-b2dc66c64db933120c34d2223e670e8594543bd9.zip |
internal/fuzz: centralize corpus entry addition
Adds an addCorpusEntry method to coordinator which manages checking for
duplicate entries, writing entries to the cache directory, and adding
entries to the corpus. Also moves readCache to be a method on the
coordinator.
Fixes #50606
Change-Id: Id6721384a2ad1cfb4c5471cf0cd0a7510d250a6c
Reviewed-on: https://go-review.googlesource.com/c/go/+/360394
Trust: Katie Hockman <katie@golang.org>
Reviewed-by: Katie Hockman <katie@golang.org>
Trust: Roland Shoemaker <roland@golang.org>
Run-TryBot: Roland Shoemaker <roland@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
-rw-r--r-- | src/internal/fuzz/fuzz.go | 81 |
1 files changed, 44 insertions, 37 deletions
diff --git a/src/internal/fuzz/fuzz.go b/src/internal/fuzz/fuzz.go index 37b6d2b391..73f32dd4c7 100644 --- a/src/internal/fuzz/fuzz.go +++ b/src/internal/fuzz/fuzz.go @@ -316,32 +316,15 @@ func CoordinateFuzzing(ctx context.Context, opts CoordinateFuzzingOpts) (err err } else { // Update the coordinator's coverage mask and save the value. inputSize := len(result.entry.Data) - if opts.CacheDir != "" { - // It is possible that the input that was discovered is already - // present in the corpus, but the worker produced a coverage map - // that still expanded our total coverage (this may happen due to - // flakiness in the coverage counters). In order to prevent adding - // duplicate entries to the corpus (and re-writing the file on - // disk), skip it if the on disk file already exists. - // TODO(roland): this check is limited in that it will only be - // applied if we are using the CacheDir. Another option would be - // to iterate through the corpus and check if it is already present, - // which would catch cases where we are not caching entries. - // A slightly faster approach would be to keep some kind of map of - // entry hashes, which would allow us to avoid iterating through - // all entries. - _, err = os.Stat(result.entry.Path) - if err == nil { - continue - } - err := writeToCorpus(&result.entry, opts.CacheDir) - if err != nil { - stop(err) - } - result.entry.Data = nil + duplicate, err := c.addCorpusEntries(true, result.entry) + if err != nil { + stop(err) + break + } + if duplicate { + continue } c.updateCoverage(keepCoverage) - c.corpus.entries = append(c.corpus.entries, result.entry) c.inputQueue.enqueue(result.entry) c.interestingCount++ if shouldPrintDebugInfo() { @@ -433,6 +416,28 @@ func (e *crashError) CrashPath() string { type corpus struct { entries []CorpusEntry + hashes map[[sha256.Size]byte]bool +} + +func (c *coordinator) addCorpusEntries(addToCache bool, entries ...CorpusEntry) (bool, error) { + for _, e := range entries { + h := sha256.Sum256(e.Data) + if c.corpus.hashes[h] { + return true, nil + } + if addToCache { + if err := writeToCorpus(&e, c.opts.CacheDir); err != nil { + return false, err + } + // For entries written to disk, we don't hold onto the bytes, + // since the corpus would consume a significant amount of + // memory. + e.Data = nil + } + c.corpus.hashes[h] = true + c.corpus.entries = append(c.corpus.entries, e) + } + return false, nil } // CorpusEntry represents an individual input for fuzzing. @@ -640,18 +645,17 @@ func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) { opts.Seed[i].Data = marshalCorpusFile(opts.Seed[i].Values...) } } - corpus, err := readCache(opts.Seed, opts.Types, opts.CacheDir) - if err != nil { - return nil, err - } c := &coordinator{ opts: opts, startTime: time.Now(), inputC: make(chan fuzzInput), minimizeC: make(chan fuzzMinimizeInput), resultC: make(chan fuzzResult), - corpus: corpus, timeLastLog: time.Now(), + corpus: corpus{hashes: make(map[[sha256.Size]byte]bool)}, + } + if err := c.readCache(); err != nil { + return nil, err } if opts.MinimizeLimit > 0 || opts.MinimizeTimeout > 0 { for _, t := range opts.Types { @@ -691,7 +695,7 @@ func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) { data := marshalCorpusFile(vals...) h := sha256.Sum256(data) name := fmt.Sprintf("%x", h[:4]) - c.corpus.entries = append(c.corpus.entries, CorpusEntry{Path: name, Data: data}) + c.addCorpusEntries(false, CorpusEntry{Path: name, Data: data}) } return c, nil @@ -908,22 +912,25 @@ func (c *coordinator) elapsed() time.Duration { // // TODO(fuzzing): need a mechanism that can remove values that // aren't useful anymore, for example, because they have the wrong type. -func readCache(seed []CorpusEntry, types []reflect.Type, cacheDir string) (corpus, error) { - var c corpus - c.entries = append(c.entries, seed...) - entries, err := ReadCorpus(cacheDir, types) +func (c *coordinator) readCache() error { + if _, err := c.addCorpusEntries(false, c.opts.Seed...); err != nil { + return err + } + entries, err := ReadCorpus(c.opts.CacheDir, c.opts.Types) if err != nil { if _, ok := err.(*MalformedCorpusError); !ok { // It's okay if some files in the cache directory are malformed and // are not included in the corpus, but fail if it's an I/O error. - return corpus{}, err + return err } // TODO(jayconrod,katiehockman): consider printing some kind of warning // indicating the number of files which were skipped because they are // malformed. } - c.entries = append(c.entries, entries...) - return c, nil + if _, err := c.addCorpusEntries(false, entries...); err != nil { + return err + } + return nil } // MalformedCorpusError is an error found while reading the corpus from the |