123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520 |
- // Copyright 2018 The go-ethereum Authors
- // This file is part of the go-ethereum library.
- //
- // The go-ethereum library is free software: you can redistribute it and/or modify
- // it under the terms of the GNU Lesser General Public License as published by
- // the Free Software Foundation, either version 3 of the License, or
- // (at your option) any later version.
- //
- // The go-ethereum library is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU Lesser General Public License for more details.
- //
- // You should have received a copy of the GNU Lesser General Public License
- // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
- // +build !js
- // Package leveldb implements the key-value database layer based on LevelDB.
- package leveldb
- import (
- "fmt"
- "strconv"
- "strings"
- "sync"
- "time"
- "github.com/ethereum/go-ethereum/common"
- "github.com/ethereum/go-ethereum/ethdb"
- "github.com/ethereum/go-ethereum/log"
- "github.com/ethereum/go-ethereum/metrics"
- "github.com/syndtr/goleveldb/leveldb"
- "github.com/syndtr/goleveldb/leveldb/errors"
- "github.com/syndtr/goleveldb/leveldb/filter"
- "github.com/syndtr/goleveldb/leveldb/opt"
- "github.com/syndtr/goleveldb/leveldb/util"
- )
// Tunables governing cache sizing, file handle allocation and the cadence of
// the background statistics reporting.
const (
	// minHandles is the lower bound on the number of file handles allocated to
	// the open database files.
	minHandles = 16

	// minCache is the lower bound, in megabytes, on the memory allocated to
	// leveldb read and write caching, split half and half.
	minCache = 16

	// metricsGatheringInterval is how often the leveldb compaction, io and
	// pause stats are retrieved to be reported to the user.
	metricsGatheringInterval = time.Second * 3

	// degradationWarnInterval is how often a warning should be printed while
	// the leveldb database cannot keep up with the requested writes.
	degradationWarnInterval = time.Minute
)
- // Database is a persistent key-value store. Apart from basic data storage
- // functionality it also supports batch writes and iterating over the keyspace in
- // binary-alphabetical order.
- type Database struct {
- fn string // filename for reporting
- db *leveldb.DB // LevelDB instance
- compTimeMeter metrics.Meter // Meter for measuring the total time spent in database compaction
- compReadMeter metrics.Meter // Meter for measuring the data read during compaction
- compWriteMeter metrics.Meter // Meter for measuring the data written during compaction
- writeDelayNMeter metrics.Meter // Meter for measuring the write delay number due to database compaction
- writeDelayMeter metrics.Meter // Meter for measuring the write delay duration due to database compaction
- diskSizeGauge metrics.Gauge // Gauge for tracking the size of all the levels in the database
- diskReadMeter metrics.Meter // Meter for measuring the effective amount of data read
- diskWriteMeter metrics.Meter // Meter for measuring the effective amount of data written
- memCompGauge metrics.Gauge // Gauge for tracking the number of memory compaction
- level0CompGauge metrics.Gauge // Gauge for tracking the number of table compaction in level0
- nonlevel0CompGauge metrics.Gauge // Gauge for tracking the number of table compaction in non0 level
- seekCompGauge metrics.Gauge // Gauge for tracking the number of table compaction caused by read opt
- quitLock sync.Mutex // Mutex protecting the quit channel access
- quitChan chan chan error // Quit channel to stop the metrics collection before closing the database
- log log.Logger // Contextual logger tracking the database path
- }
- // New returns a wrapped LevelDB object. The namespace is the prefix that the
- // metrics reporting should use for surfacing internal stats.
- func New(file string, cache int, handles int, namespace string, readonly bool) (*Database, error) {
- return NewCustom(file, namespace, func(options *opt.Options) {
- // Ensure we have some minimal caching and file guarantees
- if cache < minCache {
- cache = minCache
- }
- if handles < minHandles {
- handles = minHandles
- }
- // Set default options
- options.OpenFilesCacheCapacity = handles
- options.BlockCacheCapacity = cache / 2 * opt.MiB
- options.WriteBuffer = cache / 4 * opt.MiB // Two of these are used internally
- if readonly {
- options.ReadOnly = true
- }
- })
- }
- // NewCustom returns a wrapped LevelDB object. The namespace is the prefix that the
- // metrics reporting should use for surfacing internal stats.
- // The customize function allows the caller to modify the leveldb options.
- func NewCustom(file string, namespace string, customize func(options *opt.Options)) (*Database, error) {
- options := configureOptions(customize)
- logger := log.New("database", file)
- usedCache := options.GetBlockCacheCapacity() + options.GetWriteBuffer()*2
- logCtx := []interface{}{"cache", common.StorageSize(usedCache), "handles", options.GetOpenFilesCacheCapacity()}
- if options.ReadOnly {
- logCtx = append(logCtx, "readonly", "true")
- }
- logger.Info("Allocated cache and file handles", logCtx...)
- // Open the db and recover any potential corruptions
- db, err := leveldb.OpenFile(file, options)
- if _, corrupted := err.(*errors.ErrCorrupted); corrupted {
- db, err = leveldb.RecoverFile(file, nil)
- }
- if err != nil {
- return nil, err
- }
- // Assemble the wrapper with all the registered metrics
- ldb := &Database{
- fn: file,
- db: db,
- log: logger,
- quitChan: make(chan chan error),
- }
- ldb.compTimeMeter = metrics.NewRegisteredMeter(namespace+"compact/time", nil)
- ldb.compReadMeter = metrics.NewRegisteredMeter(namespace+"compact/input", nil)
- ldb.compWriteMeter = metrics.NewRegisteredMeter(namespace+"compact/output", nil)
- ldb.diskSizeGauge = metrics.NewRegisteredGauge(namespace+"disk/size", nil)
- ldb.diskReadMeter = metrics.NewRegisteredMeter(namespace+"disk/read", nil)
- ldb.diskWriteMeter = metrics.NewRegisteredMeter(namespace+"disk/write", nil)
- ldb.writeDelayMeter = metrics.NewRegisteredMeter(namespace+"compact/writedelay/duration", nil)
- ldb.writeDelayNMeter = metrics.NewRegisteredMeter(namespace+"compact/writedelay/counter", nil)
- ldb.memCompGauge = metrics.NewRegisteredGauge(namespace+"compact/memory", nil)
- ldb.level0CompGauge = metrics.NewRegisteredGauge(namespace+"compact/level0", nil)
- ldb.nonlevel0CompGauge = metrics.NewRegisteredGauge(namespace+"compact/nonlevel0", nil)
- ldb.seekCompGauge = metrics.NewRegisteredGauge(namespace+"compact/seek", nil)
- // Start up the metrics gathering and return
- go ldb.meter(metricsGatheringInterval)
- return ldb, nil
- }
- // configureOptions sets some default options, then runs the provided setter.
- func configureOptions(customizeFn func(*opt.Options)) *opt.Options {
- // Set default options
- options := &opt.Options{
- Filter: filter.NewBloomFilter(10),
- DisableSeeksCompaction: true,
- }
- // Allow caller to make custom modifications to the options
- if customizeFn != nil {
- customizeFn(options)
- }
- return options
- }
- // Close stops the metrics collection, flushes any pending data to disk and closes
- // all io accesses to the underlying key-value store.
- func (db *Database) Close() error {
- db.quitLock.Lock()
- defer db.quitLock.Unlock()
- if db.quitChan != nil {
- errc := make(chan error)
- db.quitChan <- errc
- if err := <-errc; err != nil {
- db.log.Error("Metrics collection failed", "err", err)
- }
- db.quitChan = nil
- }
- return db.db.Close()
- }
- // Has retrieves if a key is present in the key-value store.
- func (db *Database) Has(key []byte) (bool, error) {
- return db.db.Has(key, nil)
- }
- // Get retrieves the given key if it's present in the key-value store.
- func (db *Database) Get(key []byte) ([]byte, error) {
- dat, err := db.db.Get(key, nil)
- if err != nil {
- return nil, err
- }
- return dat, nil
- }
- // Put inserts the given value into the key-value store.
- func (db *Database) Put(key []byte, value []byte) error {
- return db.db.Put(key, value, nil)
- }
- // Delete removes the key from the key-value store.
- func (db *Database) Delete(key []byte) error {
- return db.db.Delete(key, nil)
- }
- // NewBatch creates a write-only key-value store that buffers changes to its host
- // database until a final write is called.
- func (db *Database) NewBatch() ethdb.Batch {
- return &batch{
- db: db.db,
- b: new(leveldb.Batch),
- }
- }
- // NewIterator creates a binary-alphabetical iterator over a subset
- // of database content with a particular key prefix, starting at a particular
- // initial key (or after, if it does not exist).
- func (db *Database) NewIterator(prefix []byte, start []byte) ethdb.Iterator {
- return db.db.NewIterator(bytesPrefixRange(prefix, start), nil)
- }
- // Stat returns a particular internal stat of the database.
- func (db *Database) Stat(property string) (string, error) {
- return db.db.GetProperty(property)
- }
- // Compact flattens the underlying data store for the given key range. In essence,
- // deleted and overwritten versions are discarded, and the data is rearranged to
- // reduce the cost of operations needed to access them.
- //
- // A nil start is treated as a key before all keys in the data store; a nil limit
- // is treated as a key after all keys in the data store. If both is nil then it
- // will compact entire data store.
- func (db *Database) Compact(start []byte, limit []byte) error {
- return db.db.CompactRange(util.Range{Start: start, Limit: limit})
- }
- // Path returns the path to the database directory.
- func (db *Database) Path() string {
- return db.fn
- }
- // meter periodically retrieves internal leveldb counters and reports them to
- // the metrics subsystem.
- //
- // This is how a LevelDB stats table looks like (currently):
- // Compactions
- // Level | Tables | Size(MB) | Time(sec) | Read(MB) | Write(MB)
- // -------+------------+---------------+---------------+---------------+---------------
- // 0 | 0 | 0.00000 | 1.27969 | 0.00000 | 12.31098
- // 1 | 85 | 109.27913 | 28.09293 | 213.92493 | 214.26294
- // 2 | 523 | 1000.37159 | 7.26059 | 66.86342 | 66.77884
- // 3 | 570 | 1113.18458 | 0.00000 | 0.00000 | 0.00000
- //
- // This is how the write delay look like (currently):
- // DelayN:5 Delay:406.604657ms Paused: false
- //
- // This is how the iostats look like (currently):
- // Read(MB):3895.04860 Write(MB):3654.64712
- func (db *Database) meter(refresh time.Duration) {
- // Create the counters to store current and previous compaction values
- compactions := make([][]float64, 2)
- for i := 0; i < 2; i++ {
- compactions[i] = make([]float64, 4)
- }
- // Create storage for iostats.
- var iostats [2]float64
- // Create storage and warning log tracer for write delay.
- var (
- delaystats [2]int64
- lastWritePaused time.Time
- )
- var (
- errc chan error
- merr error
- )
- timer := time.NewTimer(refresh)
- defer timer.Stop()
- // Iterate ad infinitum and collect the stats
- for i := 1; errc == nil && merr == nil; i++ {
- // Retrieve the database stats
- stats, err := db.db.GetProperty("leveldb.stats")
- if err != nil {
- db.log.Error("Failed to read database stats", "err", err)
- merr = err
- continue
- }
- // Find the compaction table, skip the header
- lines := strings.Split(stats, "\n")
- for len(lines) > 0 && strings.TrimSpace(lines[0]) != "Compactions" {
- lines = lines[1:]
- }
- if len(lines) <= 3 {
- db.log.Error("Compaction leveldbTable not found")
- merr = errors.New("compaction leveldbTable not found")
- continue
- }
- lines = lines[3:]
- // Iterate over all the leveldbTable rows, and accumulate the entries
- for j := 0; j < len(compactions[i%2]); j++ {
- compactions[i%2][j] = 0
- }
- for _, line := range lines {
- parts := strings.Split(line, "|")
- if len(parts) != 6 {
- break
- }
- for idx, counter := range parts[2:] {
- value, err := strconv.ParseFloat(strings.TrimSpace(counter), 64)
- if err != nil {
- db.log.Error("Compaction entry parsing failed", "err", err)
- merr = err
- continue
- }
- compactions[i%2][idx] += value
- }
- }
- // Update all the requested meters
- if db.diskSizeGauge != nil {
- db.diskSizeGauge.Update(int64(compactions[i%2][0] * 1024 * 1024))
- }
- if db.compTimeMeter != nil {
- db.compTimeMeter.Mark(int64((compactions[i%2][1] - compactions[(i-1)%2][1]) * 1000 * 1000 * 1000))
- }
- if db.compReadMeter != nil {
- db.compReadMeter.Mark(int64((compactions[i%2][2] - compactions[(i-1)%2][2]) * 1024 * 1024))
- }
- if db.compWriteMeter != nil {
- db.compWriteMeter.Mark(int64((compactions[i%2][3] - compactions[(i-1)%2][3]) * 1024 * 1024))
- }
- // Retrieve the write delay statistic
- writedelay, err := db.db.GetProperty("leveldb.writedelay")
- if err != nil {
- db.log.Error("Failed to read database write delay statistic", "err", err)
- merr = err
- continue
- }
- var (
- delayN int64
- delayDuration string
- duration time.Duration
- paused bool
- )
- if n, err := fmt.Sscanf(writedelay, "DelayN:%d Delay:%s Paused:%t", &delayN, &delayDuration, &paused); n != 3 || err != nil {
- db.log.Error("Write delay statistic not found")
- merr = err
- continue
- }
- duration, err = time.ParseDuration(delayDuration)
- if err != nil {
- db.log.Error("Failed to parse delay duration", "err", err)
- merr = err
- continue
- }
- if db.writeDelayNMeter != nil {
- db.writeDelayNMeter.Mark(delayN - delaystats[0])
- }
- if db.writeDelayMeter != nil {
- db.writeDelayMeter.Mark(duration.Nanoseconds() - delaystats[1])
- }
- // If a warning that db is performing compaction has been displayed, any subsequent
- // warnings will be withheld for one minute not to overwhelm the user.
- if paused && delayN-delaystats[0] == 0 && duration.Nanoseconds()-delaystats[1] == 0 &&
- time.Now().After(lastWritePaused.Add(degradationWarnInterval)) {
- db.log.Warn("Database compacting, degraded performance")
- lastWritePaused = time.Now()
- }
- delaystats[0], delaystats[1] = delayN, duration.Nanoseconds()
- // Retrieve the database iostats.
- ioStats, err := db.db.GetProperty("leveldb.iostats")
- if err != nil {
- db.log.Error("Failed to read database iostats", "err", err)
- merr = err
- continue
- }
- var nRead, nWrite float64
- parts := strings.Split(ioStats, " ")
- if len(parts) < 2 {
- db.log.Error("Bad syntax of ioStats", "ioStats", ioStats)
- merr = fmt.Errorf("bad syntax of ioStats %s", ioStats)
- continue
- }
- if n, err := fmt.Sscanf(parts[0], "Read(MB):%f", &nRead); n != 1 || err != nil {
- db.log.Error("Bad syntax of read entry", "entry", parts[0])
- merr = err
- continue
- }
- if n, err := fmt.Sscanf(parts[1], "Write(MB):%f", &nWrite); n != 1 || err != nil {
- db.log.Error("Bad syntax of write entry", "entry", parts[1])
- merr = err
- continue
- }
- if db.diskReadMeter != nil {
- db.diskReadMeter.Mark(int64((nRead - iostats[0]) * 1024 * 1024))
- }
- if db.diskWriteMeter != nil {
- db.diskWriteMeter.Mark(int64((nWrite - iostats[1]) * 1024 * 1024))
- }
- iostats[0], iostats[1] = nRead, nWrite
- compCount, err := db.db.GetProperty("leveldb.compcount")
- if err != nil {
- db.log.Error("Failed to read database iostats", "err", err)
- merr = err
- continue
- }
- var (
- memComp uint32
- level0Comp uint32
- nonLevel0Comp uint32
- seekComp uint32
- )
- if n, err := fmt.Sscanf(compCount, "MemComp:%d Level0Comp:%d NonLevel0Comp:%d SeekComp:%d", &memComp, &level0Comp, &nonLevel0Comp, &seekComp); n != 4 || err != nil {
- db.log.Error("Compaction count statistic not found")
- merr = err
- continue
- }
- db.memCompGauge.Update(int64(memComp))
- db.level0CompGauge.Update(int64(level0Comp))
- db.nonlevel0CompGauge.Update(int64(nonLevel0Comp))
- db.seekCompGauge.Update(int64(seekComp))
- // Sleep a bit, then repeat the stats collection
- select {
- case errc = <-db.quitChan:
- // Quit requesting, stop hammering the database
- case <-timer.C:
- timer.Reset(refresh)
- // Timeout, gather a new set of stats
- }
- }
- if errc == nil {
- errc = <-db.quitChan
- }
- errc <- merr
- }
- // batch is a write-only leveldb batch that commits changes to its host database
- // when Write is called. A batch cannot be used concurrently.
- type batch struct {
- db *leveldb.DB
- b *leveldb.Batch
- size int
- }
- // Put inserts the given value into the batch for later committing.
- func (b *batch) Put(key, value []byte) error {
- b.b.Put(key, value)
- b.size += len(value)
- return nil
- }
- // Delete inserts the a key removal into the batch for later committing.
- func (b *batch) Delete(key []byte) error {
- b.b.Delete(key)
- b.size += len(key)
- return nil
- }
- // ValueSize retrieves the amount of data queued up for writing.
- func (b *batch) ValueSize() int {
- return b.size
- }
- // Write flushes any accumulated data to disk.
- func (b *batch) Write() error {
- return b.db.Write(b.b, nil)
- }
- // Reset resets the batch for reuse.
- func (b *batch) Reset() {
- b.b.Reset()
- b.size = 0
- }
- // Replay replays the batch contents.
- func (b *batch) Replay(w ethdb.KeyValueWriter) error {
- return b.b.Replay(&replayer{writer: w})
- }
- // replayer is a small wrapper to implement the correct replay methods.
- type replayer struct {
- writer ethdb.KeyValueWriter
- failure error
- }
- // Put inserts the given value into the key-value data store.
- func (r *replayer) Put(key, value []byte) {
- // If the replay already failed, stop executing ops
- if r.failure != nil {
- return
- }
- r.failure = r.writer.Put(key, value)
- }
- // Delete removes the key from the key-value data store.
- func (r *replayer) Delete(key []byte) {
- // If the replay already failed, stop executing ops
- if r.failure != nil {
- return
- }
- r.failure = r.writer.Delete(key)
- }
- // bytesPrefixRange returns key range that satisfy
- // - the given prefix, and
- // - the given seek position
- func bytesPrefixRange(prefix, start []byte) *util.Range {
- r := util.BytesPrefix(prefix)
- r.Start = append(r.Start, start...)
- return r
- }
|