gc.go 7.61 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package localstore

import (
20
	"errors"
21
	"fmt"
22 23
	"time"

24
	"github.com/ethersphere/bee/pkg/shed"
25
	"github.com/syndtr/goleveldb/leveldb"
26 27 28 29 30 31 32 33 34 35 36 37 38
)

var (
	// gcTargetRatio defines the target number of items
	// in garbage collection index that will not be removed
	// on garbage collection. The target number of items
	// is calculated by gcTarget function. This value must be
	// in range (0,1]. For example, with 0.9 value,
	// garbage collection will leave 90% of defined capacity
	// in database after its run. This prevents frequent
	// garbage collection runs.
	gcTargetRatio = 0.9
	// gcBatchSize limits the number of chunks removed in a single
	// leveldb batch on garbage collection. When the limit is hit,
	// collectGarbage reports that it is not done and another run
	// is requested.
	gcBatchSize uint64 = 200
)

// collectGarbageWorker is a long running function that waits for
// collectGarbageTrigger channel to signal a garbage collection
// run. GC run iterates on gcIndex and removes older items
// form retrieval and other indexes.
func (db *DB) collectGarbageWorker() {
	defer close(db.collectGarbageWorkerDone)

	for {
		select {
		case <-db.collectGarbageTrigger:
			// run a single collect garbage run and
			// if done is false, gcBatchSize is reached and
			// another collect garbage run is needed
			collectedCount, done, err := db.collectGarbage()
			if err != nil {
58
				db.logger.Errorf("localstore: collect garbage: %v", err)
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
			}
			// check if another gc run is needed
			if !done {
				db.triggerGarbageCollection()
			}

			if testHookCollectGarbage != nil {
				testHookCollectGarbage(collectedCount)
			}
		case <-db.close:
			return
		}
	}
}

// collectGarbage removes chunks from retrieval and other
// indexes if maximal number of chunks in database is reached.
// This function returns the number of removed chunks. If done
// is false, another call to this function is needed to collect
// the rest of the garbage as the batch size limit is reached.
// This function is called in collectGarbageWorker.
func (db *DB) collectGarbage() (collectedCount uint64, done bool, err error) {
81 82
	db.metrics.GCCounter.Inc()
	defer totalTimeMetric(db.metrics.TotalTimeCollectGarbage, time.Now())
83 84
	defer func() {
		if err != nil {
85
			db.metrics.GCErrorCounter.Inc()
86 87 88
		}
	}()

89
	batch := new(leveldb.Batch)
90 91 92 93 94 95 96 97 98 99
	target := db.gcTarget()

	// protect database from changing idexes and gcSize
	db.batchMu.Lock()
	defer db.batchMu.Unlock()

	// run through the recently pinned chunks and
	// remove them from the gcIndex before iterating through gcIndex
	err = db.removeChunksInExcludeIndexFromGC()
	if err != nil {
100
		return 0, true, fmt.Errorf("remove chunks in exclude index: %v", err)
101 102 103 104 105 106
	}

	gcSize, err := db.gcSize.Get()
	if err != nil {
		return 0, true, err
	}
107
	db.metrics.GCSize.Inc()
108 109 110 111 112 113 114

	done = true
	err = db.gcIndex.Iterate(func(item shed.Item) (stop bool, err error) {
		if gcSize-collectedCount <= target {
			return true, nil
		}

115 116
		db.metrics.GCStoreTimeStamps.Set(float64(item.StoreTimestamp))
		db.metrics.GCStoreAccessTimeStamps.Set(float64(item.AccessTimestamp))
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146

		// delete from retrieve, pull, gc
		err = db.retrievalDataIndex.DeleteInBatch(batch, item)
		if err != nil {
			return true, nil
		}
		err = db.retrievalAccessIndex.DeleteInBatch(batch, item)
		if err != nil {
			return true, nil
		}
		err = db.pullIndex.DeleteInBatch(batch, item)
		if err != nil {
			return true, nil
		}
		err = db.gcIndex.DeleteInBatch(batch, item)
		if err != nil {
			return true, nil
		}
		collectedCount++
		if collectedCount >= gcBatchSize {
			// bach size limit reached,
			// another gc run is needed
			done = false
			return true, nil
		}
		return false, nil
	}, nil)
	if err != nil {
		return 0, false, err
	}
147
	db.metrics.GCCollectedCounter.Inc()
148

149
	db.gcSize.PutInBatch(batch, gcSize-collectedCount)
150 151
	err = db.shed.WriteBatch(batch)
	if err != nil {
152
		db.metrics.GCExcludeWriteBatchError.Inc()
153 154 155 156 157 158 159
		return 0, false, err
	}
	return collectedCount, done, nil
}

// removeChunksInExcludeIndexFromGC removed any recently chunks in the exclude Index, from the gcIndex.
func (db *DB) removeChunksInExcludeIndexFromGC() (err error) {
160 161
	db.metrics.GCExcludeCounter.Inc()
	defer totalTimeMetric(db.metrics.TotalTimeGCExclude, time.Now())
162 163
	defer func() {
		if err != nil {
164
			db.metrics.GCExcludeError.Inc()
165 166 167
		}
	}()

168
	batch := new(leveldb.Batch)
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
	excludedCount := 0
	var gcSizeChange int64
	err = db.gcExcludeIndex.Iterate(func(item shed.Item) (stop bool, err error) {
		// Get access timestamp
		retrievalAccessIndexItem, err := db.retrievalAccessIndex.Get(item)
		if err != nil {
			return false, err
		}
		item.AccessTimestamp = retrievalAccessIndexItem.AccessTimestamp

		// Get the binId
		retrievalDataIndexItem, err := db.retrievalDataIndex.Get(item)
		if err != nil {
			return false, err
		}
		item.BinID = retrievalDataIndexItem.BinID

		// Check if this item is in gcIndex and remove it
		ok, err := db.gcIndex.Has(item)
		if err != nil {
			return false, nil
		}
		if ok {
			err = db.gcIndex.DeleteInBatch(batch, item)
			if err != nil {
				return false, nil
			}
			if _, err := db.gcIndex.Get(item); err == nil {
				gcSizeChange--
			}
			excludedCount++
			err = db.gcExcludeIndex.DeleteInBatch(batch, item)
			if err != nil {
				return false, nil
			}
		}

		return false, nil
	}, nil)
	if err != nil {
		return err
	}

	// update the gc size based on the no of entries deleted in gcIndex
	err = db.incGCSizeInBatch(batch, gcSizeChange)
	if err != nil {
		return err
	}

218
	db.metrics.GCExcludeCounter.Inc()
219 220
	err = db.shed.WriteBatch(batch)
	if err != nil {
221
		db.metrics.GCExcludeWriteBatchError.Inc()
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246
		return err
	}

	return nil
}

// gcTarget returns the absolute value for garbage collection
// target value: db.capacity multiplied by gcTargetRatio and
// converted to uint64.
func (db *DB) gcTarget() (target uint64) {
	scaled := float64(db.capacity) * gcTargetRatio
	target = uint64(scaled)
	return target
}

// triggerGarbageCollection requests a collectGarbage run from
// collectGarbageWorker by sending on collectGarbageTrigger.
// The call never blocks: if the signal cannot be sent right away,
// or the database is closing, it returns without sending.
func (db *DB) triggerGarbageCollection() {
	select {
	case <-db.close:
		// database is closing, no run is requested
	case db.collectGarbageTrigger <- struct{}{}:
		// signal sent
	default:
		// the trigger cannot be sent right now; drop the
		// signal instead of blocking the caller
	}
}

// incGCSizeInBatch changes gcSize field value
// by change which can be negative. This function
// must be called under batchMu lock.
247
func (db *DB) incGCSizeInBatch(batch *leveldb.Batch, change int64) (err error) {
248 249 250 251
	if change == 0 {
		return nil
	}
	gcSize, err := db.gcSize.Get()
252
	if err != nil && !errors.Is(err, leveldb.ErrNotFound) {
253 254 255
		return err
	}

256
	var newSize uint64
257
	if change > 0 {
258
		newSize = gcSize + uint64(change)
259 260 261 262 263 264 265 266
	} else {
		// 'change' is an int64 and is negative
		// a conversion is needed with correct sign
		c := uint64(-change)
		if c > gcSize {
			// protect uint64 undeflow
			return nil
		}
267
		newSize = gcSize - c
268
	}
269
	db.gcSize.PutInBatch(batch, newSize)
270 271

	// trigger garbage collection if we reached the capacity
272
	if newSize >= db.capacity {
273 274 275 276 277 278 279 280 281
		db.triggerGarbageCollection()
	}
	return nil
}

// testHookCollectGarbage is a hook that can provide
// information when a garbage collection run is done
// and how many items it removed. It has no initial value;
// when it is non-nil, collectGarbageWorker calls it after
// each collectGarbage run with the collected chunk count.
var testHookCollectGarbage func(collectedCount uint64)