/* * Copyright 2019 Dgraph Labs, Inc. and Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package badger import ( "fmt" "math" ) // PrintHistogram builds and displays the key-value size histogram. // When keyPrefix is set, only the keys that have prefix "keyPrefix" are // considered for creating the histogram func (db *DB) PrintHistogram(keyPrefix []byte) { if db == nil { fmt.Println("\nCannot build histogram: DB is nil.") return } histogram := db.buildHistogram(keyPrefix) fmt.Printf("Histogram of key sizes (in bytes)\n") histogram.keySizeHistogram.printHistogram() fmt.Printf("Histogram of value sizes (in bytes)\n") histogram.valueSizeHistogram.printHistogram() } // histogramData stores information about a histogram type histogramData struct { bins []int64 countPerBin []int64 totalCount int64 min int64 max int64 sum int64 } // sizeHistogram contains keySize histogram and valueSize histogram type sizeHistogram struct { keySizeHistogram, valueSizeHistogram histogramData } // newSizeHistogram returns a new instance of keyValueSizeHistogram with // properly initialized fields. func newSizeHistogram() *sizeHistogram { // TODO(ibrahim): find appropriate bin size. keyBins := createHistogramBins(1, 16) valueBins := createHistogramBins(1, 30) return &sizeHistogram{ keySizeHistogram: histogramData{ bins: keyBins, countPerBin: make([]int64, len(keyBins)+1), max: math.MinInt64, min: math.MaxInt64, sum: 0, }, valueSizeHistogram: histogramData{ bins: valueBins, countPerBin: make([]int64, len(valueBins)+1), max: math.MinInt64, min: math.MaxInt64, sum: 0, }, } } // createHistogramBins creates bins for an histogram. The bin sizes are powers // of two of the form [2^min_exponent, ..., 2^max_exponent]. func createHistogramBins(minExponent, maxExponent uint32) []int64 { var bins []int64 for i := minExponent; i <= maxExponent; i++ { bins = append(bins, int64(1)< histogram.max { histogram.max = value } if value < histogram.min { histogram.min = value } histogram.sum += value histogram.totalCount++ for index := 0; index <= len(histogram.bins); index++ { // Allocate value in the last buckets if we reached the end of the Bounds array. if index == len(histogram.bins) { histogram.countPerBin[index]++ break } // Check if the value should be added to the "index" bin if value < int64(histogram.bins[index]) { histogram.countPerBin[index]++ break } } } // buildHistogram builds the key-value size histogram. // When keyPrefix is set, only the keys that have prefix "keyPrefix" are // considered for creating the histogram func (db *DB) buildHistogram(keyPrefix []byte) *sizeHistogram { txn := db.NewTransaction(false) defer txn.Discard() itr := txn.NewIterator(DefaultIteratorOptions) defer itr.Close() badgerHistogram := newSizeHistogram() // Collect key and value sizes. for itr.Seek(keyPrefix); itr.ValidForPrefix(keyPrefix); itr.Next() { item := itr.Item() badgerHistogram.keySizeHistogram.Update(item.KeySize()) badgerHistogram.valueSizeHistogram.Update(item.ValueSize()) } return badgerHistogram } // printHistogram prints the histogram data in a human-readable format. func (histogram histogramData) printHistogram() { fmt.Printf("Total count: %d\n", histogram.totalCount) fmt.Printf("Min value: %d\n", histogram.min) fmt.Printf("Max value: %d\n", histogram.max) fmt.Printf("Mean: %.2f\n", float64(histogram.sum)/float64(histogram.totalCount)) fmt.Printf("%24s %9s\n", "Range", "Count") numBins := len(histogram.bins) for index, count := range histogram.countPerBin { if count == 0 { continue } // The last bin represents the bin that contains the range from // the last bin up to infinity so it's processed differently than the // other bins. if index == len(histogram.countPerBin)-1 { lowerBound := int(histogram.bins[numBins-1]) fmt.Printf("[%10d, %10s) %9d\n", lowerBound, "infinity", count) continue } upperBound := int(histogram.bins[index]) lowerBound := 0 if index > 0 { lowerBound = int(histogram.bins[index-1]) } fmt.Printf("[%10d, %10d) %9d\n", lowerBound, upperBound, count) } fmt.Println() }