View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.util;
19  
20  import java.util.concurrent.atomic.AtomicBoolean;
21  import java.util.concurrent.atomic.AtomicLong;
22  import java.util.concurrent.atomic.AtomicLongArray;
23  
24  import org.apache.hadoop.hbase.classification.InterfaceAudience;
25  import org.apache.hadoop.hbase.classification.InterfaceStability;
26  
27  /**
28   * FastLongHistogram is a thread-safe class that estimate distribution of data and computes the
29   * quantiles.
30   */
31  @InterfaceAudience.Public
32  @InterfaceStability.Evolving
33  public class FastLongHistogram {
34    /**
35     * Bins is a class containing a list of buckets(or bins) for estimation histogram of some data.
36     */
37    private static class Bins {
38      private final AtomicLongArray counts;
39      // inclusive
40      private final long binsMin;
41      // exclusive
42      private final long binsMax;
43      private final long bins10XMax;
44      private final AtomicLong min = new AtomicLong(Long.MAX_VALUE);
45      private final AtomicLong max = new AtomicLong(0L);
46      // set to true when any of data has been inserted to the Bins. It is set after the counts are
47      // updated.
48      private final AtomicBoolean hasData = new AtomicBoolean(false);
49      
50      /**
51       * The constructor for creating a Bins without any prior data.
52       */
53      public Bins() {
54        this.counts = new AtomicLongArray(4);
55        this.binsMin = 0L;
56        this.binsMax = Long.MAX_VALUE;
57        this.bins10XMax = Long.MAX_VALUE;
58      }
59      
60      /**
61       * The constructor for creating a Bins with last Bins.
62       * @param last the last Bins instance.
63       * @param quantiles the quantiles for creating the bins of the histogram.
64       */
65      public Bins(Bins last, int numOfBins, double minQ, double maxQ) {
66        long[] values = last.getQuantiles(new double[] { minQ, maxQ });
67        long wd = values[1] - values[0] + 1;
68        // expand minQ and maxQ in two ends back assuming uniform distribution
69        this.binsMin = Math.max(0L, (long) (values[0] - wd * minQ));
70        long binsMax = (long) (values[1] + wd * (1 - maxQ)) + 1;
71        // make sure each of bins is at least of width 1
72        this.binsMax = Math.max(binsMax, this.binsMin + numOfBins);
73        this.bins10XMax = Math.max((long) (values[1] + (binsMax - 1) * 9), this.binsMax + 1);
74  
75        this.counts = new AtomicLongArray(numOfBins + 3);
76      }
77  
78      /**
79       * Adds a value to the histogram.
80       */
81      public void add(long value, long count) {
82        AtomicUtils.updateMin(min, value);
83        AtomicUtils.updateMax(max, value);
84  
85        if (value < this.binsMin) {
86          this.counts.addAndGet(0, count);
87        } else if (value > this.bins10XMax) {
88          this.counts.addAndGet(this.counts.length() - 1, count);
89        } else if (value >= this.binsMax) {
90          this.counts.addAndGet(this.counts.length() - 2, count);
91        } else {
92          // compute the position
93          int pos =
94              1 + (int) ((value - this.binsMin) * (this.counts.length() - 3) / (this.binsMax - this.binsMin));
95          this.counts.addAndGet(pos, count);
96        }
97  
98        // hasData needs to be updated as last
99        this.hasData.set(true);
100     }
101     
102     /**
103      * Computes the quantiles give the ratios.
104      * @param smooth set to true to have a prior on the distribution. Used for recreating the bins.
105      */
106     public long[] getQuantiles(double[] quantiles) {
107       if (!this.hasData.get()) {
108         // No data yet.
109         return new long[quantiles.length];
110       }
111 
112       // Make a snapshot of lowerCounter, higherCounter and bins.counts to counts.
113       // This is not synchronized, but since the counter are accumulating, the result is a good
114       // estimation of a snapshot.
115       long[] counts = new long[this.counts.length()];
116       long total = 0L;
117       for (int i = 0; i < this.counts.length(); i++) {
118         counts[i] = this.counts.get(i);
119         total += counts[i];
120       }
121 
122       int rIndex = 0;
123       double qCount = total * quantiles[0];
124       long cum = 0L;
125       
126       long[] res = new long[quantiles.length];
127       countsLoop: for (int i = 0; i < counts.length; i++) {
128         // mn and mx define a value range
129         long mn, mx;
130         if (i == 0) {
131           mn = this.min.get();
132           mx = this.binsMin;
133         } else if (i == counts.length - 1) {
134           mn = this.bins10XMax;
135           mx = this.max.get();
136         } else if (i == counts.length - 2) {
137           mn = this.binsMax;
138           mx = this.bins10XMax;
139         } else {
140           mn = this.binsMin + (i - 1) * (this.binsMax - this.binsMin) / (this.counts.length() - 3);
141           mx = this.binsMin + i * (this.binsMax - this.binsMin) / (this.counts.length() - 3);
142         }
143 
144         if (mx < this.min.get()) {
145           continue;
146         }
147         if (mn > this.max.get()) {
148           break;
149         }
150         mn = Math.max(mn, this.min.get());
151         mx = Math.min(mx, this.max.get());
152 
153         // lastCum/cum are the corresponding counts to mn/mx
154         double lastCum = cum;
155         cum += counts[i];
156 
157         // fill the results for qCount is within current range.
158         while (qCount <= cum) {
159           if (cum == lastCum) {
160             res[rIndex] = mn;
161           } else {
162             res[rIndex] = (long) ((qCount - lastCum) * (mx - mn) / (cum - lastCum) + mn);
163           }
164 
165           // move to next quantile
166           rIndex++;
167           if (rIndex >= quantiles.length) {
168             break countsLoop;
169           }
170           qCount = total * quantiles[rIndex];
171         }
172       }
173       // In case quantiles contains values >= 100%
174       for (; rIndex < quantiles.length; rIndex++) {
175         res[rIndex] = this.max.get();
176       }
177 
178       return res;
179     }
180   }
181 
182   // The bins counting values. It is replaced with a new one in calling of reset().
183   private volatile Bins bins = new Bins();
184   // The quantiles for creating a Bins with last Bins.
185   private final int numOfBins;
186 
187   /**
188    * Constructor.
189    * @param numOfBins the number of bins for the histogram. A larger value results in more precise
190    *          results but with lower efficiency, and vice versus.
191    */
192   public FastLongHistogram(int numOfBins) {
193     this.numOfBins = numOfBins;
194   }
195 
196   /**
197    * Constructor setting the bins assuming a uniform distribution within a range.
198    * @param numOfBins the number of bins for the histogram. A larger value results in more precise
199    *          results but with lower efficiency, and vice versus.
200    * @param min lower bound of the region, inclusive.
201    * @param max higher bound of the region, inclusive.
202    */
203   public FastLongHistogram(int numOfBins, long min, long max) {
204     this(numOfBins);
205     Bins bins = new Bins();
206     bins.add(min, 1);
207     bins.add(max, 1);
208     this.bins = new Bins(bins, numOfBins, 0.01, 0.99);
209   }
210 
211   /**
212    * Adds a value to the histogram.
213    */
214   public void add(long value, long count) {
215     this.bins.add(value, count);
216   }
217 
218   /**
219    * Computes the quantiles give the ratios.
220    */
221   public long[] getQuantiles(double[] quantiles) {
222     return this.bins.getQuantiles(quantiles);
223   }
224 
225   /**
226    * Resets the histogram for new counting.
227    */
228   public void reset() {
229     if (this.bins.hasData.get()) {
230       this.bins = new Bins(this.bins, numOfBins, 0.01, 0.99);
231     }
232   }
233 }