001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.math4.legacy.stat.descriptive;
018
019import java.util.Arrays;
020
021import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
022import org.apache.commons.math4.legacy.exception.MathIllegalStateException;
023import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
024import org.apache.commons.math4.legacy.linear.RealMatrix;
025import org.apache.commons.math4.legacy.stat.descriptive.moment.GeometricMean;
026import org.apache.commons.math4.legacy.stat.descriptive.moment.Mean;
027import org.apache.commons.math4.legacy.stat.descriptive.moment.VectorialCovariance;
028import org.apache.commons.math4.legacy.stat.descriptive.rank.Max;
029import org.apache.commons.math4.legacy.stat.descriptive.rank.Min;
030import org.apache.commons.math4.legacy.stat.descriptive.summary.Sum;
031import org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfLogs;
032import org.apache.commons.math4.legacy.stat.descriptive.summary.SumOfSquares;
033import org.apache.commons.math4.core.jdkmath.JdkMath;
034import org.apache.commons.math4.legacy.core.MathArrays;
035import org.apache.commons.numbers.core.Precision;
036
037/**
038 * <p>Computes summary statistics for a stream of n-tuples added using the
039 * {@link #addValue(double[]) addValue} method. The data values are not stored
040 * in memory, so this class can be used to compute statistics for very large
041 * n-tuple streams.</p>
042 *
043 * <p>The {@link StorelessUnivariateStatistic} instances used to maintain
044 * summary state and compute statistics are configurable via setters.
045 * For example, the default implementation for the mean can be overridden by
046 * calling {@link #setMeanImpl(StorelessUnivariateStatistic[])}. Actual
047 * parameters to these methods must implement the
048 * {@link StorelessUnivariateStatistic} interface and configuration must be
049 * completed before <code>addValue</code> is called. No configuration is
050 * necessary to use the default, commons-math provided implementations.</p>
051 *
052 * <p>To compute statistics for a stream of n-tuples, construct a
053 * MultivariateStatistics instance with dimension n and then use
054 * {@link #addValue(double[])} to add n-tuples. The <code>getXxx</code>
055 * methods where Xxx is a statistic return an array of <code>double</code>
056 * values, where for <code>i = 0,...,n-1</code> the i<sup>th</sup> array element is the
057 * value of the given statistic for data range consisting of the i<sup>th</sup> element of
058 * each of the input n-tuples.  For example, if <code>addValue</code> is called
059 * with actual parameters {0, 1, 2}, then {3, 4, 5} and finally {6, 7, 8},
060 * <code>getSum</code> will return a three-element array with values
061 * {0+3+6, 1+4+7, 2+5+8}</p>
062 *
063 * <p>Note: This class is not thread-safe. Use
064 * {@link SynchronizedMultivariateSummaryStatistics} if concurrent access from multiple
065 * threads is required.</p>
066 *
067 * @since 1.2
068 */
069public class MultivariateSummaryStatistics
070    implements StatisticalMultivariateSummary {
071    /** Dimension of the data. */
072    private final int k;
073
074    /** Count of values that have been added. */
075    private long n;
076
077    /** Sum statistic implementation - can be reset by setter. */
078    private final StorelessUnivariateStatistic[] sumImpl;
079
080    /** Sum of squares statistic implementation - can be reset by setter. */
081    private final StorelessUnivariateStatistic[] sumSqImpl;
082
083    /** Minimum statistic implementation - can be reset by setter. */
084    private final StorelessUnivariateStatistic[] minImpl;
085
086    /** Maximum statistic implementation - can be reset by setter. */
087    private final StorelessUnivariateStatistic[] maxImpl;
088
089    /** Sum of log statistic implementation - can be reset by setter. */
090    private final StorelessUnivariateStatistic[] sumLogImpl;
091
092    /** Geometric mean statistic implementation - can be reset by setter. */
093    private final StorelessUnivariateStatistic[] geoMeanImpl;
094
095    /** Mean statistic implementation - can be reset by setter. */
096    private final StorelessUnivariateStatistic[] meanImpl;
097
098    /** Covariance statistic implementation - cannot be reset. */
099    private final VectorialCovariance covarianceImpl;
100
101    /**
102     * Construct a MultivariateSummaryStatistics instance.
103     * @param k dimension of the data
104     * @param isCovarianceBiasCorrected if true, the unbiased sample
105     * covariance is computed, otherwise the biased population covariance
106     * is computed
107     */
108    public MultivariateSummaryStatistics(int k, boolean isCovarianceBiasCorrected) {
109        this.k = k;
110
111        sumImpl     = new StorelessUnivariateStatistic[k];
112        sumSqImpl   = new StorelessUnivariateStatistic[k];
113        minImpl     = new StorelessUnivariateStatistic[k];
114        maxImpl     = new StorelessUnivariateStatistic[k];
115        sumLogImpl  = new StorelessUnivariateStatistic[k];
116        geoMeanImpl = new StorelessUnivariateStatistic[k];
117        meanImpl    = new StorelessUnivariateStatistic[k];
118
119        for (int i = 0; i < k; ++i) {
120            sumImpl[i]     = new Sum();
121            sumSqImpl[i]   = new SumOfSquares();
122            minImpl[i]     = new Min();
123            maxImpl[i]     = new Max();
124            sumLogImpl[i]  = new SumOfLogs();
125            geoMeanImpl[i] = new GeometricMean();
126            meanImpl[i]    = new Mean();
127        }
128
129        covarianceImpl =
130            new VectorialCovariance(k, isCovarianceBiasCorrected);
131    }
132
133    /**
134     * Add an n-tuple to the data.
135     *
136     * @param value  the n-tuple to add
137     * @throws DimensionMismatchException if the length of the array
138     * does not match the one used at construction
139     */
140    public void addValue(double[] value) throws DimensionMismatchException {
141        checkDimension(value.length);
142        for (int i = 0; i < k; ++i) {
143            double v = value[i];
144            sumImpl[i].increment(v);
145            sumSqImpl[i].increment(v);
146            minImpl[i].increment(v);
147            maxImpl[i].increment(v);
148            sumLogImpl[i].increment(v);
149            geoMeanImpl[i].increment(v);
150            meanImpl[i].increment(v);
151        }
152        covarianceImpl.increment(value);
153        n++;
154    }
155
156    /**
157     * Returns the dimension of the data.
158     * @return The dimension of the data
159     */
160    @Override
161    public int getDimension() {
162        return k;
163    }
164
165    /**
166     * Returns the number of available values.
167     * @return The number of available values
168     */
169    @Override
170    public long getN() {
171        return n;
172    }
173
174    /**
175     * Returns an array of the results of a statistic.
176     * @param stats univariate statistic array
177     * @return results array
178     */
179    private double[] getResults(StorelessUnivariateStatistic[] stats) {
180        double[] results = new double[stats.length];
181        for (int i = 0; i < results.length; ++i) {
182            results[i] = stats[i].getResult();
183        }
184        return results;
185    }
186
187    /**
188     * Returns an array whose i<sup>th</sup> entry is the sum of the.
189     * i<sup>th</sup> entries of the arrays that have been added using
190     * {@link #addValue(double[])}
191     *
192     * @return the array of component sums
193     */
194    @Override
195    public double[] getSum() {
196        return getResults(sumImpl);
197    }
198
199    /**
200     * Returns an array whose i<sup>th</sup> entry is the sum of squares of the.
201     * i<sup>th</sup> entries of the arrays that have been added using
202     * {@link #addValue(double[])}
203     *
204     * @return the array of component sums of squares
205     */
206    @Override
207    public double[] getSumSq() {
208        return getResults(sumSqImpl);
209    }
210
211    /**
212     * Returns an array whose i<sup>th</sup> entry is the sum of logs of the.
213     * i<sup>th</sup> entries of the arrays that have been added using
214     * {@link #addValue(double[])}
215     *
216     * @return the array of component log sums
217     */
218    @Override
219    public double[] getSumLog() {
220        return getResults(sumLogImpl);
221    }
222
223    /**
224     * Returns an array whose i<sup>th</sup> entry is the mean of the.
225     * i<sup>th</sup> entries of the arrays that have been added using
226     * {@link #addValue(double[])}
227     *
228     * @return the array of component means
229     */
230    @Override
231    public double[] getMean() {
232        return getResults(meanImpl);
233    }
234
235    /**
236     * Returns an array whose i<sup>th</sup> entry is the standard deviation of the.
237     * i<sup>th</sup> entries of the arrays that have been added using
238     * {@link #addValue(double[])}
239     *
240     * @return the array of component standard deviations
241     */
242    @Override
243    public double[] getStandardDeviation() {
244        double[] stdDev = new double[k];
245        if (getN() < 1) {
246            Arrays.fill(stdDev, Double.NaN);
247        } else if (getN() < 2) {
248            Arrays.fill(stdDev, 0.0);
249        } else {
250            RealMatrix matrix = covarianceImpl.getResult();
251            for (int i = 0; i < k; ++i) {
252                stdDev[i] = JdkMath.sqrt(matrix.getEntry(i, i));
253            }
254        }
255        return stdDev;
256    }
257
258    /**
259     * Returns the covariance matrix of the values that have been added.
260     *
261     * @return the covariance matrix
262     */
263    @Override
264    public RealMatrix getCovariance() {
265        return covarianceImpl.getResult();
266    }
267
268    /**
269     * Returns an array whose i<sup>th</sup> entry is the maximum of the.
270     * i<sup>th</sup> entries of the arrays that have been added using
271     * {@link #addValue(double[])}
272     *
273     * @return the array of component maxima
274     */
275    @Override
276    public double[] getMax() {
277        return getResults(maxImpl);
278    }
279
280    /**
281     * Returns an array whose i<sup>th</sup> entry is the minimum of the.
282     * i<sup>th</sup> entries of the arrays that have been added using
283     * {@link #addValue(double[])}
284     *
285     * @return the array of component minima
286     */
287    @Override
288    public double[] getMin() {
289        return getResults(minImpl);
290    }
291
292    /**
293     * Returns an array whose i<sup>th</sup> entry is the geometric mean of the.
294     * i<sup>th</sup> entries of the arrays that have been added using
295     * {@link #addValue(double[])}
296     *
297     * @return the array of component geometric means
298     */
299    @Override
300    public double[] getGeometricMean() {
301        return getResults(geoMeanImpl);
302    }
303
304    /**
305     * Generates a text report displaying
306     * summary statistics from values that
307     * have been added.
308     * @return String with line feeds displaying statistics
309     */
310    @Override
311    public String toString() {
312        final String separator = ", ";
313        final String suffix = System.getProperty("line.separator");
314        StringBuilder outBuffer = new StringBuilder();
315        outBuffer.append("MultivariateSummaryStatistics:").append(suffix);
316        outBuffer.append("n: ").append(getN()).append(suffix);
317        append(outBuffer, getMin(), "min: ", separator, suffix);
318        append(outBuffer, getMax(), "max: ", separator, suffix);
319        append(outBuffer, getMean(), "mean: ", separator, suffix);
320        append(outBuffer, getGeometricMean(), "geometric mean: ", separator, suffix);
321        append(outBuffer, getSumSq(), "sum of squares: ", separator, suffix);
322        append(outBuffer, getSumLog(), "sum of logarithms: ", separator, suffix);
323        append(outBuffer, getStandardDeviation(), "standard deviation: ", separator, suffix);
324        outBuffer.append("covariance: ").append(getCovariance()).append(suffix);
325        return outBuffer.toString();
326    }
327
328    /**
329     * Append a text representation of an array to a buffer.
330     * @param buffer buffer to fill
331     * @param data data array
332     * @param prefix text prefix
333     * @param separator elements separator
334     * @param suffix text suffix
335     */
336    private void append(StringBuilder buffer, double[] data,
337                        String prefix, String separator, String suffix) {
338        buffer.append(prefix);
339        for (int i = 0; i < data.length; ++i) {
340            if (i > 0) {
341                buffer.append(separator);
342            }
343            buffer.append(data[i]);
344        }
345        buffer.append(suffix);
346    }
347
348    /**
349     * Resets all statistics and storage.
350     */
351    public void clear() {
352        this.n = 0;
353        for (int i = 0; i < k; ++i) {
354            minImpl[i].clear();
355            maxImpl[i].clear();
356            sumImpl[i].clear();
357            sumLogImpl[i].clear();
358            sumSqImpl[i].clear();
359            geoMeanImpl[i].clear();
360            meanImpl[i].clear();
361        }
362        covarianceImpl.clear();
363    }
364
365    /**
366     * Returns true iff <code>object</code> is a <code>MultivariateSummaryStatistics</code>
367     * instance and all statistics have the same values as this.
368     * @param object the object to test equality against.
369     * @return true if object equals this
370     */
371    @Override
372    public boolean equals(Object object) {
373        if (object == this ) {
374            return true;
375        }
376        if (!(object instanceof MultivariateSummaryStatistics)) {
377            return false;
378        }
379        MultivariateSummaryStatistics stat = (MultivariateSummaryStatistics) object;
380        return MathArrays.equalsIncludingNaN(stat.getGeometricMean(), getGeometricMean()) &&
381               MathArrays.equalsIncludingNaN(stat.getMax(),           getMax())           &&
382               MathArrays.equalsIncludingNaN(stat.getMean(),          getMean())          &&
383               MathArrays.equalsIncludingNaN(stat.getMin(),           getMin())           &&
384               Precision.equalsIncludingNaN(stat.getN(),             getN())             &&
385               MathArrays.equalsIncludingNaN(stat.getSum(),           getSum())           &&
386               MathArrays.equalsIncludingNaN(stat.getSumSq(),         getSumSq())         &&
387               MathArrays.equalsIncludingNaN(stat.getSumLog(),        getSumLog())        &&
388               stat.getCovariance().equals( getCovariance());
389    }
390
391    /**
392     * Returns hash code based on values of statistics.
393     *
394     * @return hash code
395     */
396    @Override
397    public int hashCode() {
398        int result = 31 + Arrays.hashCode(getGeometricMean());
399        result = result * 31 + Arrays.hashCode(getGeometricMean());
400        result = result * 31 + Arrays.hashCode(getMax());
401        result = result * 31 + Arrays.hashCode(getMean());
402        result = result * 31 + Arrays.hashCode(getMin());
403        result = result * 31 + Double.hashCode(getN());
404        result = result * 31 + Arrays.hashCode(getSum());
405        result = result * 31 + Arrays.hashCode(getSumSq());
406        result = result * 31 + Arrays.hashCode(getSumLog());
407        result = result * 31 + getCovariance().hashCode();
408        return result;
409    }
410
411    // Getters and setters for statistics implementations
412    /**
413     * Sets statistics implementations.
414     * @param newImpl new implementations for statistics
415     * @param oldImpl old implementations for statistics
416     * @throws DimensionMismatchException if the array dimension
417     * does not match the one used at construction
418     * @throws MathIllegalStateException if data has already been added
419     * (i.e. if n > 0)
420     */
421    private void setImpl(StorelessUnivariateStatistic[] newImpl,
422                         StorelessUnivariateStatistic[] oldImpl) throws MathIllegalStateException,
423                         DimensionMismatchException {
424        checkEmpty();
425        checkDimension(newImpl.length);
426        System.arraycopy(newImpl, 0, oldImpl, 0, newImpl.length);
427    }
428
429    /**
430     * Returns the currently configured Sum implementation.
431     *
432     * @return the StorelessUnivariateStatistic implementing the sum
433     */
434    public StorelessUnivariateStatistic[] getSumImpl() {
435        return sumImpl.clone();
436    }
437
438    /**
439     * <p>Sets the implementation for the Sum.</p>
440     * <p>This method must be activated before any data has been added - i.e.,
441     * before {@link #addValue(double[]) addValue} has been used to add data;
442     * otherwise an IllegalStateException will be thrown.</p>
443     *
444     * @param sumImpl the StorelessUnivariateStatistic instance to use
445     * for computing the Sum
446     * @throws DimensionMismatchException if the array dimension
447     * does not match the one used at construction
448     * @throws MathIllegalStateException if data has already been added
449     *  (i.e if n &gt; 0)
450     */
451    public void setSumImpl(StorelessUnivariateStatistic[] sumImpl)
452    throws MathIllegalStateException, DimensionMismatchException {
453        setImpl(sumImpl, this.sumImpl);
454    }
455
456    /**
457     * Returns the currently configured sum of squares implementation.
458     *
459     * @return the StorelessUnivariateStatistic implementing the sum of squares
460     */
461    public StorelessUnivariateStatistic[] getSumsqImpl() {
462        return sumSqImpl.clone();
463    }
464
465    /**
466     * <p>Sets the implementation for the sum of squares.</p>
467     * <p>This method must be activated before any data has been added - i.e.,
468     * before {@link #addValue(double[]) addValue} has been used to add data;
469     * otherwise an IllegalStateException will be thrown.</p>
470     *
471     * @param sumsqImpl the StorelessUnivariateStatistic instance to use
472     * for computing the sum of squares
473     * @throws DimensionMismatchException if the array dimension
474     * does not match the one used at construction
475     * @throws MathIllegalStateException if data has already been added
476     *  (i.e if n &gt; 0)
477     */
478    public void setSumsqImpl(StorelessUnivariateStatistic[] sumsqImpl)
479    throws MathIllegalStateException, DimensionMismatchException {
480        setImpl(sumsqImpl, this.sumSqImpl);
481    }
482
483    /**
484     * Returns the currently configured minimum implementation.
485     *
486     * @return the StorelessUnivariateStatistic implementing the minimum
487     */
488    public StorelessUnivariateStatistic[] getMinImpl() {
489        return minImpl.clone();
490    }
491
492    /**
493     * <p>Sets the implementation for the minimum.</p>
494     * <p>This method must be activated before any data has been added - i.e.,
495     * before {@link #addValue(double[]) addValue} has been used to add data;
496     * otherwise an IllegalStateException will be thrown.</p>
497     *
498     * @param minImpl the StorelessUnivariateStatistic instance to use
499     * for computing the minimum
500     * @throws DimensionMismatchException if the array dimension
501     * does not match the one used at construction
502     * @throws MathIllegalStateException if data has already been added
503     *  (i.e if n &gt; 0)
504     */
505    public void setMinImpl(StorelessUnivariateStatistic[] minImpl)
506    throws MathIllegalStateException, DimensionMismatchException {
507        setImpl(minImpl, this.minImpl);
508    }
509
510    /**
511     * Returns the currently configured maximum implementation.
512     *
513     * @return the StorelessUnivariateStatistic implementing the maximum
514     */
515    public StorelessUnivariateStatistic[] getMaxImpl() {
516        return maxImpl.clone();
517    }
518
519    /**
520     * <p>Sets the implementation for the maximum.</p>
521     * <p>This method must be activated before any data has been added - i.e.,
522     * before {@link #addValue(double[]) addValue} has been used to add data;
523     * otherwise an IllegalStateException will be thrown.</p>
524     *
525     * @param maxImpl the StorelessUnivariateStatistic instance to use
526     * for computing the maximum
527     * @throws DimensionMismatchException if the array dimension
528     * does not match the one used at construction
529     * @throws MathIllegalStateException if data has already been added
530     *  (i.e if n &gt; 0)
531     */
532    public void setMaxImpl(StorelessUnivariateStatistic[] maxImpl)
533    throws MathIllegalStateException, DimensionMismatchException{
534        setImpl(maxImpl, this.maxImpl);
535    }
536
537    /**
538     * Returns the currently configured sum of logs implementation.
539     *
540     * @return the StorelessUnivariateStatistic implementing the log sum
541     */
542    public StorelessUnivariateStatistic[] getSumLogImpl() {
543        return sumLogImpl.clone();
544    }
545
546    /**
547     * <p>Sets the implementation for the sum of logs.</p>
548     * <p>This method must be activated before any data has been added - i.e.,
549     * before {@link #addValue(double[]) addValue} has been used to add data;
550     * otherwise an IllegalStateException will be thrown.</p>
551     *
552     * @param sumLogImpl the StorelessUnivariateStatistic instance to use
553     * for computing the log sum
554     * @throws DimensionMismatchException if the array dimension
555     * does not match the one used at construction
556     * @throws MathIllegalStateException if data has already been added
557     *  (i.e if n &gt; 0)
558     */
559    public void setSumLogImpl(StorelessUnivariateStatistic[] sumLogImpl)
560    throws MathIllegalStateException, DimensionMismatchException{
561        setImpl(sumLogImpl, this.sumLogImpl);
562    }
563
564    /**
565     * Returns the currently configured geometric mean implementation.
566     *
567     * @return the StorelessUnivariateStatistic implementing the geometric mean
568     */
569    public StorelessUnivariateStatistic[] getGeoMeanImpl() {
570        return geoMeanImpl.clone();
571    }
572
573    /**
574     * <p>Sets the implementation for the geometric mean.</p>
575     * <p>This method must be activated before any data has been added - i.e.,
576     * before {@link #addValue(double[]) addValue} has been used to add data;
577     * otherwise an IllegalStateException will be thrown.</p>
578     *
579     * @param geoMeanImpl the StorelessUnivariateStatistic instance to use
580     * for computing the geometric mean
581     * @throws DimensionMismatchException if the array dimension
582     * does not match the one used at construction
583     * @throws MathIllegalStateException if data has already been added
584     *  (i.e if n &gt; 0)
585     */
586    public void setGeoMeanImpl(StorelessUnivariateStatistic[] geoMeanImpl)
587    throws MathIllegalStateException, DimensionMismatchException {
588        setImpl(geoMeanImpl, this.geoMeanImpl);
589    }
590
591    /**
592     * Returns the currently configured mean implementation.
593     *
594     * @return the StorelessUnivariateStatistic implementing the mean
595     */
596    public StorelessUnivariateStatistic[] getMeanImpl() {
597        return meanImpl.clone();
598    }
599
600    /**
601     * <p>Sets the implementation for the mean.</p>
602     * <p>This method must be activated before any data has been added - i.e.,
603     * before {@link #addValue(double[]) addValue} has been used to add data;
604     * otherwise an IllegalStateException will be thrown.</p>
605     *
606     * @param meanImpl the StorelessUnivariateStatistic instance to use
607     * for computing the mean
608     * @throws DimensionMismatchException if the array dimension
609     * does not match the one used at construction
610     * @throws MathIllegalStateException if data has already been added
611     *  (i.e if n &gt; 0)
612     */
613    public void setMeanImpl(StorelessUnivariateStatistic[] meanImpl)
614    throws MathIllegalStateException, DimensionMismatchException{
615        setImpl(meanImpl, this.meanImpl);
616    }
617
618    /**
619     * Throws MathIllegalStateException if the statistic is not empty.
620     * @throws MathIllegalStateException if n > 0.
621     */
622    private void checkEmpty() throws MathIllegalStateException {
623        if (n > 0) {
624            throw new MathIllegalStateException(
625                    LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC, n);
626        }
627    }
628
629    /**
630     * Throws DimensionMismatchException if dimension != k.
631     * @param dimension dimension to check
632     * @throws DimensionMismatchException if dimension != k
633     */
634    private void checkDimension(int dimension) throws DimensionMismatchException {
635        if (dimension != k) {
636            throw new DimensionMismatchException(dimension, k);
637        }
638    }
639}