1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.statistics.distribution;
18
19 import java.util.Arrays;
20 import java.util.Collections;
21 import java.util.EnumMap;
22 import java.util.EnumSet;
23 import java.util.Map;
24 import java.util.Objects;
25 import java.util.Properties;
26 import java.util.Set;
27 import java.util.regex.Pattern;
28
29
30
31
32
33
34 abstract class DistributionTestData {
35
36
37
38
39
/** Property key of the default absolute tolerance (required; read by the constructor). */
static final String KEY_TOLERANCE_ABSOLUTE = "tolerance.absolute";
/** Property key of the default relative tolerance (required; read by the constructor). */
static final String KEY_TOLERANCE_RELATIVE = "tolerance.relative";

/** Key suffix of a per-test disable flag; the key prefix is passed to {@code TestName.fromString}. */
private static final String SUFFIX_DISABLE = ".disable";
/** Key suffix of a per-test absolute tolerance override. */
private static final String SUFFIX_TOLERANCE_ABSOLUTE = ".absolute";
/** Key suffix of a per-test relative tolerance override. */
private static final String SUFFIX_TOLERANCE_RELATIVE = ".relative";
/** Index of the absolute tolerance within a per-test tolerance pair. */
private static final int INDEX_ABSOLUTE = 0;
/** Index of the relative tolerance within a per-test tolerance pair. */
private static final int INDEX_RELATIVE = 1;
/** Sentinel stored in a tolerance pair slot that has no override. */
private static final double UNSET_TOLERANCE = -1;
/** Template pair with both slots unset; it is cloned before use and never modified itself. */
private static final double[] UNSET_TOLERANCES = {UNSET_TOLERANCE, UNSET_TOLERANCE};

/** Delimiter for list-valued properties: one or more spaces and/or commas. */
private static final Pattern PATTERN = Pattern.compile("[ ,]+");

/** Values read from {@code <pf>.values}, where {@code <pf>} is {@link #getProbabilityFunctionName()}. */
protected final double[] pfValues;
/** Values read from {@code log<pf>.values}; when that key is absent, the natural log of {@link #pfValues}. */
protected final double[] logPfValues;

/** Parsed tokens of the {@code parameters} property (each element an Integer or a Double). */
private final Object[] parameters;
/** Value of the {@code mean} property. */
private final double mean;
/** Value of the {@code variance} property. */
private final double variance;
/** Per-test tolerance pairs, indexed by {@link #INDEX_ABSOLUTE} and {@link #INDEX_RELATIVE}. */
private final Map<TestName, double[]> tolerance;
/** Tests whose {@code .disable} property was {@code true}. */
private final Set<TestName> disabled;

/** Default absolute tolerance, used when a test has no override. */
private final double absoluteTolerance;
/** Default relative tolerance, used when a test has no override. */
private final double relativeTolerance;
/** Values of the {@code cdf.values} property. */
private final double[] cdfValues;
/** Values of the {@code sf.values} property; when absent, {@code 1 - cdfValues[i]}. */
private final double[] sfValues;
/** Values of the {@code cdf.hp.values} property, or null when absent. */
private final double[] cdfHpValues;
/** Values of the {@code sf.hp.values} property, or null when absent. */
private final double[] sfHpValues;
90
91
92
93
94
95
96 static class ContinuousDistributionTestData extends DistributionTestData {
97
98 private final double lower;
99
100 private final double upper;
101
102 private final double[] cdfPoints;
103
104 private final double[] pdfPoints;
105
106 private final double[] sfPoints;
107
108 private final double[] cdfHpPoints;
109
110 private final double[] sfHpPoints;
111
112 private final double[] icdfPoints;
113
114 private final double[] icdfValues;
115
116 private final double[] isfPoints;
117
118 private final double[] isfValues;
119
120
121
122
123 ContinuousDistributionTestData(Properties props) {
124 super(props);
125
126 lower = getAsDouble(props, "lower", Double.NEGATIVE_INFINITY);
127 upper = getAsDouble(props, "upper", Double.POSITIVE_INFINITY);
128
129 cdfPoints = getAsDoubleArray(props, "cdf.points");
130
131 pdfPoints = getAsDoubleArray(props, "pdf.points", cdfPoints);
132 sfPoints = getAsDoubleArray(props, "sf.points", cdfPoints);
133 cdfHpPoints = getAsDoubleArray(props, "cdf.hp.points", null);
134 sfHpPoints = getAsDoubleArray(props, "sf.hp.points", null);
135
136
137 icdfPoints = getAsDoubleArray(props, "icdf.points", null);
138 icdfValues = getAsDoubleArray(props, "icdf.values", null);
139 isfPoints = getAsDoubleArray(props, "isf.points", null);
140 isfValues = getAsDoubleArray(props, "isf.values", null);
141
142 validatePair(cdfPoints, getCdfValues(), "cdf");
143 validatePair(pdfPoints, getPdfValues(), "pdf");
144 validatePair(pdfPoints, getLogPdfValues(), "logpdf");
145 validatePair(sfPoints, getSfValues(), "sf");
146 validatePair(cdfHpPoints, getCdfHpValues(), "cdf.hp");
147 validatePair(sfHpPoints, getSfHpValues(), "sf.hp");
148 validatePair(icdfPoints, icdfValues, "icdf");
149 validatePair(isfPoints, isfValues, "isf");
150 }
151
152 @Override
153 String getProbabilityFunctionName() {
154 return "pdf";
155 }
156
157
158
159
160
161
162 double getLower() {
163 return lower;
164 }
165
166
167
168
169
170
171 double getUpper() {
172 return upper;
173 }
174
175
176
177
178
179
180 double[] getCdfPoints() {
181 return cdfPoints;
182 }
183
184
185
186
187
188
189 double[] getPdfPoints() {
190 return pdfPoints;
191 }
192
193
194
195
196
197
198 double[] getPdfValues() {
199 return pfValues;
200 }
201
202
203
204
205
206
207 double[] getLogPdfValues() {
208 return logPfValues;
209 }
210
211
212
213
214
215
216 double[] getSfPoints() {
217 return sfPoints;
218 }
219
220
221
222
223
224
225
226 double[] getCdfHpPoints() {
227 return cdfHpPoints;
228 }
229
230
231
232
233
234
235
236 double[] getSfHpPoints() {
237 return sfHpPoints;
238 }
239
240 @Override
241 double[] getIcdfPoints() {
242 return icdfPoints;
243 }
244
245
246
247
248
249
250 double[] getIcdfValues() {
251 return icdfValues;
252 }
253
254 @Override
255 double[] getIsfPoints() {
256 return isfPoints;
257 }
258
259
260
261
262
263
264 double[] getIsfValues() {
265 return isfValues;
266 }
267 }
268
269
270
271
272
273
274 static class DiscreteDistributionTestData extends DistributionTestData {
275
276 private final int lower;
277
278 private final int upper;
279
280 private final int[] cdfPoints;
281
282 private final int[] pmfPoints;
283
284 private final int[] sfPoints;
285
286 private final int[] cdfHpPoints;
287
288 private final int[] sfHpPoints;
289
290 private final double[] icdfPoints;
291
292 private final int[] icdfValues;
293
294 private final double[] isfPoints;
295
296 private final int[] isfValues;
297
298
299
300
301 DiscreteDistributionTestData(Properties props) {
302 super(props);
303
304 lower = getAsInt(props, "lower", Integer.MIN_VALUE);
305 upper = getAsInt(props, "upper", Integer.MAX_VALUE);
306
307 cdfPoints = getAsIntArray(props, "cdf.points");
308
309 pmfPoints = getAsIntArray(props, "pmf.points", cdfPoints);
310 sfPoints = getAsIntArray(props, "sf.points", cdfPoints);
311 cdfHpPoints = getAsIntArray(props, "cdf.hp.points", null);
312 sfHpPoints = getAsIntArray(props, "sf.hp.points", null);
313
314
315 icdfPoints = getAsDoubleArray(props, "icdf.points", null);
316 icdfValues = getAsIntArray(props, "icdf.values", null);
317 isfPoints = getAsDoubleArray(props, "isf.points", null);
318 isfValues = getAsIntArray(props, "isf.values", null);
319
320 validatePair(cdfPoints, getCdfValues(), "cdf");
321 validatePair(pmfPoints, getPmfValues(), "pmf");
322 validatePair(pmfPoints, getLogPmfValues(), "logpmf");
323 validatePair(sfPoints, getSfValues(), "sf");
324 validatePair(cdfHpPoints, getCdfHpValues(), "cdf.hp");
325 validatePair(sfHpPoints, getSfHpValues(), "sf.hp");
326 validatePair(icdfPoints, icdfValues, "icdf");
327 validatePair(isfPoints, isfValues, "isf");
328 }
329
330 @Override
331 String getProbabilityFunctionName() {
332 return "pmf";
333 }
334
335
336
337
338
339
340 int getLower() {
341 return lower;
342 }
343
344
345
346
347
348
349 int getUpper() {
350 return upper;
351 }
352
353
354
355
356
357
358 int[] getCdfPoints() {
359 return cdfPoints;
360 }
361
362
363
364
365
366
367 int[] getPmfPoints() {
368 return pmfPoints;
369 }
370
371
372
373
374
375
376 double[] getPmfValues() {
377 return pfValues;
378 }
379
380
381
382
383
384
385 double[] getLogPmfValues() {
386 return logPfValues;
387 }
388
389
390
391
392
393
394 int[] getSfPoints() {
395 return sfPoints;
396 }
397
398
399
400
401
402
403
404 int[] getCdfHpPoints() {
405 return cdfHpPoints;
406 }
407
408
409
410
411
412
413
414 int[] getSfHpPoints() {
415 return sfHpPoints;
416 }
417
418 @Override
419 double[] getIcdfPoints() {
420 return icdfPoints;
421 }
422
423
424
425
426
427
428 int[] getIcdfValues() {
429 return icdfValues;
430 }
431
432 @Override
433 double[] getIsfPoints() {
434 return isfPoints;
435 }
436
437
438
439
440
441
442 int[] getIsfValues() {
443 return isfValues;
444 }
445 }
446
447
448
449
/**
 * Reads the data common to all distribution types from the properties.
 *
 * <p>Required keys: {@code parameters}, {@code mean}, {@code variance},
 * {@code tolerance.absolute}, {@code tolerance.relative}, {@code cdf.values} and
 * {@code <pf>.values}, where {@code <pf>} is {@link #getProbabilityFunctionName()}.
 * When {@code log<pf>.values} or {@code sf.values} is absent the values are derived
 * from the probability function values or the CDF values respectively.
 *
 * <p>Side effect: the two default tolerance keys are removed from {@code props}
 * before the keys are scanned for per-test settings (presumably so that they are not
 * mistaken for a per-test {@code .absolute} / {@code .relative} override).
 *
 * @param props Properties containing the test data
 * @throws NullPointerException if a required property is missing
 * @throws IllegalArgumentException if a value cannot be parsed
 */
DistributionTestData(Properties props) {
    // Parameters are a delimited list of int or double tokens.
    parameters = PATTERN.splitAsStream(get(props, "parameters"))
                        .map(DistributionTestData::parseParameter)
                        .toArray();
    mean = getAsDouble(props, "mean");
    variance = getAsDouble(props, "variance");
    absoluteTolerance = getAsDouble(props, KEY_TOLERANCE_ABSOLUTE);
    relativeTolerance = getAsDouble(props, KEY_TOLERANCE_RELATIVE);

    // Required values.
    cdfValues = getAsDoubleArray(props, "cdf.values");
    final String pf = getProbabilityFunctionName();
    pfValues = getAsDoubleArray(props, pf + ".values");

    // Optional values that can be derived from the required ones.
    final double[] logPf = getAsDoubleArray(props, "log" + pf + ".values", null);
    logPfValues = logPf == null && pfValues != null ?
        Arrays.stream(pfValues).map(Math::log).toArray() :
        logPf;
    final double[] sf = getAsDoubleArray(props, "sf.values", null);
    sfValues = sf == null && cdfValues != null ?
        Arrays.stream(cdfValues).map(c -> 1.0 - c).toArray() :
        sf;

    // Optional high-precision values; no fallback.
    cdfHpValues = getAsDoubleArray(props, "cdf.hp.values", null);
    sfHpValues = getAsDoubleArray(props, "sf.hp.values", null);

    // Drop the default tolerance keys before scanning keys by suffix.
    props.remove(KEY_TOLERANCE_ABSOLUTE);
    props.remove(KEY_TOLERANCE_RELATIVE);

    // Collect per-test disable flags and tolerance overrides.
    final EnumMap<TestName, double[]> overrides = new EnumMap<>(TestName.class);
    final EnumSet<TestName> switchedOff = EnumSet.noneOf(TestName.class);
    for (final String key : props.stringPropertyNames()) {
        if (key.endsWith(SUFFIX_DISABLE) && getAsBoolean(props, key, false)) {
            final TestName test =
                TestName.fromString(key.substring(0, key.length() - SUFFIX_DISABLE.length()));
            if (test != null) {
                switchedOff.add(test);
            }
            continue;
        }

        // Identify which slot of the tolerance pair (if any) the key targets.
        final String suffix;
        final int slot;
        if (key.endsWith(SUFFIX_TOLERANCE_ABSOLUTE)) {
            suffix = SUFFIX_TOLERANCE_ABSOLUTE;
            slot = INDEX_ABSOLUTE;
        } else if (key.endsWith(SUFFIX_TOLERANCE_RELATIVE)) {
            suffix = SUFFIX_TOLERANCE_RELATIVE;
            slot = INDEX_RELATIVE;
        } else {
            continue;
        }
        final TestName test = TestName.fromString(key.substring(0, key.length() - suffix.length()));
        if (test != null) {
            // The pair is created (both slots unset) before the value is parsed.
            overrides.computeIfAbsent(test, unused -> UNSET_TOLERANCES.clone())[slot] =
                getAsDouble(props, key);
        }
    }

    // Use the shared immutable empty collections when nothing was configured.
    this.tolerance = overrides.isEmpty() ? Collections.emptyMap() : overrides;
    this.disabled = switchedOff.isEmpty() ? Collections.emptySet() : switchedOff;
}
507
508
509
510
511
512
513
/**
 * Gets the name of the probability function. The constructor uses it as the prefix of
 * the {@code <name>.values} and {@code log<name>.values} property keys.
 *
 * <p>Note: This is invoked from the constructor of this class, i.e. before the
 * subclass constructor body has run, so an implementation must not depend on
 * subclass instance state.
 *
 * @return the probability function name
 */
abstract String getProbabilityFunctionName();
515
516
517
518
519
520
521
522
523 private static Object parseParameter(String value) {
524
525
526 try {
527 return parseInt(value);
528 } catch (NumberFormatException ex) { }
529 try {
530 return parseDouble(value);
531 } catch (NumberFormatException ex) {
532 throw new IllegalArgumentException("Unknown parameter type: " + value, ex);
533 }
534 }
535
536
537
538
539
540
541
542
543
544 private static String get(Properties props, String key) {
545 return Objects.requireNonNull(props.getProperty(key), () -> "Missing test data: " + key);
546 }
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562 private static int parseInt(String s) {
563 if ("max".equals(s)) {
564 return Integer.MAX_VALUE;
565 } else if ("min".equals(s)) {
566 return Integer.MIN_VALUE;
567 }
568 return Integer.parseInt(s);
569 }
570
571
572
573
574
575
576
577
578
579
580
581
582
583 private static double parseDouble(String s) {
584
585 final int len = s.length();
586 if ((len == 3 || len == 4) &&
587 s.charAt(len - 1) == 'f' &&
588 s.charAt(len - 2) == 'n') {
589
590 final int start = s.charAt(0) == '-' ? 1 : 0;
591
592
593 if (s.length() - start == 3 && (s.charAt(start) == 'I' || s.charAt(start) == 'i')) {
594 return start == 0 ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
595 }
596 }
597 return Double.parseDouble(s);
598 }
599
600
601
602
603
604
605
606
607
608
609 private static double getAsDouble(Properties props, String key) {
610 try {
611 return parseDouble(get(props, key));
612 } catch (NumberFormatException ex) {
613 throw new IllegalArgumentException("Invalid double: " + key, ex);
614 }
615 }
616
617
618
619
620
621
622
623
624
625
626 private static double getAsDouble(Properties props, String key, double defaultValue) {
627 try {
628 final String s = props.getProperty(key);
629 return s == null ? defaultValue : parseDouble(s);
630 } catch (NumberFormatException ex) {
631 throw new IllegalArgumentException("Invalid double: " + key, ex);
632 }
633 }
634
635
636
637
638
639
640
641
642
643
644 private static int getAsInt(Properties props, String key, int defaultValue) {
645 try {
646 final String s = props.getProperty(key);
647 return s == null ? defaultValue : parseInt(s);
648 } catch (NumberFormatException ex) {
649 throw new IllegalArgumentException("Invalid double: " + key, ex);
650 }
651 }
652
653
654
655
656
657
658
659
660
661
662 private static boolean getAsBoolean(Properties props, String key, boolean defaultValue) {
663 try {
664 final String s = props.getProperty(key);
665 return s == null ? defaultValue : Boolean.parseBoolean(s);
666 } catch (NumberFormatException ex) {
667 throw new IllegalArgumentException("Invalid boolean: " + key, ex);
668 }
669 }
670
671
672
673
674
675
676
677
678
679
680 private static double[] getAsDoubleArray(Properties props, String key) {
681 try {
682 return PATTERN.splitAsStream(get(props, key)).mapToDouble(DistributionTestData::parseDouble).toArray();
683 } catch (NumberFormatException ex) {
684 throw new IllegalArgumentException("Invalid double: " + key, ex);
685 }
686 }
687
688
689
690
691
692
693
694
695
696
697 private static double[] getAsDoubleArray(Properties props, String key, double[] defaultValue) {
698 try {
699 final String s = props.getProperty(key);
700 return s == null ? defaultValue :
701 PATTERN.splitAsStream(s).mapToDouble(DistributionTestData::parseDouble).toArray();
702 } catch (NumberFormatException ex) {
703 throw new IllegalArgumentException("Invalid double: " + key, ex);
704 }
705 }
706
707
708
709
710
711
712
713
714
715 private static int[] getAsIntArray(Properties props, String key) {
716 try {
717 return PATTERN.splitAsStream(get(props, key)).mapToInt(DistributionTestData::parseInt).toArray();
718 } catch (NumberFormatException ex) {
719 throw new IllegalArgumentException("Invalid double: " + key, ex);
720 }
721 }
722
723
724
725
726
727
728
729
730
731
732 private static int[] getAsIntArray(Properties props, String key, int[] defaultValue) {
733 try {
734 final String s = props.getProperty(key);
735 return s == null ? defaultValue :
736 PATTERN.splitAsStream(s).mapToInt(DistributionTestData::parseInt).toArray();
737 } catch (NumberFormatException ex) {
738 throw new IllegalArgumentException("Invalid double: " + key, ex);
739 }
740 }
741
742
743
744
745
746
747
748
749 private static void validatePair(double[] p, double[] v, String name) {
750 validatePair(TestUtils.getLength(p), TestUtils.getLength(v), name);
751 }
752
753
754
755
756
757
758
759
760 private static void validatePair(int[] p, double[] v, String name) {
761 validatePair(TestUtils.getLength(p), TestUtils.getLength(v), name);
762 }
763
764
765
766
767
768
769
770
771 private static void validatePair(double[] p, int[] v, String name) {
772 validatePair(TestUtils.getLength(p), TestUtils.getLength(v), name);
773 }
774
775
776
777
778
779
780
781
782 private static void validatePair(int l1, int l2, String name) {
783
784 if (l1 != 0 && l2 != 0 && l1 != l2) {
785 throw new IllegalArgumentException(
786 String.format("Points-Values length mismatch for %s: %d != %d", name, l1, l2));
787 }
788 }
789
790
791
792
793
794
795 Object[] getParameters() {
796 return parameters;
797 }
798
799
800
801
802
803
804 double getMean() {
805 return mean;
806 }
807
808
809
810
811
812
813 double getVariance() {
814 return variance;
815 }
816
817
818
819
820
821
822
823
824 double getAbsoluteTolerance(TestName name) {
825 return getTolerance(name, INDEX_ABSOLUTE, absoluteTolerance);
826 }
827
828
829
830
831
832
833
834
835 double getRelativeTolerance(TestName name) {
836 return getTolerance(name, INDEX_RELATIVE, relativeTolerance);
837 }
838
839
840
841
842
843
844
845
846
847
848 private double getTolerance(TestName name, int index, double defaultValue) {
849 final double[] tol = tolerance.get(name);
850 if (tol != null && tol[index] != UNSET_TOLERANCE) {
851 return tol[index];
852 }
853 return defaultValue;
854 }
855
856
857
858
859
860
861 double getAbsoluteTolerance() {
862 return absoluteTolerance;
863 }
864
865
866
867
868
869
870 double getRelativeTolerance() {
871 return relativeTolerance;
872 }
873
874
875
876
877
878
879
880 boolean isDisabled(TestName name) {
881 return disabled.contains(name);
882 }
883
884
885
886
887
888
889
890 boolean isEnabled(TestName name) {
891 return !isDisabled(name);
892 }
893
894
895
896
897
898
899 double[] getCdfValues() {
900 return cdfValues;
901 }
902
903
904
905
906
907
908 double[] getSfValues() {
909 return sfValues;
910 }
911
912
913
914
915
916
917 double[] getCdfHpValues() {
918 return cdfHpValues;
919 }
920
921
922
923
924
925
926 double[] getSfHpValues() {
927 return sfHpValues;
928 }
929
930
931
932
933
934
/**
 * Gets the points (probabilities) at which the inverse CDF is evaluated. The nested
 * implementations in this file return the values of the {@code icdf.points} property.
 *
 * @return the inverse CDF points; the nested implementations return null when the
 * property is absent
 */
abstract double[] getIcdfPoints();
936
937
938
939
940
941
/**
 * Gets the points (probabilities) at which the inverse SF is evaluated. The nested
 * implementations in this file return the values of the {@code isf.points} property.
 *
 * @return the inverse SF points; the nested implementations return null when the
 * property is absent
 */
abstract double[] getIsfPoints();
943 }
944