diff options
author | darcy <none@none> | 2013-12-01 23:35:28 -0800 |
---|---|---|
committer | darcy <none@none> | 2013-12-01 23:35:28 -0800 |
commit | d0688bfcb609851f70f90ce14397274163a225b5 (patch) | |
tree | 5caa7e2537886b7a446b6764968ec319b5062253 /src/share/classes/java/util/stream | |
parent | ada50c7bd984569a56fd0576a69e0f40f4749a1d (diff) | |
download | jdk8u_jdk-d0688bfcb609851f70f90ce14397274163a225b5.tar.gz |
8006572: DoubleStream.sum() & DoubleSummaryStats implementations that reduce numerical errors
Reviewed-by: psandoz, mduigou
Diffstat (limited to 'src/share/classes/java/util/stream')
-rw-r--r-- | src/share/classes/java/util/stream/Collectors.java | 59 | ||||
-rw-r--r-- | src/share/classes/java/util/stream/DoublePipeline.java | 54 |
2 files changed, 94 insertions, 19 deletions
diff --git a/src/share/classes/java/util/stream/Collectors.java b/src/share/classes/java/util/stream/Collectors.java index 93ffb0113c..f52ccba57a 100644 --- a/src/share/classes/java/util/stream/Collectors.java +++ b/src/share/classes/java/util/stream/Collectors.java @@ -505,14 +505,43 @@ public final class Collectors { */ public static <T> Collector<T, ?, Double> summingDouble(ToDoubleFunction<? super T> mapper) { + /* + * In the arrays allocated for the collect operation, index 0 + * holds the high-order bits of the running sum and index 1 + * holds the low-order bits of the sum computed via + * compensated summation. + */ return new CollectorImpl<>( - () -> new double[1], - (a, t) -> { a[0] += mapper.applyAsDouble(t); }, - (a, b) -> { a[0] += b[0]; return a; }, - a -> a[0], CH_NOID); + () -> new double[2], + (a, t) -> { sumWithCompensation(a, mapper.applyAsDouble(t)); }, + (a, b) -> { sumWithCompensation(a, b[0]); return sumWithCompensation(a, b[1]); }, + // Better error bounds to add both terms as the final sum + a -> a[0] + a[1], + CH_NOID); } /** + * Incorporate a new double value using Kahan summation / + * compensation summation. + * + * High-order bits of the sum are in intermediateSum[0], low-order + * bits of the sum are in intermediateSum[1], any additional + * elements are application-specific. + * + * @param intermediateSum the high-order and low-order words of the intermediate sum + * @param value the name value to be included in the running sum + */ + static double[] sumWithCompensation(double[] intermediateSum, double value) { + double tmp = value - intermediateSum[1]; + double sum = intermediateSum[0]; + double velvel = sum + tmp; // Little wolf of rounding error + intermediateSum[1] = (velvel - sum) - tmp; + intermediateSum[0] = velvel; + return intermediateSum; + } + + + /** * Returns a {@code Collector} that produces the arithmetic mean of an integer-valued * function applied to the input elements. If no elements are present, * the result is 0. @@ -560,17 +589,31 @@ public final class Collectors { * value is a {@code NaN} or the sum is at any point a {@code NaN} then the * average will be {@code NaN}. * + * @implNote The {@code double} format can represent all + * consecutive integers in the range -2<sup>53</sup> to + * 2<sup>53</sup>. If the pipeline has more than 2<sup>53</sup> + * values, the divisor in the average computation will saturate at + * 2<sup>53</sup>, leading to additional numerical errors. + * * @param <T> the type of the input elements * @param mapper a function extracting the property to be summed * @return a {@code Collector} that produces the sum of a derived property */ public static <T> Collector<T, ?, Double> averagingDouble(ToDoubleFunction<? super T> mapper) { + /* + * In the arrays allocated for the collect operation, index 0 + * holds the high-order bits of the running sum, index 1 holds + * the low-order bits of the sum computed via compensated + * summation, and index 2 holds the number of values seen. + */ return new CollectorImpl<>( - () -> new double[2], - (a, t) -> { a[0] += mapper.applyAsDouble(t); a[1]++; }, - (a, b) -> { a[0] += b[0]; a[1] += b[1]; return a; }, - a -> (a[1] == 0) ? 0.0d : a[0] / a[1], CH_NOID); + () -> new double[3], + (a, t) -> { sumWithCompensation(a, mapper.applyAsDouble(t)); a[2]++; }, + (a, b) -> { sumWithCompensation(a, b[0]); sumWithCompensation(a, b[1]); a[2] += b[2]; return a; }, + // Better error bounds to add both terms as the final sum to compute average + a -> (a[2] == 0) ? 0.0d : ((a[0] + a[1]) / a[2]), + CH_NOID); } /** diff --git a/src/share/classes/java/util/stream/DoublePipeline.java b/src/share/classes/java/util/stream/DoublePipeline.java index 6af346c56b..ad9c056382 100644 --- a/src/share/classes/java/util/stream/DoublePipeline.java +++ b/src/share/classes/java/util/stream/DoublePipeline.java @@ -377,8 +377,23 @@ abstract class DoublePipeline<E_IN> @Override public final double sum() { - // TODO: better algorithm to compensate for errors - return reduce(0.0, Double::sum); + /* + * In the arrays allocated for the collect operation, index 0 + * holds the high-order bits of the running sum and index 1 + * holds the low-order bits of the sum computed via + * compensated summation. + */ + double[] summation = collect(() -> new double[2], + (ll, d) -> { + Collectors.sumWithCompensation(ll, d); + }, + (ll, rr) -> { + Collectors.sumWithCompensation(ll, rr[0]); + Collectors.sumWithCompensation(ll, rr[1]); + }); + + // Better error bounds to add both terms as the final sum + return summation[0] + summation[1]; } @Override @@ -391,20 +406,37 @@ abstract class DoublePipeline<E_IN> return reduce(Math::max); } + /** + * {@inheritDoc} + * + * @implNote The {@code double} format can represent all + * consecutive integers in the range -2<sup>53</sup> to + * 2<sup>53</sup>. If the pipeline has more than 2<sup>53</sup> + * values, the divisor in the average computation will saturate at + * 2<sup>53</sup>, leading to additional numerical errors. + */ @Override public final OptionalDouble average() { - double[] avg = collect(() -> new double[2], - (ll, i) -> { - ll[0]++; - ll[1] += i; + /* + * In the arrays allocated for the collect operation, index 0 + * holds the high-order bits of the running sum, index 1 holds + * the low-order bits of the sum computed via compensated + * summation, and index 2 holds the number of values seen. + */ + double[] avg = collect(() -> new double[3], + (ll, d) -> { + ll[2]++; + Collectors.sumWithCompensation(ll, d); }, (ll, rr) -> { - ll[0] += rr[0]; - ll[1] += rr[1]; + Collectors.sumWithCompensation(ll, rr[0]); + Collectors.sumWithCompensation(ll, rr[1]); + ll[2] += rr[2]; }); - return avg[0] > 0 - ? OptionalDouble.of(avg[1] / avg[0]) - : OptionalDouble.empty(); + return avg[2] > 0 + // Better error bounds to add both terms as the final sum to compute average + ? OptionalDouble.of((avg[0] + avg[1]) / avg[2]) + : OptionalDouble.empty(); } @Override |