aboutsummaryrefslogtreecommitdiff
path: root/src/share/classes/java/util/stream
diff options
context:
space:
mode:
authordarcy <none@none>2013-12-01 23:35:28 -0800
committerdarcy <none@none>2013-12-01 23:35:28 -0800
commitd0688bfcb609851f70f90ce14397274163a225b5 (patch)
tree5caa7e2537886b7a446b6764968ec319b5062253 /src/share/classes/java/util/stream
parentada50c7bd984569a56fd0576a69e0f40f4749a1d (diff)
downloadjdk8u_jdk-d0688bfcb609851f70f90ce14397274163a225b5.tar.gz
8006572: DoubleStream.sum() & DoubleSummaryStats implementations that reduce numerical errors
Reviewed-by: psandoz, mduigou
Diffstat (limited to 'src/share/classes/java/util/stream')
-rw-r--r--src/share/classes/java/util/stream/Collectors.java59
-rw-r--r--src/share/classes/java/util/stream/DoublePipeline.java54
2 files changed, 94 insertions, 19 deletions
diff --git a/src/share/classes/java/util/stream/Collectors.java b/src/share/classes/java/util/stream/Collectors.java
index 93ffb0113c..f52ccba57a 100644
--- a/src/share/classes/java/util/stream/Collectors.java
+++ b/src/share/classes/java/util/stream/Collectors.java
@@ -505,14 +505,43 @@ public final class Collectors {
*/
public static <T> Collector<T, ?, Double>
summingDouble(ToDoubleFunction<? super T> mapper) {
+ /*
+ * In the arrays allocated for the collect operation, index 0
+ * holds the high-order bits of the running sum and index 1
+ * holds the low-order bits of the sum computed via
+ * compensated summation.
+ */
return new CollectorImpl<>(
- () -> new double[1],
- (a, t) -> { a[0] += mapper.applyAsDouble(t); },
- (a, b) -> { a[0] += b[0]; return a; },
- a -> a[0], CH_NOID);
+ () -> new double[2],
+ (a, t) -> { sumWithCompensation(a, mapper.applyAsDouble(t)); },
+ (a, b) -> { sumWithCompensation(a, b[0]); return sumWithCompensation(a, b[1]); },
+ // Better error bounds to add both terms as the final sum
+ a -> a[0] + a[1],
+ CH_NOID);
}
/**
+ * Incorporate a new double value using Kahan summation /
+ * compensation summation.
+ *
+ * High-order bits of the sum are in intermediateSum[0], low-order
+ * bits of the sum are in intermediateSum[1], any additional
+ * elements are application-specific.
+ *
+ * @param intermediateSum the high-order and low-order words of the intermediate sum
+ * @param value the name value to be included in the running sum
+ */
+ static double[] sumWithCompensation(double[] intermediateSum, double value) {
+ double tmp = value - intermediateSum[1];
+ double sum = intermediateSum[0];
+ double velvel = sum + tmp; // Little wolf of rounding error
+ intermediateSum[1] = (velvel - sum) - tmp;
+ intermediateSum[0] = velvel;
+ return intermediateSum;
+ }
+
+
+ /**
* Returns a {@code Collector} that produces the arithmetic mean of an integer-valued
* function applied to the input elements. If no elements are present,
* the result is 0.
@@ -560,17 +589,31 @@ public final class Collectors {
* value is a {@code NaN} or the sum is at any point a {@code NaN} then the
* average will be {@code NaN}.
*
+ * @implNote The {@code double} format can represent all
+ * consecutive integers in the range -2<sup>53</sup> to
+ * 2<sup>53</sup>. If the pipeline has more than 2<sup>53</sup>
+ * values, the divisor in the average computation will saturate at
+ * 2<sup>53</sup>, leading to additional numerical errors.
+ *
* @param <T> the type of the input elements
* @param mapper a function extracting the property to be summed
* @return a {@code Collector} that produces the sum of a derived property
*/
public static <T> Collector<T, ?, Double>
averagingDouble(ToDoubleFunction<? super T> mapper) {
+ /*
+ * In the arrays allocated for the collect operation, index 0
+ * holds the high-order bits of the running sum, index 1 holds
+ * the low-order bits of the sum computed via compensated
+ * summation, and index 2 holds the number of values seen.
+ */
return new CollectorImpl<>(
- () -> new double[2],
- (a, t) -> { a[0] += mapper.applyAsDouble(t); a[1]++; },
- (a, b) -> { a[0] += b[0]; a[1] += b[1]; return a; },
- a -> (a[1] == 0) ? 0.0d : a[0] / a[1], CH_NOID);
+ () -> new double[3],
+ (a, t) -> { sumWithCompensation(a, mapper.applyAsDouble(t)); a[2]++; },
+ (a, b) -> { sumWithCompensation(a, b[0]); sumWithCompensation(a, b[1]); a[2] += b[2]; return a; },
+ // Better error bounds to add both terms as the final sum to compute average
+ a -> (a[2] == 0) ? 0.0d : ((a[0] + a[1]) / a[2]),
+ CH_NOID);
}
/**
diff --git a/src/share/classes/java/util/stream/DoublePipeline.java b/src/share/classes/java/util/stream/DoublePipeline.java
index 6af346c56b..ad9c056382 100644
--- a/src/share/classes/java/util/stream/DoublePipeline.java
+++ b/src/share/classes/java/util/stream/DoublePipeline.java
@@ -377,8 +377,23 @@ abstract class DoublePipeline<E_IN>
@Override
public final double sum() {
- // TODO: better algorithm to compensate for errors
- return reduce(0.0, Double::sum);
+ /*
+ * In the arrays allocated for the collect operation, index 0
+ * holds the high-order bits of the running sum and index 1
+ * holds the low-order bits of the sum computed via
+ * compensated summation.
+ */
+ double[] summation = collect(() -> new double[2],
+ (ll, d) -> {
+ Collectors.sumWithCompensation(ll, d);
+ },
+ (ll, rr) -> {
+ Collectors.sumWithCompensation(ll, rr[0]);
+ Collectors.sumWithCompensation(ll, rr[1]);
+ });
+
+ // Better error bounds to add both terms as the final sum
+ return summation[0] + summation[1];
}
@Override
@@ -391,20 +406,37 @@ abstract class DoublePipeline<E_IN>
return reduce(Math::max);
}
+ /**
+ * {@inheritDoc}
+ *
+ * @implNote The {@code double} format can represent all
+ * consecutive integers in the range -2<sup>53</sup> to
+ * 2<sup>53</sup>. If the pipeline has more than 2<sup>53</sup>
+ * values, the divisor in the average computation will saturate at
+ * 2<sup>53</sup>, leading to additional numerical errors.
+ */
@Override
public final OptionalDouble average() {
- double[] avg = collect(() -> new double[2],
- (ll, i) -> {
- ll[0]++;
- ll[1] += i;
+ /*
+ * In the arrays allocated for the collect operation, index 0
+ * holds the high-order bits of the running sum, index 1 holds
+ * the low-order bits of the sum computed via compensated
+ * summation, and index 2 holds the number of values seen.
+ */
+ double[] avg = collect(() -> new double[3],
+ (ll, d) -> {
+ ll[2]++;
+ Collectors.sumWithCompensation(ll, d);
},
(ll, rr) -> {
- ll[0] += rr[0];
- ll[1] += rr[1];
+ Collectors.sumWithCompensation(ll, rr[0]);
+ Collectors.sumWithCompensation(ll, rr[1]);
+ ll[2] += rr[2];
});
- return avg[0] > 0
- ? OptionalDouble.of(avg[1] / avg[0])
- : OptionalDouble.empty();
+ return avg[2] > 0
+ // Better error bounds to add both terms as the final sum to compute average
+ ? OptionalDouble.of((avg[0] + avg[1]) / avg[2])
+ : OptionalDouble.empty();
}
@Override