aboutsummaryrefslogtreecommitdiff
path: root/public/output_stages.h
diff options
context:
space:
mode:
Diffstat (limited to 'public/output_stages.h')
-rw-r--r--public/output_stages.h32
1 files changed, 29 insertions, 3 deletions
diff --git a/public/output_stages.h b/public/output_stages.h
index 23bcdc0..1d5fca4 100644
--- a/public/output_stages.h
+++ b/public/output_stages.h
@@ -66,8 +66,9 @@ struct OutputStageQuantizeDownInt32ToUint8ScalePC {
};
// This output stage takes int32 values and returns still int32 values,
-// but "quantized down" to the uint8 scale; in other words, its output
-// is typically what one would then clamp to [0..255] and cast to uint8
+// but "quantized down" to a difference scale; for example, in a pipeline
+// that outputs uint8 values in [0..255], the output of this stage would be
+// int32 values ready to be clamped to [0..255] and casted to uint8
// (see OutputStageSaturatingCastToUint8).
//
// This "quantization down" process depends on 3 parameters,
@@ -111,17 +112,42 @@ struct OutputStageQuantizeDownInt32ToUint8ScalePC {
// expansions that implicitly rely on 0-padding. If 0 were not
// a representable value, such operations would have to pad
// using a nonzero value, introducing bias in the computation.
-struct OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint {
+struct OutputStageQuantizeDownInt32ByFixedPoint {
std::int32_t result_fixedpoint_multiplier;
std::int32_t result_shift;
std::int32_t result_offset_after_shift;
};
+// OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint is the old deprecated
+// name of OutputStageQuantizeDownInt32ByFixedPoint, before we noticed that
+// there really wasn't anything Uint8-specific about it.
+using OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint = OutputStageQuantizeDownInt32ByFixedPoint;
+
+// Variant of OutputStageQuantizeDownInt32ByFixedPoint where the 'shift'
+// is not necessarily just a right shift, so we can represent multipliers
+// greater than 1. This takes an result_exponent parameter; when it's
+// <= 0, this is equivalent to OutputStageQuantizeDownInt32ByFixedPoint
+// with result_shift = -result_exponent.
+// In the general case, this consists in first left-shifting by
+// std::max(result_exponent, 0), before doing the same as
+// OutputStageQuantizeDownInt32ByFixedPoint with
+// result_shift = std::max(-result_exponent, 0).
+struct OutputStageScaleInt32ByFixedPointAndExponent {
+ std::int32_t result_fixedpoint_multiplier;
+ std::int32_t result_exponent;
+ std::int32_t result_offset_after_shift;
+};
+
// This output stage takes int32 values that are expected to be already
// on the final uint8 scale, but not necessarily in the [0..255] range.
// It clamps them to the [0..255] range and returns them casted to uint8.
struct OutputStageSaturatingCastToUint8 {};
+// This output stage takes int32 values that are expected to be already
+// on the final int16 scale, but not necessarily in the [-32768..32767] range.
+// It clamps them to the [-32768..32767] range and returns them casted to int16.
+struct OutputStageSaturatingCastToInt16 {};
+
// This output stage depends on a "bias vector" that should contain int32
// entries, and be either a row-vector of the same number of columns as the
// result matrix, or a column-vector of the same number of rows as the