author    Lev Proleev <levp@google.com>  2019-10-23 17:24:57 +0100
committer Lev Proleev <levp@google.com>  2019-11-27 14:47:27 +0000
commit    799369fdedf36424e088d557bf8ed66e7d9a93e0 (patch)
tree      e7c57b065d86cd3f953187d3d7a53734c8803302 /nn/common/operations/Broadcast.cpp
parent    8b64d1d84943fd762b3d653f88310d1c59474264 (diff)
Add TENSOR_QUANT8_ASYMM_SIGNED support to ADD, MUL and SUB.
* Update legacy TF Lite function calls to the current ones.
* Add QuantizeMultiplierSmallerThanOneExp (same as the TF Lite function of
  that name): it does the same thing as QuantizeMultiplierSmallerThanOne but
  returns a left shift instead of a right shift, so that all the
  QuantizeMultiplier* functions have the same interface.

Bug: 143934463
Bug: 143934627
Bug: 143935040
Test: NNTest_static and 1.3 VTS QuantizationCouplingTest
Change-Id: I897c07bc5d93dfa7c03a0c66eeb77fcbdec6aea0
Merged-In: I897c07bc5d93dfa7c03a0c66eeb77fcbdec6aea0
(cherry picked from commit c7b32bf9555a556cd1b9e5ffe9372a7f6d3fadb8)
Diffstat (limited to 'nn/common/operations/Broadcast.cpp')
-rw-r--r--  nn/common/operations/Broadcast.cpp | 249
1 file changed, 167 insertions(+), 82 deletions(-)
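
Note on the new helper: the difference between QuantizeMultiplierSmallerThanOneExp
and QuantizeMultiplierSmallerThanOne is purely a sign convention on the returned
shift. A minimal sketch of the left-shift variant, assuming the usual Q31
fixed-point encoding (the real implementation lives in common/OperationsUtils.cpp
and mirrors TF Lite's):

    #include <cmath>
    #include <cstdint>

    // Sketch: encode a multiplier in (0, 1) as a Q31 fixed-point value plus a
    // non-positive left shift. The older QuantizeMultiplierSmallerThanOne
    // returned the negated exponent, i.e. a right shift.
    bool QuantizeMultiplierSmallerThanOneExp(double multiplier,
                                             int32_t* quantized_multiplier,
                                             int32_t* left_shift) {
        if (!(multiplier > 0.0 && multiplier < 1.0)) return false;
        int exponent = 0;
        const double q = std::frexp(multiplier, &exponent);  // multiplier == q * 2^exponent
        int64_t q_fixed = static_cast<int64_t>(std::lround(q * (1LL << 31)));
        if (q_fixed == (1LL << 31)) {  // rounding pushed q up to 1.0
            q_fixed /= 2;
            ++exponent;
        }
        *quantized_multiplier = static_cast<int32_t>(q_fixed);
        *left_shift = exponent;
        return true;
    }
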
diff --git a/nn/common/operations/Broadcast.cpp b/nn/common/operations/Broadcast.cpp
index 132325439..e973c119f 100644
--- a/nn/common/operations/Broadcast.cpp
+++ b/nn/common/operations/Broadcast.cpp
@@ -18,17 +18,21 @@
#define LOG_TAG "Operations"
+#include <tensorflow/lite/kernels/internal/optimized/integer_ops/add.h>
+#include <tensorflow/lite/kernels/internal/optimized/integer_ops/mul.h>
+#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/add.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/mul.h>
+#include <tensorflow/lite/kernels/internal/types.h>
+
+#include <algorithm>
+#include <vector>
+
#include "CpuOperationUtils.h"
#include "HalInterfaces.h"
#include "OperationResolver.h"
-
-#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
-#include <tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h>
-
#include "Tracing.h"
-#include <algorithm>
-
namespace android {
namespace nn {
@@ -118,10 +122,11 @@ bool addFloat16(const _Float16* in1, const Shape& shape1, const _Float16* in2, c
return binaryOperationFloat16(in1, shape1, in2, shape2, activation, out, shapeOut, &addFloat32);
}
-bool addQuant8(const uint8_t* in1, const Shape& shape1, const uint8_t* in2, const Shape& shape2,
- int32_t activation, uint8_t* out, const Shape& shapeOut) {
+template <typename T>
+bool addQuant8(const T* in1, const Shape& shape1, const T* in2, const Shape& shape2,
+ int32_t activation, T* out, const Shape& shapeOut) {
NNTRACE_TRANS("addQuant8");
- bool needBroadcast = !SameShape(shape1, shape2);
+ const bool needBroadcast = !SameShape(shape1, shape2);
const int32_t input1_offset = -shape1.offset;
const int32_t input2_offset = -shape2.offset;
@@ -135,49 +140,65 @@ bool addQuant8(const uint8_t* in1, const Shape& shape1, const uint8_t* in2, cons
int32_t input1_multiplier;
int32_t input1_shift;
- if (!QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &input1_multiplier,
- &input1_shift)) {
- return false;
- }
+ NN_RET_CHECK(QuantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier,
+ &input1_shift));
int32_t input2_multiplier;
int32_t input2_shift;
- if (!QuantizeMultiplierSmallerThanOne(real_input2_multiplier, &input2_multiplier,
- &input2_shift)) {
- return false;
- }
+ NN_RET_CHECK(QuantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier,
+ &input2_shift));
int32_t output_multiplier;
int32_t output_shift;
- if (!QuantizeMultiplierSmallerThanOne(real_output_multiplier, &output_multiplier,
- &output_shift)) {
- return false;
- }
+ NN_RET_CHECK(QuantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier,
+ &output_shift));
+
int32_t output_activation_min;
int32_t output_activation_max;
- CalculateActivationRangeUint8(activation, shapeOut, &output_activation_min,
- &output_activation_max);
+ constexpr bool isSignedOp = std::is_same<T, int8_t>::value;
+ if constexpr (isSignedOp) {
+ CalculateActivationRangeInt8(activation, shapeOut, &output_activation_min,
+ &output_activation_max);
+ } else {
+ CalculateActivationRangeUint8(activation, shapeOut, &output_activation_min,
+ &output_activation_max);
+ }
- if (needBroadcast) {
- NNTRACE_COMP_SWITCH("optimized_ops::BroadcastAdd");
-#define ANDROID_NN_BROADCAST_ADD(activation) \
- tflite::optimized_ops::BroadcastAdd<tflite::FusedActivationFunctionType::activation>( \
- left_shift, in1, convertShapeToDims(shape1), input1_offset, input1_multiplier, \
- input1_shift, in2, convertShapeToDims(shape2), input2_offset, input2_multiplier, \
- input2_shift, output_offset, output_multiplier, output_shift, output_activation_min, \
- output_activation_max, out, convertShapeToDims(shapeOut))
+ tflite::ArithmeticParams op_params;
+ op_params.left_shift = left_shift;
+ op_params.input1_offset = input1_offset;
+ op_params.input1_multiplier = input1_multiplier;
+ op_params.input1_shift = input1_shift;
+ op_params.input2_offset = input2_offset;
+ op_params.input2_multiplier = input2_multiplier;
+ op_params.input2_shift = input2_shift;
+ op_params.output_offset = output_offset;
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ tflite::SetActivationParams(output_activation_min, output_activation_max, &op_params);
- ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_BROADCAST_ADD)
-#undef ANDROID_NN_BROADCAST_ADD
+ if (needBroadcast) {
+ if constexpr (isSignedOp) {
+ NNTRACE_COMP_SWITCH("reference_integer_ops::BroadcastAdd4DSlow");
+ tflite::reference_integer_ops::BroadcastAdd4DSlow(
+ op_params, convertShapeToTflshape(shape1), in1, convertShapeToTflshape(shape2),
+ in2, convertShapeToTflshape(shapeOut), out);
+ } else {
+ NNTRACE_COMP_SWITCH("reference_ops::BroadcastAdd4DSlow");
+ tflite::reference_ops::BroadcastAdd4DSlow(op_params, convertShapeToTflshape(shape1),
+ in1, convertShapeToTflshape(shape2), in2,
+ convertShapeToTflshape(shapeOut), out);
+ }
} else {
- NNTRACE_COMP_SWITCH("optimized_ops::Add");
-#define ANDROID_NN_NORMAL_ADD(activation) \
- tflite::optimized_ops::Add<tflite::FusedActivationFunctionType::activation>( \
- left_shift, in1, convertShapeToDims(shape1), input1_offset, input1_multiplier, \
- input1_shift, in2, convertShapeToDims(shape2), input2_offset, input2_multiplier, \
- input2_shift, output_offset, output_multiplier, output_shift, output_activation_min, \
- output_activation_max, out, convertShapeToDims(shapeOut))
-
- ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_NORMAL_ADD)
-#undef ANDROID_NN_NORMAL_ADD
+ if constexpr (isSignedOp) {
+ NNTRACE_COMP_SWITCH("optimized_integer_ops::Add");
+ tflite::optimized_integer_ops::Add(op_params, convertShapeToTflshape(shape1), in1,
+ convertShapeToTflshape(shape2), in2,
+ convertShapeToTflshape(shapeOut), out);
+ } else {
+ NNTRACE_COMP_SWITCH("optimized_ops::Add");
+ tflite::optimized_ops::Add(op_params, convertShapeToTflshape(shape1), in1,
+ convertShapeToTflshape(shape2), in2,
+ convertShapeToTflshape(shapeOut), out);
+ }
}
return true;
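
The calls above also move off the legacy Dims<4>-based kernel signatures:
convertShapeToDims is replaced by convertShapeToTflshape, matching TF Lite's
RuntimeShape-based API. A sketch of such an adapter, assuming Shape exposes its
dimensions as a std::vector<uint32_t> (the actual helper is declared in
CpuOperationUtils.h):

    #include <cstdint>
    #include <vector>
    #include <tensorflow/lite/kernels/internal/types.h>

    // Sketch: copy the NNAPI dimensions into the int32 buffer that
    // tflite::RuntimeShape reads from.
    inline tflite::RuntimeShape convertShapeToTflshape(const Shape& shape) {
        std::vector<int32_t> dims(shape.dimensions.begin(), shape.dimensions.end());
        return tflite::RuntimeShape(static_cast<int>(dims.size()), dims.data());
    }
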
@@ -216,8 +237,9 @@ bool mulFloat16(const _Float16* in1, const Shape& shape1, const _Float16* in2, c
return binaryOperationFloat16(in1, shape1, in2, shape2, activation, out, shapeOut, &mulFloat32);
}
-bool mulQuant8(const uint8_t* in1, const Shape& shape1, const uint8_t* in2, const Shape& shape2,
- int32_t activation, uint8_t* out, const Shape& shapeOut) {
+template <typename T>
+bool mulQuant8(const T* in1, const Shape& shape1, const T* in2, const Shape& shape2,
+ int32_t activation, T* out, const Shape& shapeOut) {
NNTRACE_TRANS("mulQuant8");
const int32_t input1_offset = -shape1.offset;
const int32_t input2_offset = -shape2.offset;
@@ -226,20 +248,39 @@ bool mulQuant8(const uint8_t* in1, const Shape& shape1, const uint8_t* in2, cons
const double real_multiplier = input_product_scale / shapeOut.scale;
int32 output_multiplier;
int output_shift;
- if (!QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift)) {
- return false;
- }
+ NN_RET_CHECK(QuantizeMultiplierSmallerThanOneExp(real_multiplier, &output_multiplier,
+ &output_shift));
+
+ constexpr bool isSignedOp = std::is_same<T, int8_t>::value;
int32_t output_activation_min;
int32_t output_activation_max;
- CalculateActivationRangeUint8(activation, shapeOut, &output_activation_min,
- &output_activation_max);
+ if constexpr (isSignedOp) {
+ CalculateActivationRangeInt8(activation, shapeOut, &output_activation_min,
+ &output_activation_max);
+ } else {
+ CalculateActivationRangeUint8(activation, shapeOut, &output_activation_min,
+ &output_activation_max);
+ }
- // Use BROADCAST version to handle the normal case.
- NNTRACE_COMP_SWITCH("optimized_ops::BroadcastMul");
- tflite::optimized_ops::BroadcastMul(in1, convertShapeToDims(shape1), input1_offset, in2,
- convertShapeToDims(shape2), input2_offset, output_offset,
- output_multiplier, output_shift, output_activation_min,
- output_activation_max, out, convertShapeToDims(shapeOut));
+ tflite::ArithmeticParams op_params;
+ op_params.input1_offset = input1_offset;
+ op_params.input2_offset = input2_offset;
+ op_params.output_offset = output_offset;
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ tflite::SetActivationParams(output_activation_min, output_activation_max, &op_params);
+
+ if constexpr (isSignedOp) {
+ NNTRACE_COMP_SWITCH("reference_integer_ops::BroadcastMul4DSlow");
+ tflite::reference_integer_ops::BroadcastMul4DSlow(op_params, convertShapeToTflshape(shape1),
+ in1, convertShapeToTflshape(shape2), in2,
+ convertShapeToTflshape(shapeOut), out);
+ } else {
+ NNTRACE_COMP_SWITCH("reference_ops::BroadcastMul4DSlow");
+ tflite::reference_ops::BroadcastMul4DSlow(op_params, convertShapeToTflshape(shape1), in1,
+ convertShapeToTflshape(shape2), in2,
+ convertShapeToTflshape(shapeOut), out);
+ }
return true;
}
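
For reference, the single real_multiplier in mulQuant8 comes from the
requantization identity: with value = scale * (q - zero_point) for each tensor,
q_out = z_out + (s1 * s2 / s_out) * (q1 - z1) * (q2 - z2). A toy floating-point
check of that identity (values are illustrative, not taken from the tests):

    #include <cassert>
    #include <cmath>
    #include <cstdint>

    int main() {
        const double s1 = 0.5, s2 = 0.25, s_out = 0.5;   // scales
        const int32_t z1 = 0, z2 = 0, z_out = 10;        // zero points
        const int32_t q1 = 6, q2 = 8;                    // represent 3.0 and 2.0
        const double real_multiplier = s1 * s2 / s_out;  // 0.25, < 1 as required
        const int32_t q_out = z_out + static_cast<int32_t>(std::lround(
                real_multiplier * (q1 - z1) * (q2 - z2)));
        assert(s_out * (q_out - z_out) == 3.0 * 2.0);    // dequantizes to 6.0
        return 0;
    }
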
@@ -267,8 +308,9 @@ bool subFloat16(const _Float16* in1, const Shape& shape1, const _Float16* in2, c
return binaryOperationFloat16(in1, shape1, in2, shape2, activation, out, shapeOut, &subFloat32);
}
-bool subQuant8(const uint8_t* in1, const Shape& shape1, const uint8_t* in2, const Shape& shape2,
- int32_t activation, uint8_t* out, const Shape& shapeOut) {
+template <typename T>
+bool subQuant8(const T* in1, const Shape& shape1, const T* in2, const Shape& shape2,
+ int32_t activation, T* out, const Shape& shapeOut) {
NNTRACE_TRANS("subQuant8");
const int32_t input1_offset = -shape1.offset;
@@ -283,41 +325,58 @@ bool subQuant8(const uint8_t* in1, const Shape& shape1, const uint8_t* in2, cons
int32_t input1_multiplier;
int32_t input1_shift;
- if (!QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &input1_multiplier,
- &input1_shift)) {
- return false;
- }
+ NN_RET_CHECK(QuantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier,
+ &input1_shift));
int32_t input2_multiplier;
int32_t input2_shift;
- if (!QuantizeMultiplierSmallerThanOne(real_input2_multiplier, &input2_multiplier,
- &input2_shift)) {
- return false;
- }
+ NN_RET_CHECK(QuantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier,
+ &input2_shift));
+ // Negate multiplier of the second input, so that we can use Add kernels.
input2_multiplier *= -1;
+
int32_t output_multiplier;
int32_t output_shift;
- if (!QuantizeMultiplierSmallerThanOne(real_output_multiplier, &output_multiplier,
- &output_shift)) {
- return false;
- }
+ NN_RET_CHECK(QuantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier,
+ &output_shift));
+
+ constexpr bool isSignedOp = std::is_same<T, int8_t>::value;
int32_t output_activation_min;
int32_t output_activation_max;
- CalculateActivationRangeUint8(activation, shapeOut, &output_activation_min,
- &output_activation_max);
+ if constexpr (isSignedOp) {
+ CalculateActivationRangeInt8(activation, shapeOut, &output_activation_min,
+ &output_activation_max);
+ } else {
+ CalculateActivationRangeUint8(activation, shapeOut, &output_activation_min,
+ &output_activation_max);
+ }
+
+ tflite::ArithmeticParams op_params;
+ op_params.left_shift = left_shift;
+ op_params.input1_offset = input1_offset;
+ op_params.input1_multiplier = input1_multiplier;
+ op_params.input1_shift = input1_shift;
+ op_params.input2_offset = input2_offset;
+ op_params.input2_multiplier = input2_multiplier;
+ op_params.input2_shift = input2_shift;
+ op_params.output_offset = output_offset;
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ tflite::SetActivationParams(output_activation_min, output_activation_max, &op_params);
// We are using tflite::optimized_ops::BroadcastAdd unconditionally here
// because tflite::optimized_ops::Add fails to pass some of the
// sub_quantized_different_scales tests.
- NNTRACE_COMP_SWITCH("optimized_ops::BroadcastAdd");
-#define ANDROID_NN_BROADCAST_ADD(activation) \
- tflite::optimized_ops::BroadcastAdd<tflite::FusedActivationFunctionType::activation>( \
- left_shift, in1, convertShapeToDims(shape1), input1_offset, input1_multiplier, \
- input1_shift, in2, convertShapeToDims(shape2), input2_offset, input2_multiplier, \
- input2_shift, output_offset, output_multiplier, output_shift, output_activation_min, \
- output_activation_max, out, convertShapeToDims(shapeOut))
-
- ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_BROADCAST_ADD)
-#undef ANDROID_NN_BROADCAST_ADD
+ if constexpr (isSignedOp) {
+ NNTRACE_COMP_SWITCH("reference_integer_ops::BroadcastAdd4DSlow");
+ tflite::reference_integer_ops::BroadcastAdd4DSlow(op_params, convertShapeToTflshape(shape1),
+ in1, convertShapeToTflshape(shape2), in2,
+ convertShapeToTflshape(shapeOut), out);
+ } else {
+ NNTRACE_COMP_SWITCH("reference_ops::BroadcastAdd4DSlow");
+ tflite::reference_ops::BroadcastAdd4DSlow(op_params, convertShapeToTflshape(shape1), in1,
+ convertShapeToTflshape(shape2), in2,
+ convertShapeToTflshape(shapeOut), out);
+ }
return true;
}
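
The negation of input2_multiplier above is what lets SUB reuse the Add kernels:
each input is rescaled by its own multiplier before the sum, so flipping one
sign turns the sum into a difference. A floating-point analogue, ignoring
offsets, shifts, and fixed-point rounding:

    #include <cassert>

    int main() {
        // The Add kernel effectively computes out = m_out * (m1 * a + m2 * b).
        const double m1 = 0.5, m_out = 2.0;
        const double a = 8.0, b = 6.0;
        double m2 = 0.5;
        assert(m_out * (m1 * a + m2 * b) == 14.0);  // ADD behaviour
        m2 *= -1;                                   // what subQuant8 does
        assert(m_out * (m1 * a + m2 * b) == 2.0);   // now computes a - b
        return 0;
    }
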
@@ -376,6 +435,8 @@ bool validate(OperationType opType, const IOperationValidationContext* context)
} else {
NN_RET_CHECK(validateHalVersion(context, std::max(HalVersion::V1_0, opIntroducedAt)));
}
+ } else if (inputType == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
+ NN_RET_CHECK(validateHalVersion(context, std::max(HalVersion::V1_3, opIntroducedAt)));
} else {
NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << getOperationName(opType);
}
@@ -421,6 +482,14 @@ bool executeAdd(IOperationExecutionContext* context) {
context->getInputValue<int32_t>(kActivationScalar),
context->getOutputBuffer<uint8_t>(kOutputTensor),
context->getOutputShape(kOutputTensor));
+ case OperandType::TENSOR_QUANT8_ASYMM_SIGNED:
+ return addQuant8(context->getInputBuffer<int8_t>(kInputTensor1),
+ context->getInputShape(kInputTensor1),
+ context->getInputBuffer<int8_t>(kInputTensor2),
+ context->getInputShape(kInputTensor2),
+ context->getInputValue<int32_t>(kActivationScalar),
+ context->getOutputBuffer<int8_t>(kOutputTensor),
+ context->getOutputShape(kOutputTensor));
default:
NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation ADD";
}
@@ -454,6 +523,14 @@ bool executeMul(IOperationExecutionContext* context) {
context->getInputValue<int32_t>(kActivationScalar),
context->getOutputBuffer<uint8_t>(kOutputTensor),
context->getOutputShape(kOutputTensor));
+ case OperandType::TENSOR_QUANT8_ASYMM_SIGNED:
+ return mulQuant8(context->getInputBuffer<int8_t>(kInputTensor1),
+ context->getInputShape(kInputTensor1),
+ context->getInputBuffer<int8_t>(kInputTensor2),
+ context->getInputShape(kInputTensor2),
+ context->getInputValue<int32_t>(kActivationScalar),
+ context->getOutputBuffer<int8_t>(kOutputTensor),
+ context->getOutputShape(kOutputTensor));
default:
NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation MUL";
}
@@ -487,6 +564,14 @@ bool executeSub(IOperationExecutionContext* context) {
context->getInputValue<int32_t>(kActivationScalar),
context->getOutputBuffer<uint8_t>(kOutputTensor),
context->getOutputShape(kOutputTensor));
+ case OperandType::TENSOR_QUANT8_ASYMM_SIGNED:
+ return subQuant8(context->getInputBuffer<int8_t>(kInputTensor1),
+ context->getInputShape(kInputTensor1),
+ context->getInputBuffer<int8_t>(kInputTensor2),
+ context->getInputShape(kInputTensor2),
+ context->getInputValue<int32_t>(kActivationScalar),
+ context->getOutputBuffer<int8_t>(kOutputTensor),
+ context->getOutputShape(kOutputTensor));
default:
NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation SUB";
}