diff options
author | Lev Proleev <levp@google.com> | 2019-10-23 17:24:57 +0100 |
---|---|---|
committer | Lev Proleev <levp@google.com> | 2019-11-27 14:47:27 +0000 |
commit | 799369fdedf36424e088d557bf8ed66e7d9a93e0 (patch) | |
tree | e7c57b065d86cd3f953187d3d7a53734c8803302 /nn/common/operations/Broadcast.cpp | |
parent | 8b64d1d84943fd762b3d653f88310d1c59474264 (diff) | |
download | ml-799369fdedf36424e088d557bf8ed66e7d9a93e0.tar.gz |
Add TENSOR_QUANT8_ASYMM_SIGNED support to ADD, MUL and SUB.
* Update legacy TF Lite functions calls to the current ones.
* Add QuantizeMultiplierSmallerThanOneExp function (same as the TF Lite
function; does the same thing as QuantizeMultiplierSmallerThanOne but
returns a left shift instead of a right shift, so that all the
QuantizeMultiplier* functions have the same interface).
Bug: 143934463
Bug: 143934627
Bug: 143935040
Test: NNTest_static and 1.3 VTS QuantizationCouplingTest
Change-Id: I897c07bc5d93dfa7c03a0c66eeb77fcbdec6aea0
Merged-In: I897c07bc5d93dfa7c03a0c66eeb77fcbdec6aea0
(cherry picked from commit c7b32bf9555a556cd1b9e5ffe9372a7f6d3fadb8)
Diffstat (limited to 'nn/common/operations/Broadcast.cpp')
-rw-r--r-- | nn/common/operations/Broadcast.cpp | 249 |
1 file changed, 167 insertions, 82 deletions
diff --git a/nn/common/operations/Broadcast.cpp b/nn/common/operations/Broadcast.cpp index 132325439..e973c119f 100644 --- a/nn/common/operations/Broadcast.cpp +++ b/nn/common/operations/Broadcast.cpp @@ -18,17 +18,21 @@ #define LOG_TAG "Operations" +#include <tensorflow/lite/kernels/internal/optimized/integer_ops/add.h> +#include <tensorflow/lite/kernels/internal/optimized/integer_ops/mul.h> +#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/add.h> +#include <tensorflow/lite/kernels/internal/reference/integer_ops/mul.h> +#include <tensorflow/lite/kernels/internal/types.h> + +#include <algorithm> +#include <vector> + #include "CpuOperationUtils.h" #include "HalInterfaces.h" #include "OperationResolver.h" - -#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h> -#include <tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h> - #include "Tracing.h" -#include <algorithm> - namespace android { namespace nn { @@ -118,10 +122,11 @@ bool addFloat16(const _Float16* in1, const Shape& shape1, const _Float16* in2, c return binaryOperationFloat16(in1, shape1, in2, shape2, activation, out, shapeOut, &addFloat32); } -bool addQuant8(const uint8_t* in1, const Shape& shape1, const uint8_t* in2, const Shape& shape2, - int32_t activation, uint8_t* out, const Shape& shapeOut) { +template <typename T> +bool addQuant8(const T* in1, const Shape& shape1, const T* in2, const Shape& shape2, + int32_t activation, T* out, const Shape& shapeOut) { NNTRACE_TRANS("addQuant8"); - bool needBroadcast = !SameShape(shape1, shape2); + const bool needBroadcast = !SameShape(shape1, shape2); const int32_t input1_offset = -shape1.offset; const int32_t input2_offset = -shape2.offset; @@ -135,49 +140,65 @@ bool addQuant8(const uint8_t* in1, const Shape& shape1, const uint8_t* in2, cons int32_t input1_multiplier; int32_t input1_shift; - if 
(!QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &input1_multiplier, - &input1_shift)) { - return false; - } + NN_RET_CHECK(QuantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, + &input1_shift)); int32_t input2_multiplier; int32_t input2_shift; - if (!QuantizeMultiplierSmallerThanOne(real_input2_multiplier, &input2_multiplier, - &input2_shift)) { - return false; - } + NN_RET_CHECK(QuantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, + &input2_shift)); int32_t output_multiplier; int32_t output_shift; - if (!QuantizeMultiplierSmallerThanOne(real_output_multiplier, &output_multiplier, - &output_shift)) { - return false; - } + NN_RET_CHECK(QuantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, + &output_shift)); + int32_t output_activation_min; int32_t output_activation_max; - CalculateActivationRangeUint8(activation, shapeOut, &output_activation_min, - &output_activation_max); + constexpr bool isSignedOp = std::is_same<T, int8_t>::value; + if constexpr (isSignedOp) { + CalculateActivationRangeInt8(activation, shapeOut, &output_activation_min, + &output_activation_max); + } else { + CalculateActivationRangeUint8(activation, shapeOut, &output_activation_min, + &output_activation_max); + } - if (needBroadcast) { - NNTRACE_COMP_SWITCH("optimized_ops::BroadcastAdd"); -#define ANDROID_NN_BROADCAST_ADD(activation) \ - tflite::optimized_ops::BroadcastAdd<tflite::FusedActivationFunctionType::activation>( \ - left_shift, in1, convertShapeToDims(shape1), input1_offset, input1_multiplier, \ - input1_shift, in2, convertShapeToDims(shape2), input2_offset, input2_multiplier, \ - input2_shift, output_offset, output_multiplier, output_shift, output_activation_min, \ - output_activation_max, out, convertShapeToDims(shapeOut)) + tflite::ArithmeticParams op_params; + op_params.left_shift = left_shift; + op_params.input1_offset = input1_offset; + op_params.input1_multiplier = input1_multiplier; + 
op_params.input1_shift = input1_shift; + op_params.input2_offset = input2_offset; + op_params.input2_multiplier = input2_multiplier; + op_params.input2_shift = input2_shift; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + tflite::SetActivationParams(output_activation_min, output_activation_max, &op_params); - ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_BROADCAST_ADD) -#undef ANDROID_NN_BROADCAST_ADD + if (needBroadcast) { + if constexpr (isSignedOp) { + NNTRACE_COMP_SWITCH("reference_integer_ops::BroadcastAdd4DSlow"); + tflite::reference_integer_ops::BroadcastAdd4DSlow( + op_params, convertShapeToTflshape(shape1), in1, convertShapeToTflshape(shape2), + in2, convertShapeToTflshape(shapeOut), out); + } else { + NNTRACE_COMP_SWITCH("reference_ops::BroadcastAdd4DSlow"); + tflite::reference_ops::BroadcastAdd4DSlow(op_params, convertShapeToTflshape(shape1), + in1, convertShapeToTflshape(shape2), in2, + convertShapeToTflshape(shapeOut), out); + } } else { - NNTRACE_COMP_SWITCH("optimized_ops::Add"); -#define ANDROID_NN_NORMAL_ADD(activation) \ - tflite::optimized_ops::Add<tflite::FusedActivationFunctionType::activation>( \ - left_shift, in1, convertShapeToDims(shape1), input1_offset, input1_multiplier, \ - input1_shift, in2, convertShapeToDims(shape2), input2_offset, input2_multiplier, \ - input2_shift, output_offset, output_multiplier, output_shift, output_activation_min, \ - output_activation_max, out, convertShapeToDims(shapeOut)) - - ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_NORMAL_ADD) -#undef ANDROID_NN_NORMAL_ADD + if constexpr (isSignedOp) { + NNTRACE_COMP_SWITCH("optimized_integer_ops::Add"); + tflite::optimized_integer_ops::Add(op_params, convertShapeToTflshape(shape1), in1, + convertShapeToTflshape(shape2), in2, + convertShapeToTflshape(shapeOut), out); + } else { + NNTRACE_COMP_SWITCH("optimized_ops::Add"); + tflite::optimized_ops::Add(op_params, convertShapeToTflshape(shape1), in1, + 
convertShapeToTflshape(shape2), in2, + convertShapeToTflshape(shapeOut), out); + } } return true; @@ -216,8 +237,9 @@ bool mulFloat16(const _Float16* in1, const Shape& shape1, const _Float16* in2, c return binaryOperationFloat16(in1, shape1, in2, shape2, activation, out, shapeOut, &mulFloat32); } -bool mulQuant8(const uint8_t* in1, const Shape& shape1, const uint8_t* in2, const Shape& shape2, - int32_t activation, uint8_t* out, const Shape& shapeOut) { +template <typename T> +bool mulQuant8(const T* in1, const Shape& shape1, const T* in2, const Shape& shape2, + int32_t activation, T* out, const Shape& shapeOut) { NNTRACE_TRANS("mulQuant8"); const int32_t input1_offset = -shape1.offset; const int32_t input2_offset = -shape2.offset; @@ -226,20 +248,39 @@ bool mulQuant8(const uint8_t* in1, const Shape& shape1, const uint8_t* in2, cons const double real_multiplier = input_product_scale / shapeOut.scale; int32 output_multiplier; int output_shift; - if (!QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift)) { - return false; - } + NN_RET_CHECK(QuantizeMultiplierSmallerThanOneExp(real_multiplier, &output_multiplier, + &output_shift)); + + constexpr bool isSignedOp = std::is_same<T, int8_t>::value; int32_t output_activation_min; int32_t output_activation_max; - CalculateActivationRangeUint8(activation, shapeOut, &output_activation_min, - &output_activation_max); + if constexpr (isSignedOp) { + CalculateActivationRangeInt8(activation, shapeOut, &output_activation_min, + &output_activation_max); + } else { + CalculateActivationRangeUint8(activation, shapeOut, &output_activation_min, + &output_activation_max); + } - // Use BROADCAST version to handle the normal case. 
- NNTRACE_COMP_SWITCH("optimized_ops::BroadcastMul"); - tflite::optimized_ops::BroadcastMul(in1, convertShapeToDims(shape1), input1_offset, in2, - convertShapeToDims(shape2), input2_offset, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, out, convertShapeToDims(shapeOut)); + tflite::ArithmeticParams op_params; + op_params.input1_offset = input1_offset; + op_params.input2_offset = input2_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + tflite::SetActivationParams(output_activation_min, output_activation_max, &op_params); + + if constexpr (isSignedOp) { + NNTRACE_COMP_SWITCH("reference_integer_ops::BroadcastMul4DSlow"); + tflite::reference_integer_ops::BroadcastMul4DSlow(op_params, convertShapeToTflshape(shape1), + in1, convertShapeToTflshape(shape2), in2, + convertShapeToTflshape(shapeOut), out); + } else { + NNTRACE_COMP_SWITCH("reference_ops::BroadcastMul4DSlow"); + tflite::reference_ops::BroadcastMul4DSlow(op_params, convertShapeToTflshape(shape1), in1, + convertShapeToTflshape(shape2), in2, + convertShapeToTflshape(shapeOut), out); + } return true; } @@ -267,8 +308,9 @@ bool subFloat16(const _Float16* in1, const Shape& shape1, const _Float16* in2, c return binaryOperationFloat16(in1, shape1, in2, shape2, activation, out, shapeOut, &subFloat32); } -bool subQuant8(const uint8_t* in1, const Shape& shape1, const uint8_t* in2, const Shape& shape2, - int32_t activation, uint8_t* out, const Shape& shapeOut) { +template <typename T> +bool subQuant8(const T* in1, const Shape& shape1, const T* in2, const Shape& shape2, + int32_t activation, T* out, const Shape& shapeOut) { NNTRACE_TRANS("subQuant8"); const int32_t input1_offset = -shape1.offset; @@ -283,41 +325,58 @@ bool subQuant8(const uint8_t* in1, const Shape& shape1, const uint8_t* in2, cons int32_t input1_multiplier; int32_t input1_shift; - if 
(!QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &input1_multiplier, - &input1_shift)) { - return false; - } + NN_RET_CHECK(QuantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, + &input1_shift)); int32_t input2_multiplier; int32_t input2_shift; - if (!QuantizeMultiplierSmallerThanOne(real_input2_multiplier, &input2_multiplier, - &input2_shift)) { - return false; - } + NN_RET_CHECK(QuantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, + &input2_shift)); + // Negate multiplier of the second input, so that we can use Add kernels. input2_multiplier *= -1; + int32_t output_multiplier; int32_t output_shift; - if (!QuantizeMultiplierSmallerThanOne(real_output_multiplier, &output_multiplier, - &output_shift)) { - return false; - } + NN_RET_CHECK(QuantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, + &output_shift)); + + constexpr bool isSignedOp = std::is_same<T, int8_t>::value; int32_t output_activation_min; int32_t output_activation_max; - CalculateActivationRangeUint8(activation, shapeOut, &output_activation_min, - &output_activation_max); + if constexpr (isSignedOp) { + CalculateActivationRangeInt8(activation, shapeOut, &output_activation_min, + &output_activation_max); + } else { + CalculateActivationRangeUint8(activation, shapeOut, &output_activation_min, + &output_activation_max); + } + + tflite::ArithmeticParams op_params; + op_params.left_shift = left_shift; + op_params.input1_offset = input1_offset; + op_params.input1_multiplier = input1_multiplier; + op_params.input1_shift = input1_shift; + op_params.input2_offset = input2_offset; + op_params.input2_multiplier = input2_multiplier; + op_params.input2_shift = input2_shift; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + tflite::SetActivationParams(output_activation_min, output_activation_max, &op_params); // We are using 
tflite::optimized_ops::BroadcastAdd unconditionally here // because tflite::optimized_ops::Add fails to pass some of the // sub_quantized_different_scales tests. - NNTRACE_COMP_SWITCH("optimized_ops::BroadcastAdd"); -#define ANDROID_NN_BROADCAST_ADD(activation) \ - tflite::optimized_ops::BroadcastAdd<tflite::FusedActivationFunctionType::activation>( \ - left_shift, in1, convertShapeToDims(shape1), input1_offset, input1_multiplier, \ - input1_shift, in2, convertShapeToDims(shape2), input2_offset, input2_multiplier, \ - input2_shift, output_offset, output_multiplier, output_shift, output_activation_min, \ - output_activation_max, out, convertShapeToDims(shapeOut)) - - ANDROID_NN_MACRO_DISPATCH(ANDROID_NN_BROADCAST_ADD) -#undef ANDROID_NN_BROADCAST_ADD + if constexpr (isSignedOp) { + NNTRACE_COMP_SWITCH("reference_integer_ops::BroadcastAdd4DSlow"); + tflite::reference_integer_ops::BroadcastAdd4DSlow(op_params, convertShapeToTflshape(shape1), + in1, convertShapeToTflshape(shape2), in2, + convertShapeToTflshape(shapeOut), out); + } else { + NNTRACE_COMP_SWITCH("reference_ops::BroadcastAdd4DSlow"); + tflite::reference_ops::BroadcastAdd4DSlow(op_params, convertShapeToTflshape(shape1), in1, + convertShapeToTflshape(shape2), in2, + convertShapeToTflshape(shapeOut), out); + } return true; } @@ -376,6 +435,8 @@ bool validate(OperationType opType, const IOperationValidationContext* context) } else { NN_RET_CHECK(validateHalVersion(context, std::max(HalVersion::V1_0, opIntroducedAt))); } + } else if (inputType == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) { + NN_RET_CHECK(validateHalVersion(context, std::max(HalVersion::V1_3, opIntroducedAt))); } else { NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << getOperationName(opType); } @@ -421,6 +482,14 @@ bool executeAdd(IOperationExecutionContext* context) { context->getInputValue<int32_t>(kActivationScalar), context->getOutputBuffer<uint8_t>(kOutputTensor), context->getOutputShape(kOutputTensor)); + case 
OperandType::TENSOR_QUANT8_ASYMM_SIGNED: + return addQuant8(context->getInputBuffer<int8_t>(kInputTensor1), + context->getInputShape(kInputTensor1), + context->getInputBuffer<int8_t>(kInputTensor2), + context->getInputShape(kInputTensor2), + context->getInputValue<int32_t>(kActivationScalar), + context->getOutputBuffer<int8_t>(kOutputTensor), + context->getOutputShape(kOutputTensor)); default: NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation ADD"; } @@ -454,6 +523,14 @@ bool executeMul(IOperationExecutionContext* context) { context->getInputValue<int32_t>(kActivationScalar), context->getOutputBuffer<uint8_t>(kOutputTensor), context->getOutputShape(kOutputTensor)); + case OperandType::TENSOR_QUANT8_ASYMM_SIGNED: + return mulQuant8(context->getInputBuffer<int8_t>(kInputTensor1), + context->getInputShape(kInputTensor1), + context->getInputBuffer<int8_t>(kInputTensor2), + context->getInputShape(kInputTensor2), + context->getInputValue<int32_t>(kActivationScalar), + context->getOutputBuffer<int8_t>(kOutputTensor), + context->getOutputShape(kOutputTensor)); default: NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation MUL"; } @@ -487,6 +564,14 @@ bool executeSub(IOperationExecutionContext* context) { context->getInputValue<int32_t>(kActivationScalar), context->getOutputBuffer<uint8_t>(kOutputTensor), context->getOutputShape(kOutputTensor)); + case OperandType::TENSOR_QUANT8_ASYMM_SIGNED: + return subQuant8(context->getInputBuffer<int8_t>(kInputTensor1), + context->getInputShape(kInputTensor1), + context->getInputBuffer<int8_t>(kInputTensor2), + context->getInputShape(kInputTensor2), + context->getInputValue<int32_t>(kActivationScalar), + context->getOutputBuffer<int8_t>(kOutputTensor), + context->getOutputShape(kOutputTensor)); default: NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation SUB"; } |