summary refs log tree commit diff
path: root/native/annotator/number/number_test-include.cc
diff options
context:
space:
mode:
Diffstat (limited to 'native/annotator/number/number_test-include.cc')
-rw-r--r-- native/annotator/number/number_test-include.cc 157
1 files changed, 100 insertions, 57 deletions
diff --git a/native/annotator/number/number_test-include.cc b/native/annotator/number/number_test-include.cc
index f47933f..98140f4 100644
--- a/native/annotator/number/number_test-include.cc
+++ b/native/annotator/number/number_test-include.cc
@@ -16,6 +16,7 @@
#include "annotator/number/number_test-include.h"
+#include <set>
#include <string>
#include <vector>
@@ -34,37 +35,57 @@ namespace test_internal {
using ::testing::AllOf;
using ::testing::ElementsAre;
using ::testing::Field;
+using ::testing::IsEmpty;
using ::testing::Matcher;
using ::testing::UnorderedElementsAre;
+namespace {
+const flatbuffers::DetachedBuffer* CreateOptionsData(ModeFlag enabled_modes) {  // Builds serialized NumberAnnotatorOptions with the given enabled modes.
+ NumberAnnotatorOptionsT options;
+ options.enabled = true;
+ options.priority_score = -10.0;
+ options.float_number_priority_score = 1.0;
+ options.enabled_annotation_usecases =
+ 1 << AnnotationUsecase_ANNOTATION_USECASE_RAW;
+ options.max_number_of_digits = 20;
+ options.enabled_modes = enabled_modes;  // The only field that depends on the argument.
+ // Percentage settings: allowed for both the RAW and SMART usecases.
+ options.percentage_priority_score = 1.0;
+ options.percentage_annotation_usecases =
+ (1 << AnnotationUsecase_ANNOTATION_USECASE_RAW) +
+ (1 << AnnotationUsecase_ANNOTATION_USECASE_SMART);
+ std::set<std::string> percent_suffixes(
+ {"パーセント", "percent", "pércént", "pc", "pct", "%", "٪", "﹪", "%"});
+ for (const std::string& string_value : percent_suffixes) {
+ options.percentage_pieces_string.append(string_value);
+ options.percentage_pieces_string.push_back('\0');  // Append a NUL byte after each suffix.
+ }
+ // Pack the options into a flatbuffer and return it in a heap-allocated buffer; nothing here frees it.
+ flatbuffers::FlatBufferBuilder builder;
+ builder.Finish(NumberAnnotatorOptions::Pack(builder, &options));
+ return new flatbuffers::DetachedBuffer(builder.Release());
+}
+} // namespace
+
const NumberAnnotatorOptions*
-NumberAnnotatorTest::TestingNumberAnnotatorOptions() {
- static const flatbuffers::DetachedBuffer* options_data = []() {
- NumberAnnotatorOptionsT options;
- options.enabled = true;
- options.priority_score = -10.0;
- options.float_number_priority_score = 1.0;
- options.enabled_annotation_usecases =
- 1 << AnnotationUsecase_ANNOTATION_USECASE_RAW;
- options.max_number_of_digits = 20;
-
- options.percentage_priority_score = 1.0;
- options.percentage_annotation_usecases =
- (1 << AnnotationUsecase_ANNOTATION_USECASE_RAW) +
- (1 << AnnotationUsecase_ANNOTATION_USECASE_SMART);
- std::set<std::string> percent_suffixes({"パーセント", "percent", "pércént",
- "pc", "pct", "%", "٪", "﹪", "%"});
- for (const std::string& string_value : percent_suffixes) {
- options.percentage_pieces_string.append(string_value);
- options.percentage_pieces_string.push_back('\0');
- }
-
- flatbuffers::FlatBufferBuilder builder;
- builder.Finish(NumberAnnotatorOptions::Pack(builder, &options));
- return new flatbuffers::DetachedBuffer(builder.Release());
- }();
-
- return flatbuffers::GetRoot<NumberAnnotatorOptions>(options_data->data());
+NumberAnnotatorTest::TestingNumberAnnotatorOptions(ModeFlag enabled_modes) {  // Returns the test options matching the requested modes.
+ static const flatbuffers::DetachedBuffer* options_data_selection =  // Function-local statics: each buffer is built once and reused.
+ CreateOptionsData(ModeFlag_SELECTION);
+ static const flatbuffers::DetachedBuffer* options_data_no_selection =
+ CreateOptionsData(ModeFlag_ANNOTATION_AND_CLASSIFICATION);
+ static const flatbuffers::DetachedBuffer* options_data_all =
+ CreateOptionsData(ModeFlag_ALL);
+ // Any value other than the two matched explicitly below gets the ModeFlag_ALL options.
+ if (enabled_modes == ModeFlag_SELECTION) {
+ return flatbuffers::GetRoot<NumberAnnotatorOptions>(
+ options_data_selection->data());
+ } else if (enabled_modes == ModeFlag_ANNOTATION_AND_CLASSIFICATION) {
+ return flatbuffers::GetRoot<NumberAnnotatorOptions>(
+ options_data_no_selection->data());
+ } else {
+ return flatbuffers::GetRoot<NumberAnnotatorOptions>(
+ options_data_all->data());
+ }
}
MATCHER_P(IsCorrectCollection, collection, "collection is " + collection) {
@@ -124,6 +145,14 @@ TEST_F(NumberAnnotatorTest, ClassifiesAndParsesNumberCorrectly) {
EXPECT_FLOAT_EQ(classification_result.numeric_double_value, 12345);
}
+TEST_F(NumberAnnotatorForSelectionTest,  // NOTE(review): fixture is defined outside this view; presumably it enables only selection mode - confirm.
+ ClassifyTextDisabledClassificationReturnsFalse) {
+ ClassificationResult classification_result;
+ EXPECT_FALSE(number_annotator_.ClassifyText(  // ClassifyText is expected to return false for this fixture.
+ UTF8ToUnicodeText("... 12345 ..."), {4, 9},
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
+}
+
TEST_F(NumberAnnotatorTest, ClassifiesAndParsesNumberAsFloatCorrectly) {
ClassificationResult classification_result;
EXPECT_TRUE(number_annotator_.ClassifyText(
@@ -167,7 +196,7 @@ TEST_F(NumberAnnotatorTest, FindsAllIntegerAndFloatNumbersInText) {
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("how much is 2 plus 5 divided by 7% minus 3.14 "
"what about 68.9# or 68.9#?"),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_THAT(result,
UnorderedElementsAre(
@@ -268,7 +297,8 @@ TEST_F(NumberAnnotatorTest, ClassifiesNonAsciiJaPercentageCorrectSuffix) {
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("明日の降水確率は10パーセント 音量を12にセット"),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_CLASSIFICATION,
+ &result));
EXPECT_THAT(result,
UnorderedElementsAre(
IsAnnotatedSpan(CodepointSpan(8, 10), "number",
@@ -285,7 +315,7 @@ TEST_F(NumberAnnotatorTest, FindsAllNumbersInText) {
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("... 12345 ... 9 is my number and 27% or 68# #38 #39 "
"but not $99."),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_THAT(
result,
@@ -307,12 +337,23 @@ TEST_F(NumberAnnotatorTest, FindsAllNumbersInText) {
/*int_value=*/39, /*double_value=*/39.0)));
}
+TEST_F(NumberAnnotatorForAnnotationAndClassificationTest,  // NOTE(review): fixture is defined outside this view; presumably selection mode is disabled in it - confirm.
+ FindsAllDisabledModeReturnsNoResults) {
+ std::vector<AnnotatedSpan> result;
+ EXPECT_TRUE(number_annotator_.FindAll(
+ UTF8ToUnicodeText("... 12345 ... 9 is my number and 27% or 68# #38 #39 "
+ "but not $99."),
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_SELECTION, &result));
+ // FindAll is still expected to return true, but with no spans when called with ModeFlag_SELECTION.
+ EXPECT_THAT(result, IsEmpty());
+}
+
TEST_F(NumberAnnotatorTest, FindsNoNumberInText) {
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("... 12345a ... 12345..12345 and 123a45 are not valid. "
"And -#5% is also bad."),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_SELECTION, &result));  // The mode is now passed explicitly to FindAll.
ASSERT_EQ(result.size(), 0);
}
@@ -323,7 +364,8 @@ TEST_F(NumberAnnotatorTest, FindsNumberWithPunctuation) {
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText(
"It's 12, 13, 14! Or 15??? For sure 16: 17; 18. and -19"),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_CLASSIFICATION,
+ &result));
EXPECT_THAT(result,
UnorderedElementsAre(
@@ -348,7 +390,7 @@ TEST_F(NumberAnnotatorTest, FindsFloatNumberWithPunctuation) {
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("It's 12.123, 13.45, 14.54321! Or 15.1? Maybe 16.33: "
"17.21; but for sure 18.90."),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_THAT(result,
UnorderedElementsAre(
@@ -379,7 +421,7 @@ TEST_F(NumberAnnotatorTest, HandlesNumbersAtBeginning) {
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("-5"), AnnotationUsecase_ANNOTATION_USECASE_RAW,
- &result));
+ ModeFlag_SELECTION, &result));
EXPECT_THAT(result, UnorderedElementsAre(IsAnnotatedSpan(
CodepointSpan(0, 2), "number",
@@ -390,7 +432,7 @@ TEST_F(NumberAnnotatorTest, HandlesNegativeNumbers) {
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("Number -5 and -5% and not number --5%"),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_THAT(result,
UnorderedElementsAre(
@@ -408,7 +450,7 @@ TEST_F(NumberAnnotatorTest, FindGoodPercentageContexts) {
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText(
"5 percent, 10 pct, 25 pc and 17%, -5 percent, 10% are percentages"),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_SELECTION, &result));
EXPECT_THAT(result,
UnorderedElementsAre(
@@ -448,7 +490,7 @@ TEST_F(NumberAnnotatorTest, FindSinglePercentageInContext) {
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("5%"), AnnotationUsecase_ANNOTATION_USECASE_RAW,
- &result));
+ ModeFlag_ANNOTATION, &result));
EXPECT_THAT(result, UnorderedElementsAre(
IsAnnotatedSpan(CodepointSpan(0, 1), "number",
@@ -463,7 +505,7 @@ TEST_F(NumberAnnotatorTest, IgnoreBadPercentageContexts) {
// A valid number is followed by only one punctuation element.
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("10, pct, 25 prc, 5#: percentage are not percentages"),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_THAT(result,
UnorderedElementsAre(
@@ -478,7 +520,7 @@ TEST_F(NumberAnnotatorTest, IgnoreBadPercentagePunctuationContexts) {
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText(
"#!24% or :?33 percent are not valid percentages, nor numbers."),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_TRUE(result.empty());
}
@@ -488,7 +530,7 @@ TEST_F(NumberAnnotatorTest, FindPercentageInNonAsciiContext) {
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText(
"At the café 10% or 25 percent of people are nice. Only 10%!"),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_THAT(result,
UnorderedElementsAre(
@@ -748,7 +790,7 @@ TEST_F(NumberAnnotatorTest, WhenSuffixWithoutNumberDoesNotParseIt) {
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("... % ..."), AnnotationUsecase_ANNOTATION_USECASE_RAW,
- &result));
+ ModeFlag_ANNOTATION, &result));
ASSERT_EQ(result.size(), 0);
}
@@ -757,7 +799,7 @@ TEST_F(NumberAnnotatorTest, WhenPrefixWithoutNumberDoesNotParseIt) {
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("... $ ..."), AnnotationUsecase_ANNOTATION_USECASE_RAW,
- &result));
+ ModeFlag_ANNOTATION, &result));
ASSERT_EQ(result.size(), 0);
}
@@ -766,7 +808,7 @@ TEST_F(NumberAnnotatorTest, WhenPrefixAndSuffixWithoutNumberDoesNotParseIt) {
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("... $% ..."), AnnotationUsecase_ANNOTATION_USECASE_RAW,
- &result));
+ ModeFlag_ANNOTATION, &result));
ASSERT_EQ(result.size(), 0);
}
@@ -786,7 +828,7 @@ TEST_F(NumberAnnotatorTest, ForNumberAnnotationsSetsScoreAndPriorityScore) {
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("Come at 9 or 10 ok?"),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_THAT(result,
UnorderedElementsAre(
@@ -811,7 +853,7 @@ TEST_F(NumberAnnotatorTest,
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("Results are between 12.5 and 13.5, right?"),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_THAT(result,
UnorderedElementsAre(
IsAnnotatedSpan(CodepointSpan(20, 24), "number",
@@ -845,7 +887,7 @@ TEST_F(NumberAnnotatorTest, ForPercentageAnnotationsSetsScoreAndPriorityScore) {
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("Results are between 9% and 10 percent."),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_THAT(result,
UnorderedElementsAre(
IsAnnotatedSpan(CodepointSpan(20, 21), "number",
@@ -887,7 +929,8 @@ TEST_F(NumberAnnotatorTest, NumberDisabledPercentageEnabledForSmartUsecase) {
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("Accuracy for experiment 3 is 9%."),
- AnnotationUsecase_ANNOTATION_USECASE_SMART, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_SMART, ModeFlag_ANNOTATION,
+ &result));
EXPECT_THAT(result, UnorderedElementsAre(
IsAnnotatedSpan(CodepointSpan(29, 31), "percentage",
/*int_value=*/9, /*double_value=*/9.0,
@@ -898,7 +941,7 @@ TEST_F(NumberAnnotatorTest, MathOperatorsNotAnnotatedAsNumbersFindAll) {
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("how much is 2 + 2 or 5 - 96 * 89"),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_THAT(result,
UnorderedElementsAre(
@@ -928,7 +971,7 @@ TEST_F(NumberAnnotatorTest, SlashSeparatesTwoNumbersFindAll) {
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("what's 1 + 2/3 * 4/5 * 6 / 7"),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_THAT(result,
UnorderedElementsAre(
@@ -972,7 +1015,7 @@ TEST_F(NumberAnnotatorTest, SlashDoesNotSeparatesTwoNumbersFindAll) {
// 2 in the "2/" context is a number because / is punctuation
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("what's 2a2/3 or 2/s4 or 2/ or /3 or //3 or 2//"),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_THAT(result, UnorderedElementsAre(IsAnnotatedSpan(
CodepointSpan(24, 25), "number",
@@ -983,7 +1026,7 @@ TEST_F(NumberAnnotatorTest, BracketsContextAnnotatedFindAll) {
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("The interval is: (12, 13) or [-12, -4.5)"),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_THAT(result,
UnorderedElementsAre(
@@ -1002,7 +1045,7 @@ TEST_F(NumberAnnotatorTest, BracketsContextNotAnnotatedFindAll) {
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("The interval is: -(12, 138*)"),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_TRUE(result.empty());
}
@@ -1012,7 +1055,7 @@ TEST_F(NumberAnnotatorTest, FractionalNumberDotsFindAll) {
// Dots source: https://unicode-search.net/unicode-namesearch.pl?term=period
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("3.1 3﹒2 3.3"),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_THAT(result, UnorderedElementsAre(
IsAnnotatedSpan(CodepointSpan(0, 3), "number",
@@ -1032,7 +1075,7 @@ TEST_F(NumberAnnotatorTest, NonAsciiDigitsFindAll) {
// Digits source: https://unicode-search.net/unicode-namesearch.pl?term=digit
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("3 3﹒2 3.3%"),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_THAT(result, UnorderedElementsAre(
IsAnnotatedSpan(CodepointSpan(0, 1), "number",
@@ -1052,7 +1095,7 @@ TEST_F(NumberAnnotatorTest, AnnotatedZeroPrecededNumbersFindAll) {
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("Numbers: 0.9 or 09 or 09.9 or 032310"),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_THAT(result, UnorderedElementsAre(
IsAnnotatedSpan(CodepointSpan(9, 12), "number",
@@ -1072,7 +1115,7 @@ TEST_F(NumberAnnotatorTest, ZeroAfterDotFindAll) {
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("15.0 16.00"), AnnotationUsecase_ANNOTATION_USECASE_RAW,
- &result));
+ ModeFlag_ANNOTATION, &result));
EXPECT_THAT(result,
UnorderedElementsAre(
@@ -1086,7 +1129,7 @@ TEST_F(NumberAnnotatorTest, NineDotNineFindAll) {
std::vector<AnnotatedSpan> result;
EXPECT_TRUE(number_annotator_.FindAll(
UTF8ToUnicodeText("9.9 9.99 99.99 99.999 99.9999"),
- AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+ AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
EXPECT_THAT(result,
UnorderedElementsAre(