aboutsummaryrefslogtreecommitdiff
path: root/cpp/src/util/string_compare.cc
diff options
context:
space:
mode:
authorTorne (Richard Coles) <torne@google.com>2014-06-25 10:31:36 +0100
committerTorne (Richard Coles) <torne@google.com>2014-06-25 10:31:36 +0100
commit6ce3a9ad00160cd58574b6bca6d2220c4dbfc83e (patch)
treebd4c283a39b6659d25c49870a045ff73b049eebc /cpp/src/util/string_compare.cc
parentb8347ad8ead685b8afe0ff329ae047f17c7b817c (diff)
parentf7ddeee545f03c948074c921c4648807d90227ae (diff)
downloadsrc-6ce3a9ad00160cd58574b6bca6d2220c4dbfc83e.tar.gz
This commit was generated by merge_to_master.py. Change-Id: I283eef90c15d40ea8cd6290f12094fc152a2c45e
Diffstat (limited to 'cpp/src/util/string_compare.cc')
-rw-r--r--cpp/src/util/string_compare.cc52
1 files changed, 49 insertions, 3 deletions
diff --git a/cpp/src/util/string_compare.cc b/cpp/src/util/string_compare.cc
index c63b138..31a7534 100644
--- a/cpp/src/util/string_compare.cc
+++ b/cpp/src/util/string_compare.cc
@@ -12,20 +12,54 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "re2ptr.h" // Must be the first #include statement!
-
#include "string_compare.h"
#include <libaddressinput/util/basictypes.h>
+#include <cassert>
#include <string>
+#include <re2/re2.h>
+
+#include "lru_cache_using_std.h"
+
+// RE2 uses type string, which is not necessarily the same as type std::string.
+// In order to create objects of the correct type, to be able to pass pointers
+// to these objects to RE2, the function that does that is defined inside an
+// unnamed namespace inside the re2 namespace. Oh, my ...
+namespace re2 {
+namespace {
+
+// In order to (mis-)use RE2 to implement UTF-8 capable less<>, this function
+// calls RE2::PossibleMatchRange() to calculate the "lessest" string that would
+// be a case-insensitive match to the string. This is far too expensive to do
+// repeatedly, so the function is only ever called through an LRU cache.
+std::string ComputeMinPossibleMatch(const std::string& str) {
+ string min, max; // N.B.: RE2 type string!
+
+ RE2::Options options;
+ options.set_literal(true);
+ options.set_case_sensitive(false);
+ RE2 matcher(str, options);
+
+ bool success = matcher.PossibleMatchRange(&min, &max, str.size());
+ assert(success);
+ (void)success; // Prevent unused variable if assert() is optimized away.
+
+ return min;
+}
+
+} // namespace
+} // namespace re2
+
namespace i18n {
namespace addressinput {
class StringCompare::Impl {
+ enum { MAX_CACHE_SIZE = 1 << 15 };
+
public:
- Impl() {
+ Impl() : min_possible_match_(&re2::ComputeMinPossibleMatch, MAX_CACHE_SIZE) {
options_.set_literal(true);
options_.set_case_sensitive(false);
}
@@ -37,8 +71,15 @@ class StringCompare::Impl {
return RE2::FullMatch(a, matcher);
}
+ bool NaturalLess(const std::string& a, const std::string& b) const {
+ const std::string& min_a(min_possible_match_(a));
+ const std::string& min_b(min_possible_match_(b));
+ return min_a < min_b;
+ }
+
private:
RE2::Options options_;
+ mutable lru_cache_using_std<std::string, std::string> min_possible_match_;
DISALLOW_COPY_AND_ASSIGN(Impl);
};
@@ -52,5 +93,10 @@ bool StringCompare::NaturalEquals(const std::string& a,
return impl_->NaturalEquals(a, b);
}
+bool StringCompare::NaturalLess(const std::string& a,
+ const std::string& b) const {
+ return impl_->NaturalLess(a, b);
+}
+
} // namespace addressinput
} // namespace i18n