diff options
Diffstat (limited to 'third_party/abseil-cpp/absl/strings')
113 files changed, 20958 insertions, 2258 deletions
diff --git a/third_party/abseil-cpp/absl/strings/BUILD.bazel b/third_party/abseil-cpp/absl/strings/BUILD.bazel index b950ec769f..090fc58a2c 100644 --- a/third_party/abseil-cpp/absl/strings/BUILD.bazel +++ b/third_party/abseil-cpp/absl/strings/BUILD.bazel @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") load( "//absl:copts/configure_copts.bzl", "ABSL_DEFAULT_COPTS", @@ -54,6 +53,7 @@ cc_library( "ascii.h", "charconv.h", "escaping.h", + "internal/string_constant.h", "match.h", "numbers.h", "str_cat.h", @@ -68,7 +68,6 @@ cc_library( deps = [ ":internal", "//absl/base", - "//absl/base:bits", "//absl/base:config", "//absl/base:core_headers", "//absl/base:endian", @@ -76,6 +75,7 @@ cc_library( "//absl/base:throw_delegate", "//absl/memory", "//absl/meta:type_traits", + "//absl/numeric:bits", "//absl/numeric:int128", ], ) @@ -223,6 +223,19 @@ cc_test( ) cc_test( + name = "string_constant_test", + size = "small", + srcs = ["internal/string_constant_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/meta:type_traits", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( name = "string_view_benchmark", srcs = ["string_view_benchmark.cc"], copts = ABSL_TEST_COPTS, @@ -253,12 +266,125 @@ cc_test( cc_library( name = "cord_internal", - hdrs = ["internal/cord_internal.h"], + srcs = [ + "internal/cord_internal.cc", + "internal/cord_rep_btree.cc", + "internal/cord_rep_btree_navigator.cc", + "internal/cord_rep_btree_reader.cc", + "internal/cord_rep_consume.cc", + "internal/cord_rep_ring.cc", + ], + hdrs = [ + "internal/cord_internal.h", + "internal/cord_rep_btree.h", + "internal/cord_rep_btree_navigator.h", + "internal/cord_rep_btree_reader.h", + "internal/cord_rep_consume.h", + "internal/cord_rep_flat.h", + "internal/cord_rep_ring.h", + "internal/cord_rep_ring_reader.h", + ], copts = ABSL_DEFAULT_COPTS, - visibility = ["//visibility:private"], + visibility = [ + "//visibility:private", + ], deps = [ ":strings", + "//absl/base:base_internal", + "//absl/base:config", + "//absl/base:core_headers", + "//absl/base:endian", + "//absl/base:raw_logging_internal", + "//absl/base:throw_delegate", + "//absl/container:compressed_tuple", + "//absl/container:inlined_vector", + "//absl/container:layout", + "//absl/functional:function_ref", "//absl/meta:type_traits", + "//absl/types:span", + ], +) + +cc_test( + name = "cord_internal_test", + srcs = ["internal/cord_internal_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord_internal", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cord_rep_btree_test", + size = "medium", + srcs = ["internal/cord_rep_btree_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord_internal", + ":cord_rep_test_util", + ":strings", + "//absl/base:config", + "//absl/base:raw_logging_internal", + "//absl/cleanup", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cord_rep_btree_navigator_test", + size = "medium", + srcs = ["internal/cord_rep_btree_navigator_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord_internal", + ":cord_rep_test_util", + ":strings", + "//absl/base:config", + "//absl/base:raw_logging_internal", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cord_rep_btree_reader_test", + size = "medium", + srcs = ["internal/cord_rep_btree_reader_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord", + ":cord_internal", + ":cord_rep_test_util", + ":strings", + "//absl/base:config", + "//absl/base:raw_logging_internal", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "cordz_update_tracker", + hdrs = ["internal/cordz_update_tracker.h"], + copts = ABSL_DEFAULT_COPTS, + visibility = [ + "//absl:__subpackages__", + ], + deps = ["//absl/base:config"], +) + +cc_test( + name = "cordz_update_tracker_test", + srcs = ["internal/cordz_update_tracker_test.cc"], + deps = [ + ":cordz_update_tracker", + "//absl/base:config", + "//absl/base:core_headers", + "//absl/synchronization", + "@com_google_googletest//:gtest_main", ], ) @@ -273,11 +399,16 @@ cc_library( copts = ABSL_DEFAULT_COPTS, deps = [ ":cord_internal", + ":cordz_functions", + ":cordz_info", + ":cordz_statistics", + ":cordz_update_scope", + ":cordz_update_tracker", ":internal", ":str_format", ":strings", "//absl/base", - "//absl/base:base_internal", + "//absl/base:config", "//absl/base:core_headers", "//absl/base:endian", "//absl/base:raw_logging_internal", @@ -285,6 +416,216 @@ cc_library( "//absl/container:inlined_vector", "//absl/functional:function_ref", "//absl/meta:type_traits", + "//absl/types:optional", + ], +) + +cc_library( + name = "cordz_handle", + srcs = ["internal/cordz_handle.cc"], + hdrs = ["internal/cordz_handle.h"], + copts = ABSL_DEFAULT_COPTS, + visibility = [ + "//absl:__subpackages__", + ], + deps = [ + "//absl/base", + "//absl/base:config", + "//absl/base:raw_logging_internal", + "//absl/synchronization", + ], +) + +cc_library( + name = "cordz_info", + srcs = ["internal/cordz_info.cc"], + hdrs = ["internal/cordz_info.h"], + copts = ABSL_DEFAULT_COPTS, + visibility = [ + "//absl:__subpackages__", + ], + deps = [ + ":cord_internal", + ":cordz_functions", + ":cordz_handle", + ":cordz_statistics", + ":cordz_update_tracker", + "//absl/base", + "//absl/base:config", + "//absl/base:core_headers", + "//absl/base:raw_logging_internal", + "//absl/container:inlined_vector", + "//absl/debugging:stacktrace", + "//absl/synchronization", + "//absl/types:span", + ], +) + +cc_library( + name = "cordz_update_scope", + hdrs = ["internal/cordz_update_scope.h"], + copts = ABSL_DEFAULT_COPTS, + visibility = [ + "//absl:__subpackages__", + ], + deps = [ + ":cord_internal", + ":cordz_info", + ":cordz_update_tracker", + "//absl/base:config", + "//absl/base:core_headers", + ], +) + +cc_test( + name = "cordz_update_scope_test", + srcs = ["internal/cordz_update_scope_test.cc"], + copts = ABSL_DEFAULT_COPTS, + deps = [ + ":cord_internal", + ":cordz_info", + ":cordz_test_helpers", + ":cordz_update_scope", + ":cordz_update_tracker", + "//absl/base:config", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "cordz_sample_token", + srcs = ["internal/cordz_sample_token.cc"], + hdrs = ["internal/cordz_sample_token.h"], + copts = ABSL_DEFAULT_COPTS, + visibility = [ + "//absl:__subpackages__", + ], + deps = [ + ":cordz_handle", + ":cordz_info", + "//absl/base:config", + ], +) + +cc_library( + name = "cordz_functions", + srcs = ["internal/cordz_functions.cc"], + hdrs = ["internal/cordz_functions.h"], + copts = ABSL_DEFAULT_COPTS, + visibility = [ + "//absl:__subpackages__", + ], + deps = [ + "//absl/base:config", + "//absl/base:core_headers", + "//absl/base:raw_logging_internal", + "//absl/profiling:exponential_biased", + ], +) + +cc_library( + name = "cordz_statistics", + hdrs = ["internal/cordz_statistics.h"], + copts = ABSL_DEFAULT_COPTS, + visibility = [ + "//absl:__subpackages__", + ], + deps = [ + ":cordz_update_tracker", + "//absl/base:config", + ], +) + +cc_test( + name = "cordz_functions_test", + srcs = [ + "internal/cordz_functions_test.cc", + ], + deps = [ + ":cordz_functions", + ":cordz_test_helpers", + "//absl/base:config", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cordz_handle_test", + srcs = [ + "internal/cordz_handle_test.cc", + ], + deps = [ + ":cordz_handle", + "//absl/base:config", + "//absl/memory", + "//absl/random", + "//absl/random:distributions", + "//absl/synchronization", + "//absl/synchronization:thread_pool", + "//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cordz_info_test", + srcs = [ + "internal/cordz_info_test.cc", + ], + deps = [ + ":cord_internal", + ":cordz_handle", + ":cordz_info", + ":cordz_statistics", + ":cordz_test_helpers", + ":cordz_update_tracker", + ":strings", + "//absl/base:config", + "//absl/debugging:stacktrace", + "//absl/debugging:symbolize", + "//absl/types:span", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cordz_info_statistics_test", + srcs = [ + "internal/cordz_info_statistics_test.cc", + ], + deps = [ + ":cord", + ":cord_internal", + ":cordz_info", + ":cordz_sample_token", + ":cordz_statistics", + ":cordz_update_scope", + ":cordz_update_tracker", + "//absl/base:config", + "//absl/synchronization", + "//absl/synchronization:thread_pool", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cordz_sample_token_test", + srcs = [ + "internal/cordz_sample_token_test.cc", + ], + deps = [ + ":cord_internal", + ":cordz_handle", + ":cordz_info", + ":cordz_sample_token", + ":cordz_test_helpers", + "//absl/base:config", + "//absl/memory", + "//absl/random", + "//absl/synchronization", + "//absl/synchronization:thread_pool", + "//absl/time", + "@com_google_googletest//:gtest_main", ], ) @@ -297,6 +638,41 @@ cc_library( copts = ABSL_DEFAULT_COPTS, deps = [ ":cord", + ":cord_internal", + ":strings", + "//absl/base:config", + ], +) + +cc_library( + name = "cord_rep_test_util", + testonly = 1, + hdrs = ["internal/cord_rep_test_util.h"], + copts = ABSL_DEFAULT_COPTS, + deps = [ + ":cord_internal", + ":strings", + "//absl/base:config", + "//absl/base:raw_logging_internal", + ], +) + +cc_library( + name = "cordz_test_helpers", + testonly = 1, + hdrs = ["cordz_test_helpers.h"], + copts = ABSL_DEFAULT_COPTS, + deps = [ + ":cord", + ":cord_internal", + ":cordz_info", + ":cordz_sample_token", + ":cordz_statistics", + ":cordz_update_tracker", + ":strings", + "//absl/base:config", + "//absl/base:core_headers", + "@com_google_googletest//:gtest", ], ) @@ -309,12 +685,98 @@ cc_test( deps = [ ":cord", ":cord_test_helpers", + ":cordz_functions", + ":cordz_test_helpers", + ":str_format", ":strings", "//absl/base", "//absl/base:config", + "//absl/base:core_headers", "//absl/base:endian", "//absl/base:raw_logging_internal", "//absl/container:fixed_array", + "//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cordz_test", + size = "medium", + srcs = ["cordz_test.cc"], + copts = ABSL_TEST_COPTS, + tags = [ + "benchmark", + "no_test_android_arm", + "no_test_android_arm64", + "no_test_android_x86", + "no_test_darwin_x86_64", + "no_test_ios_x86_64", + "no_test_loonix", + "no_test_msvc_x64", + ], + visibility = ["//visibility:private"], + deps = [ + ":cord", + ":cord_test_helpers", + ":cordz_functions", + ":cordz_info", + ":cordz_sample_token", + ":cordz_statistics", + ":cordz_test_helpers", + ":cordz_update_tracker", + ":strings", + "//absl/base:config", + "//absl/base:core_headers", + "//absl/base:raw_logging_internal", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cord_rep_consume_test", + size = "medium", + srcs = ["internal/cord_rep_consume_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord_internal", + ":strings", + "//absl/base:config", + "//absl/base:core_headers", + "//absl/debugging:leak_check", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cord_ring_test", + size = "medium", + srcs = ["cord_ring_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord_internal", + ":strings", + "//absl/base:config", + "//absl/base:core_headers", + "//absl/base:raw_logging_internal", + "//absl/debugging:leak_check", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cord_ring_reader_test", + size = "medium", + srcs = ["cord_ring_reader_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord_internal", + ":strings", + "//absl/base:core_headers", + "//absl/debugging:leak_check", "@com_google_googletest//:gtest_main", ], ) @@ -366,6 +828,9 @@ cc_test( ":strings", "//absl/base:core_headers", "//absl/base:dynamic_annotations", + "//absl/container:btree", + "//absl/container:flat_hash_map", + "//absl/container:node_hash_map", "@com_google_googletest//:gtest_main", ], ) @@ -484,6 +949,7 @@ cc_test( copts = ABSL_TEST_COPTS, visibility = ["//visibility:private"], deps = [ + ":internal", ":pow10_helper", ":strings", "//absl/base:config", @@ -634,8 +1100,12 @@ cc_library( ":strings", "//absl/base:config", "//absl/base:core_headers", + "//absl/functional:function_ref", "//absl/meta:type_traits", + "//absl/numeric:bits", "//absl/numeric:int128", + "//absl/numeric:representation", + "//absl/types:optional", "//absl/types:span", ], ) @@ -646,6 +1116,7 @@ cc_test( copts = ABSL_TEST_COPTS, visibility = ["//visibility:private"], deps = [ + ":cord", ":str_format", ":strings", "//absl/base:core_headers", @@ -663,6 +1134,7 @@ cc_test( deps = [ ":str_format", ":str_format_internal", + ":strings", "@com_google_googletest//:gtest_main", ], ) @@ -709,8 +1181,9 @@ cc_test( visibility = ["//visibility:private"], deps = [ ":str_format_internal", + ":strings", "//absl/base:raw_logging_internal", - "//absl/numeric:int128", + "//absl/types:optional", "@com_google_googletest//:gtest_main", ], ) @@ -721,6 +1194,7 @@ cc_test( copts = ABSL_TEST_COPTS, visibility = ["//visibility:private"], deps = [ + ":cord", ":str_format_internal", "@com_google_googletest//:gtest_main", ], diff --git a/third_party/abseil-cpp/absl/strings/CMakeLists.txt b/third_party/abseil-cpp/absl/strings/CMakeLists.txt index fbf0f5ac52..d6801fe6fd 100644 --- a/third_party/abseil-cpp/absl/strings/CMakeLists.txt +++ b/third_party/abseil-cpp/absl/strings/CMakeLists.txt @@ -21,6 +21,7 @@ absl_cc_library( "ascii.h" "charconv.h" "escaping.h" + "internal/string_constant.h" "match.h" "numbers.h" "str_cat.h" @@ -100,7 +101,7 @@ absl_cc_test( DEPS absl::strings absl::base - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -114,7 +115,7 @@ absl_cc_test( absl::strings absl::core_headers absl::fixed_array - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -127,7 +128,7 @@ absl_cc_test( DEPS absl::strings absl::core_headers - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -141,7 +142,7 @@ absl_cc_test( DEPS absl::strings absl::core_headers - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -155,7 +156,20 @@ absl_cc_test( absl::strings_internal absl::base absl::core_headers - gmock_main + GTest::gmock_main +) + +absl_cc_test( + NAME + string_constant_test + SRCS + "internal/string_constant_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::strings + absl::type_traits + GTest::gmock_main ) absl_cc_test( @@ -170,7 +184,7 @@ absl_cc_test( absl::config absl::core_headers absl::dynamic_annotations - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -183,7 +197,7 @@ absl_cc_test( DEPS absl::strings absl::core_headers - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -195,7 +209,7 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::strings - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -207,10 +221,12 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::strings - absl::base absl::core_headers absl::dynamic_annotations - gmock_main + absl::btree + absl::flat_hash_map + absl::node_hash_map + GTest::gmock_main ) absl_cc_test( @@ -222,7 +238,7 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::strings_internal - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -237,7 +253,7 @@ absl_cc_test( absl::base absl::core_headers absl::type_traits - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -252,7 +268,7 @@ absl_cc_test( absl::base absl::core_headers absl::memory - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -265,7 +281,7 @@ absl_cc_test( DEPS absl::strings absl::core_headers - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -284,7 +300,8 @@ absl_cc_test( absl::raw_logging_internal absl::random_random absl::random_distributions - gmock_main + absl::strings_internal + GTest::gmock_main ) absl_cc_test( @@ -297,7 +314,7 @@ absl_cc_test( DEPS absl::strings absl::base - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -309,7 +326,7 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::strings_internal - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -323,7 +340,7 @@ absl_cc_test( absl::strings absl::str_format absl::pow10_helper - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -338,7 +355,7 @@ absl_cc_test( absl::strings absl::config absl::raw_logging_internal - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -353,7 +370,7 @@ absl_cc_test( DEPS absl::strings absl::config - gmock_main + GTest::gmock_main ) absl_cc_library( @@ -389,9 +406,11 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::bits absl::strings absl::config absl::core_headers + absl::numeric_representation absl::type_traits absl::int128 absl::span @@ -406,9 +425,10 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::str_format + absl::cord absl::strings absl::core_headers - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -421,7 +441,8 @@ absl_cc_test( DEPS absl::str_format absl::str_format_internal - gmock_main + absl::strings + GTest::gmock_main ) absl_cc_test( @@ -434,7 +455,7 @@ absl_cc_test( DEPS absl::str_format absl::str_format_internal - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -446,7 +467,7 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::str_format_internal - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -458,7 +479,7 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::str_format - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -469,10 +490,11 @@ absl_cc_test( COPTS ${ABSL_TEST_COPTS} DEPS + absl::strings absl::str_format_internal absl::raw_logging_internal absl::int128 - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -484,7 +506,8 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::str_format_internal - gmock_main + absl::cord + GTest::gmock_main ) absl_cc_test( @@ -497,7 +520,7 @@ absl_cc_test( DEPS absl::str_format_internal absl::core_headers - gmock_main + GTest::gmock_main ) absl_cc_library( @@ -524,7 +547,281 @@ absl_cc_test( DEPS absl::pow10_helper absl::str_format - gmock_main + GTest::gmock_main +) + +absl_cc_library( + NAME + cord_internal + HDRS + "internal/cord_internal.h" + "internal/cord_rep_btree.h" + "internal/cord_rep_btree_navigator.h" + "internal/cord_rep_btree_reader.h" + "internal/cord_rep_consume.h" + "internal/cord_rep_flat.h" + "internal/cord_rep_ring.h" + "internal/cord_rep_ring_reader.h" + SRCS + "internal/cord_internal.cc" + "internal/cord_rep_btree.cc" + "internal/cord_rep_btree_navigator.cc" + "internal/cord_rep_btree_reader.cc" + "internal/cord_rep_consume.cc" + "internal/cord_rep_ring.cc" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::base_internal + absl::compressed_tuple + absl::config + absl::core_headers + absl::endian + absl::inlined_vector + absl::layout + absl::raw_logging_internal + absl::strings + absl::throw_delegate + absl::type_traits +) + +absl_cc_library( + NAME + cordz_update_tracker + HDRS + "internal/cordz_update_tracker.h" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::config +) + +absl_cc_test( + NAME + cordz_update_tracker_test + SRCS + "internal/cordz_update_tracker_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::config + absl::cordz_update_tracker + absl::core_headers + absl::synchronization + GTest::gmock_main +) + +absl_cc_library( + NAME + cordz_functions + HDRS + "internal/cordz_functions.h" + SRCS + "internal/cordz_functions.cc" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::config + absl::core_headers + absl::exponential_biased + absl::raw_logging_internal +) + +absl_cc_test( + NAME + cordz_functions_test + SRCS + "internal/cordz_functions_test.cc" + DEPS + absl::config + absl::cordz_functions + absl::cordz_test_helpers + GTest::gmock_main +) + +absl_cc_library( + NAME + cordz_statistics + HDRS + "internal/cordz_statistics.h" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::config + absl::core_headers + absl::cordz_update_tracker + absl::synchronization +) + +absl_cc_library( + NAME + cordz_handle + HDRS + "internal/cordz_handle.h" + SRCS + "internal/cordz_handle.cc" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::base + absl::config + absl::raw_logging_internal + absl::synchronization +) + +absl_cc_test( + NAME + cordz_handle_test + SRCS + "internal/cordz_handle_test.cc" + DEPS + absl::config + absl::cordz_handle + absl::cordz_test_helpers + absl::memory + absl::random_random + absl::random_distributions + absl::synchronization + absl::time + GTest::gmock_main +) + +absl_cc_library( + NAME + cordz_info + HDRS + "internal/cordz_info.h" + SRCS + "internal/cordz_info.cc" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::base + absl::config + absl::cord_internal + absl::cordz_functions + absl::cordz_handle + absl::cordz_statistics + absl::cordz_update_tracker + absl::core_headers + absl::inlined_vector + absl::span + absl::raw_logging_internal + absl::stacktrace + absl::synchronization +) + +absl_cc_test( + NAME + cordz_info_test + SRCS + "internal/cordz_info_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::config + absl::cord_internal + absl::cordz_test_helpers + absl::cordz_handle + absl::cordz_info + absl::cordz_statistics + absl::cordz_test_helpers + absl::cordz_update_tracker + absl::span + absl::stacktrace + absl::symbolize + GTest::gmock_main +) + +absl_cc_test( + NAME + cordz_info_statistics_test + SRCS + "internal/cordz_info_statistics_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::config + absl::cord + absl::cord_internal + absl::cordz_info + absl::cordz_sample_token + absl::cordz_statistics + absl::cordz_update_scope + absl::cordz_update_tracker + absl::thread_pool + GTest::gmock_main +) + +absl_cc_library( + NAME + cordz_sample_token + HDRS + "internal/cordz_sample_token.h" + SRCS + "internal/cordz_sample_token.cc" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::config + absl::cordz_handle + absl::cordz_info +) + +absl_cc_test( + NAME + cordz_sample_token_test + SRCS + "internal/cordz_sample_token_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::config + absl::cord_internal + absl::cordz_handle + absl::cordz_info + absl::cordz_info + absl::cordz_sample_token + absl::cordz_test_helpers + absl::memory + absl::random_random + absl::synchronization + absl::thread_pool + absl::time + GTest::gmock_main +) + +absl_cc_library( + NAME + cordz_update_scope + HDRS + "internal/cordz_update_scope.h" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::config + absl::cord_internal + absl::cordz_info + absl::cordz_update_tracker + absl::core_headers +) + +absl_cc_test( + NAME + cordz_update_scope_test + SRCS + "internal/cordz_update_scope_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::config + absl::cord_internal + absl::cordz_info + absl::cordz_test_helpers + absl::cordz_update_scope + absl::cordz_update_tracker + absl::core_headers + GTest::gmock_main ) absl_cc_library( @@ -534,33 +831,75 @@ absl_cc_library( "cord.h" SRCS "cord.cc" - "internal/cord_internal.h" COPTS ${ABSL_DEFAULT_COPTS} DEPS - absl::strings - absl::strings_internal absl::base - absl::base_internal + absl::config + absl::cord_internal + absl::cordz_functions + absl::cordz_info + absl::cordz_update_scope + absl::cordz_update_tracker absl::core_headers absl::endian absl::fixed_array absl::function_ref absl::inlined_vector + absl::optional absl::raw_logging_internal + absl::strings absl::type_traits PUBLIC ) absl_cc_library( NAME + cord_rep_test_util + HDRS + "internal/cord_rep_test_util.h" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::config + absl::cord_internal + absl::raw_logging_internal + absl::strings + TESTONLY +) + +absl_cc_library( + NAME cord_test_helpers HDRS "cord_test_helpers.h" COPTS ${ABSL_TEST_COPTS} DEPS + absl::config + absl::cord + absl::cord_internal + absl::strings + TESTONLY +) + +absl_cc_library( + NAME + cordz_test_helpers + HDRS + "cordz_test_helpers.h" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::config absl::cord + absl::cord_internal + absl::cordz_info + absl::cordz_sample_token + absl::cordz_statistics + absl::cordz_update_tracker + absl::core_headers + absl::strings TESTONLY ) @@ -573,11 +912,157 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::cord + absl::str_format absl::strings absl::base absl::config + absl::cord_test_helpers + absl::cordz_test_helpers + absl::core_headers absl::endian + absl::random_random absl::raw_logging_internal absl::fixed_array - gmock_main + GTest::gmock_main +) + +absl_cc_test( + NAME + cord_rep_consume_test + SRCS + "internal/cord_rep_consume_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::base + absl::config + absl::cord_internal + absl::core_headers + absl::function_ref + absl::raw_logging_internal + absl::strings + GTest::gmock_main +) + +absl_cc_test( + NAME + cord_internal_test + SRCS + "internal/cord_internal_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::cord_internal + GTest::gmock_main +) + +absl_cc_test( + NAME + cord_rep_btree_test + SRCS + "internal/cord_rep_btree_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::base + absl::cleanup + absl::config + absl::cord_internal + absl::cord_rep_test_util + absl::core_headers + absl::raw_logging_internal + absl::strings + GTest::gmock_main +) + +absl_cc_test( + NAME + cord_rep_btree_navigator_test + SRCS + "internal/cord_rep_btree_navigator_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::base + absl::config + absl::cord_internal + absl::cord_rep_test_util + absl::core_headers + absl::raw_logging_internal + absl::strings + GTest::gmock_main +) + +absl_cc_test( + NAME + cord_rep_btree_reader_test + SRCS + "internal/cord_rep_btree_reader_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::base + absl::config + absl::cord_internal + absl::cord_rep_test_util + absl::core_headers + absl::raw_logging_internal + absl::strings + GTest::gmock_main +) + +absl_cc_test( + NAME + cord_ring_test + SRCS + "cord_ring_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::base + absl::config + absl::cord_internal + absl::core_headers + absl::raw_logging_internal + absl::strings + GTest::gmock_main +) + +absl_cc_test( + NAME + cord_ring_reader_test + SRCS + "cord_ring_reader_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::base + absl::cord_internal + absl::core_headers + absl::strings + GTest::gmock_main +) + +absl_cc_test( + NAME + cordz_test + SRCS + "cordz_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::cord + absl::cord_test_helpers + absl::cordz_test_helpers + absl::cordz_functions + absl::cordz_info + absl::cordz_sample_token + absl::cordz_statistics + absl::cordz_update_tracker + absl::base + absl::config + absl::core_headers + absl::raw_logging_internal + absl::strings + GTest::gmock_main ) diff --git a/third_party/abseil-cpp/absl/strings/ascii_test.cc b/third_party/abseil-cpp/absl/strings/ascii_test.cc index 5ecd23f869..83af7825e1 100644 --- a/third_party/abseil-cpp/absl/strings/ascii_test.cc +++ b/third_party/abseil-cpp/absl/strings/ascii_test.cc @@ -197,11 +197,15 @@ TEST(AsciiStrTo, Lower) { const std::string str("GHIJKL"); const std::string str2("MNOPQR"); const absl::string_view sp(str2); + std::string mutable_str("STUVWX"); EXPECT_EQ("abcdef", absl::AsciiStrToLower(buf)); EXPECT_EQ("ghijkl", absl::AsciiStrToLower(str)); EXPECT_EQ("mnopqr", absl::AsciiStrToLower(sp)); + absl::AsciiStrToLower(&mutable_str); + EXPECT_EQ("stuvwx", mutable_str); + char mutable_buf[] = "Mutable"; std::transform(mutable_buf, mutable_buf + strlen(mutable_buf), mutable_buf, absl::ascii_tolower); diff --git a/third_party/abseil-cpp/absl/strings/charconv.cc b/third_party/abseil-cpp/absl/strings/charconv.cc index bdba768dcd..fefcfc90a5 100644 --- a/third_party/abseil-cpp/absl/strings/charconv.cc +++ b/third_party/abseil-cpp/absl/strings/charconv.cc @@ -20,7 +20,7 @@ #include <cstring> #include "absl/base/casts.h" -#include "absl/base/internal/bits.h" +#include "absl/numeric/bits.h" #include "absl/numeric/int128.h" #include "absl/strings/internal/charconv_bigint.h" #include "absl/strings/internal/charconv_parse.h" @@ -111,7 +111,7 @@ struct FloatTraits<double> { return sign ? -ldexp(mantissa, exponent) : ldexp(mantissa, exponent); #else constexpr uint64_t kMantissaMask = - (uint64_t(1) << (kTargetMantissaBits - 1)) - 1; + (uint64_t{1} << (kTargetMantissaBits - 1)) - 1; uint64_t dbl = static_cast<uint64_t>(sign) << 63; if (mantissa > kMantissaMask) { // Normal value. @@ -151,7 +151,7 @@ struct FloatTraits<float> { return sign ? -ldexpf(mantissa, exponent) : ldexpf(mantissa, exponent); #else constexpr uint32_t kMantissaMask = - (uint32_t(1) << (kTargetMantissaBits - 1)) - 1; + (uint32_t{1} << (kTargetMantissaBits - 1)) - 1; uint32_t flt = static_cast<uint32_t>(sign) << 31; if (mantissa > kMantissaMask) { // Normal value. @@ -242,11 +242,11 @@ struct CalculatedFloat { // Returns the bit width of the given uint128. (Equivalently, returns 128 // minus the number of leading zero bits.) -int BitWidth(uint128 value) { +unsigned BitWidth(uint128 value) { if (Uint128High64(value) == 0) { - return 64 - base_internal::CountLeadingZeros64(Uint128Low64(value)); + return static_cast<unsigned>(bit_width(Uint128Low64(value))); } - return 128 - base_internal::CountLeadingZeros64(Uint128High64(value)); + return 128 - countl_zero(Uint128High64(value)); } // Calculates how far to the right a mantissa needs to be shifted to create a @@ -499,7 +499,7 @@ bool MustRoundUp(uint64_t guess_mantissa, int guess_exponent, template <typename FloatType> CalculatedFloat CalculatedFloatFromRawValues(uint64_t mantissa, int exponent) { CalculatedFloat result; - if (mantissa == uint64_t(1) << FloatTraits<FloatType>::kTargetMantissaBits) { + if (mantissa == uint64_t{1} << FloatTraits<FloatType>::kTargetMantissaBits) { mantissa >>= 1; exponent += 1; } @@ -519,7 +519,7 @@ CalculatedFloat CalculateFromParsedHexadecimal( const strings_internal::ParsedFloat& parsed_hex) { uint64_t mantissa = parsed_hex.mantissa; int exponent = parsed_hex.exponent; - int mantissa_width = 64 - base_internal::CountLeadingZeros64(mantissa); + auto mantissa_width = static_cast<unsigned>(bit_width(mantissa)); const int shift = NormalizedShiftSize<FloatType>(mantissa_width, exponent); bool result_exact; exponent += shift; @@ -619,10 +619,10 @@ from_chars_result FromCharsImpl(const char* first, const char* last, // Either we failed to parse a hex float after the "0x", or we read // "0xinf" or "0xnan" which we don't want to match. // - // However, a std::string that begins with "0x" also begins with "0", which + // However, a string that begins with "0x" also begins with "0", which // is normally a valid match for the number zero. So we want these // strings to match zero unless fmt_flags is `scientific`. (This flag - // means an exponent is required, which the std::string "0" does not have.) + // means an exponent is required, which the string "0" does not have.) if (fmt_flags == chars_format::scientific) { result.ec = std::errc::invalid_argument; } else { diff --git a/third_party/abseil-cpp/absl/strings/charconv.h b/third_party/abseil-cpp/absl/strings/charconv.h index e04be32f95..7c50981245 100644 --- a/third_party/abseil-cpp/absl/strings/charconv.h +++ b/third_party/abseil-cpp/absl/strings/charconv.h @@ -64,8 +64,9 @@ struct from_chars_result { // the result in `value`. // // The matching pattern format is almost the same as that of strtod(), except -// that C locale is not respected, and an initial '+' character in the input -// range will never be matched. +// that (1) C locale is not respected, (2) an initial '+' character in the +// input range will never be matched, and (3) leading whitespaces are not +// ignored. // // If `fmt` is set, it must be one of the enumerator values of the chars_format. // (This is despite the fact that chars_format is a bitmask type.) If set to diff --git a/third_party/abseil-cpp/absl/strings/charconv_benchmark.cc b/third_party/abseil-cpp/absl/strings/charconv_benchmark.cc index 644b2abdf6..e8c7371d65 100644 --- a/third_party/abseil-cpp/absl/strings/charconv_benchmark.cc +++ b/third_party/abseil-cpp/absl/strings/charconv_benchmark.cc @@ -132,7 +132,7 @@ BENCHMARK(BM_Absl_HugeMantissa); std::string MakeHardCase(int length) { // The number 1.1521...e-297 is exactly halfway between 12345 * 2**-1000 and // the next larger representable number. The digits of this number are in - // the std::string below. + // the string below. const std::string digits = "1." "152113937042223790993097181572444900347587985074226836242307364987727724" diff --git a/third_party/abseil-cpp/absl/strings/charconv_test.cc b/third_party/abseil-cpp/absl/strings/charconv_test.cc index 9090e9c89c..b83de5a0ba 100644 --- a/third_party/abseil-cpp/absl/strings/charconv_test.cc +++ b/third_party/abseil-cpp/absl/strings/charconv_test.cc @@ -653,7 +653,9 @@ TEST(FromChars, NaNFloats) { negative_from_chars_float); EXPECT_TRUE(std::signbit(negative_from_chars_float)); EXPECT_FALSE(Identical(negative_from_chars_float, from_chars_float)); - from_chars_float = std::copysign(from_chars_float, -1.0); + // Use the (float, float) overload of std::copysign to prevent narrowing; + // see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98251. + from_chars_float = std::copysign(from_chars_float, -1.0f); EXPECT_TRUE(Identical(negative_from_chars_float, from_chars_float)); } } diff --git a/third_party/abseil-cpp/absl/strings/cord.cc b/third_party/abseil-cpp/absl/strings/cord.cc index d9503ae332..854047ca98 100644 --- a/third_party/abseil-cpp/absl/strings/cord.cc +++ b/third_party/abseil-cpp/absl/strings/cord.cc @@ -15,10 +15,12 @@ #include "absl/strings/cord.h" #include <algorithm> +#include <atomic> #include <cstddef> #include <cstdio> #include <cstdlib> #include <iomanip> +#include <iostream> #include <limits> #include <ostream> #include <sstream> @@ -28,11 +30,17 @@ #include "absl/base/casts.h" #include "absl/base/internal/raw_logging.h" +#include "absl/base/macros.h" #include "absl/base/port.h" #include "absl/container/fixed_array.h" #include "absl/container/inlined_vector.h" #include "absl/strings/escaping.h" #include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_scope.h" +#include "absl/strings/internal/cordz_update_tracker.h" #include "absl/strings/internal/resize_uninitialized.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" @@ -43,144 +51,18 @@ namespace absl { ABSL_NAMESPACE_BEGIN using ::absl::cord_internal::CordRep; +using ::absl::cord_internal::CordRepBtree; using ::absl::cord_internal::CordRepConcat; using ::absl::cord_internal::CordRepExternal; +using ::absl::cord_internal::CordRepFlat; using ::absl::cord_internal::CordRepSubstring; +using ::absl::cord_internal::CordzUpdateTracker; +using ::absl::cord_internal::InlineData; +using ::absl::cord_internal::kMaxFlatLength; +using ::absl::cord_internal::kMinFlatLength; -// Various representations that we allow -enum CordRepKind { - CONCAT = 0, - EXTERNAL = 1, - SUBSTRING = 2, - - // We have different tags for different sized flat arrays, - // starting with FLAT - FLAT = 3, -}; - -namespace { - -// Type used with std::allocator for allocating and deallocating -// `CordRepExternal`. std::allocator is used because it opaquely handles the -// different new / delete overloads available on a given platform. -struct alignas(absl::cord_internal::ExternalRepAlignment()) ExternalAllocType { - unsigned char value[absl::cord_internal::ExternalRepAlignment()]; -}; - -// Returns the number of objects to pass in to std::allocator<ExternalAllocType> -// allocate() and deallocate() to create enough room for `CordRepExternal` with -// `releaser_size` bytes on the end. -constexpr size_t GetExternalAllocNumObjects(size_t releaser_size) { - // Be sure to round up since `releaser_size` could be smaller than - // `sizeof(ExternalAllocType)`. - return (sizeof(CordRepExternal) + releaser_size + sizeof(ExternalAllocType) - - 1) / - sizeof(ExternalAllocType); -} - -// Allocates enough memory for `CordRepExternal` and a releaser with size -// `releaser_size` bytes. -void* AllocateExternal(size_t releaser_size) { - return std::allocator<ExternalAllocType>().allocate( - GetExternalAllocNumObjects(releaser_size)); -} - -// Deallocates the memory for a `CordRepExternal` assuming it was allocated with -// a releaser of given size and alignment. -void DeallocateExternal(CordRepExternal* p, size_t releaser_size) { - std::allocator<ExternalAllocType>().deallocate( - reinterpret_cast<ExternalAllocType*>(p), - GetExternalAllocNumObjects(releaser_size)); -} - -// Returns a pointer to the type erased releaser for the given CordRepExternal. -void* GetExternalReleaser(CordRepExternal* rep) { - return rep + 1; -} - -} // namespace - -namespace cord_internal { - -inline CordRepConcat* CordRep::concat() { - assert(tag == CONCAT); - return static_cast<CordRepConcat*>(this); -} - -inline const CordRepConcat* CordRep::concat() const { - assert(tag == CONCAT); - return static_cast<const CordRepConcat*>(this); -} - -inline CordRepSubstring* CordRep::substring() { - assert(tag == SUBSTRING); - return static_cast<CordRepSubstring*>(this); -} - -inline const CordRepSubstring* CordRep::substring() const { - assert(tag == SUBSTRING); - return static_cast<const CordRepSubstring*>(this); -} - -inline CordRepExternal* CordRep::external() { - assert(tag == EXTERNAL); - return static_cast<CordRepExternal*>(this); -} - -inline const CordRepExternal* CordRep::external() const { - assert(tag == EXTERNAL); - return static_cast<const CordRepExternal*>(this); -} - -} // namespace cord_internal - -static const size_t kFlatOverhead = offsetof(CordRep, data); - -static_assert(kFlatOverhead == 13, "Unittests assume kFlatOverhead == 13"); - -// Largest and smallest flat node lengths we are willing to allocate -// Flat allocation size is stored in tag, which currently can encode sizes up -// to 4K, encoded as multiple of either 8 or 32 bytes. -// If we allow for larger sizes, we need to change this to 8/64, 16/128, etc. -static constexpr size_t kMaxFlatSize = 4096; -static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead; -static constexpr size_t kMinFlatLength = 32 - kFlatOverhead; - -// Prefer copying blocks of at most this size, otherwise reference count. -static const size_t kMaxBytesToCopy = 511; - -// Helper functions for rounded div, and rounding to exact sizes. -static size_t DivUp(size_t n, size_t m) { return (n + m - 1) / m; } -static size_t RoundUp(size_t n, size_t m) { return DivUp(n, m) * m; } - -// Returns the size to the nearest equal or larger value that can be -// expressed exactly as a tag value. -static size_t RoundUpForTag(size_t size) { - return RoundUp(size, (size <= 1024) ? 8 : 32); -} - -// Converts the allocated size to a tag, rounding down if the size -// does not exactly match a 'tag expressible' size value. The result is -// undefined if the size exceeds the maximum size that can be encoded in -// a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>). -static uint8_t AllocatedSizeToTag(size_t size) { - const size_t tag = (size <= 1024) ? size / 8 : 128 + size / 32 - 1024 / 32; - assert(tag <= std::numeric_limits<uint8_t>::max()); - return tag; -} - -// Converts the provided tag to the corresponding allocated size -static constexpr size_t TagToAllocatedSize(uint8_t tag) { - return (tag <= 128) ? (tag * 8) : (1024 + (tag - 128) * 32); -} - -// Converts the provided tag to the corresponding available data length -static constexpr size_t TagToLength(uint8_t tag) { - return TagToAllocatedSize(tag) - kFlatOverhead; -} - -// Enforce that kMaxFlatSize maps to a well-known exact tag value. -static_assert(TagToAllocatedSize(224) == kMaxFlatSize, "Bad tag logic"); +using ::absl::cord_internal::kInlinedVectorSize; +using ::absl::cord_internal::kMaxBytesToCopy; constexpr uint64_t Fibonacci(unsigned char n, uint64_t a = 0, uint64_t b = 1) { return n == 0 ? a : Fibonacci(n - 1, b, a + b); @@ -195,70 +77,30 @@ static_assert(Fibonacci(63) == 6557470319842, // The root node depth is allowed to become twice as large to reduce rebalancing // for larger strings (see IsRootBalanced). static constexpr uint64_t min_length[] = { - Fibonacci(2), - Fibonacci(3), - Fibonacci(4), - Fibonacci(5), - Fibonacci(6), - Fibonacci(7), - Fibonacci(8), - Fibonacci(9), - Fibonacci(10), - Fibonacci(11), - Fibonacci(12), - Fibonacci(13), - Fibonacci(14), - Fibonacci(15), - Fibonacci(16), - Fibonacci(17), - Fibonacci(18), - Fibonacci(19), - Fibonacci(20), - Fibonacci(21), - Fibonacci(22), - Fibonacci(23), - Fibonacci(24), - Fibonacci(25), - Fibonacci(26), - Fibonacci(27), - Fibonacci(28), - Fibonacci(29), - Fibonacci(30), - Fibonacci(31), - Fibonacci(32), - Fibonacci(33), - Fibonacci(34), - Fibonacci(35), - Fibonacci(36), - Fibonacci(37), - Fibonacci(38), - Fibonacci(39), - Fibonacci(40), - Fibonacci(41), - Fibonacci(42), - Fibonacci(43), - Fibonacci(44), - Fibonacci(45), - Fibonacci(46), - Fibonacci(47), + Fibonacci(2), Fibonacci(3), Fibonacci(4), Fibonacci(5), + Fibonacci(6), Fibonacci(7), Fibonacci(8), Fibonacci(9), + Fibonacci(10), Fibonacci(11), Fibonacci(12), Fibonacci(13), + Fibonacci(14), Fibonacci(15), Fibonacci(16), Fibonacci(17), + Fibonacci(18), Fibonacci(19), Fibonacci(20), Fibonacci(21), + Fibonacci(22), Fibonacci(23), Fibonacci(24), Fibonacci(25), + Fibonacci(26), Fibonacci(27), Fibonacci(28), Fibonacci(29), + Fibonacci(30), Fibonacci(31), Fibonacci(32), Fibonacci(33), + Fibonacci(34), Fibonacci(35), Fibonacci(36), Fibonacci(37), + Fibonacci(38), Fibonacci(39), Fibonacci(40), Fibonacci(41), + Fibonacci(42), Fibonacci(43), Fibonacci(44), Fibonacci(45), + Fibonacci(46), Fibonacci(47), 0xffffffffffffffffull, // Avoid overflow }; static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length); -// The inlined size to use with absl::InlinedVector. -// -// Note: The InlinedVectors in this file (and in cord.h) do not need to use -// the same value for their inlined size. The fact that they do is historical. -// It may be desirable for each to use a different inlined size optimized for -// that InlinedVector's usage. -// -// TODO(jgm): Benchmark to see if there's a more optimal value than 47 for -// the inlined vector size (47 exists for backward compatibility). -static const int kInlinedVectorSize = 47; +static inline bool btree_enabled() { + return cord_internal::cord_btree_enabled.load( + std::memory_order_relaxed); +} static inline bool IsRootBalanced(CordRep* node) { - if (node->tag != CONCAT) { + if (!node->IsConcat()) { return true; } else if (node->concat()->depth() <= 15) { return true; @@ -272,7 +114,8 @@ static inline bool IsRootBalanced(CordRep* node) { } static CordRep* Rebalance(CordRep* node); -static void DumpNode(CordRep* rep, bool include_data, std::ostream* os); +static void DumpNode(CordRep* rep, bool include_data, std::ostream* os, + int indent = 0); static bool VerifyNode(CordRep* root, CordRep* start_node, bool full_validation); @@ -292,103 +135,9 @@ static inline CordRep* VerifyTree(CordRep* node) { return node; } -// -------------------------------------------------------------------- -// Memory management - -inline CordRep* Ref(CordRep* rep) { - if (rep != nullptr) { - rep->refcount.Increment(); - } - return rep; -} - -// This internal routine is called from the cold path of Unref below. Keeping it -// in a separate routine allows good inlining of Unref into many profitable call -// sites. However, the call to this function can be highly disruptive to the -// register pressure in those callers. To minimize the cost to callers, we use -// a special LLVM calling convention that preserves most registers. This allows -// the call to this routine in cold paths to not disrupt the caller's register -// pressure. This calling convention is not available on all platforms; we -// intentionally allow LLVM to ignore the attribute rather than attempting to -// hardcode the list of supported platforms. -#if defined(__clang__) && !defined(__i386__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wattributes" -__attribute__((preserve_most)) -#pragma clang diagnostic pop -#endif -static void UnrefInternal(CordRep* rep) { - assert(rep != nullptr); - - absl::InlinedVector<CordRep*, kInlinedVectorSize> pending; - while (true) { - if (rep->tag == CONCAT) { - CordRepConcat* rep_concat = rep->concat(); - CordRep* right = rep_concat->right; - if (!right->refcount.Decrement()) { - pending.push_back(right); - } - CordRep* left = rep_concat->left; - delete rep_concat; - rep = nullptr; - if (!left->refcount.Decrement()) { - rep = left; - continue; - } - } else if (rep->tag == EXTERNAL) { - CordRepExternal* rep_external = rep->external(); - absl::string_view data(rep_external->base, rep->length); - void* releaser = GetExternalReleaser(rep_external); - size_t releaser_size = rep_external->releaser_invoker(releaser, data); - rep_external->~CordRepExternal(); - DeallocateExternal(rep_external, releaser_size); - rep = nullptr; - } else if (rep->tag == SUBSTRING) { - CordRepSubstring* rep_substring = rep->substring(); - CordRep* child = rep_substring->child; - delete rep_substring; - rep = nullptr; - if (!child->refcount.Decrement()) { - rep = child; - continue; - } - } else { - // Flat CordReps are allocated and constructed with raw ::operator new - // and placement new, and must be destructed and deallocated - // accordingly. -#if defined(__cpp_sized_deallocation) - size_t size = TagToAllocatedSize(rep->tag); - rep->~CordRep(); - ::operator delete(rep, size); -#else - rep->~CordRep(); - ::operator delete(rep); -#endif - rep = nullptr; - } - - if (!pending.empty()) { - rep = pending.back(); - pending.pop_back(); - } else { - break; - } - } -} - -inline void Unref(CordRep* rep) { - // Fast-path for two common, hot cases: a null rep and a shared root. - if (ABSL_PREDICT_TRUE(rep == nullptr || - rep->refcount.DecrementExpectHighRefcount())) { - return; - } - - UnrefInternal(rep); -} - // Return the depth of a node static int Depth(const CordRep* rep) { - if (rep->tag == CONCAT) { + if (rep->IsConcat()) { return rep->concat()->depth(); } else { return 0; @@ -409,17 +158,19 @@ static void SetConcatChildren(CordRepConcat* concat, CordRep* left, // The returned node has a refcount of 1. static CordRep* RawConcat(CordRep* left, CordRep* right) { // Avoid making degenerate concat nodes (one child is empty) - if (left == nullptr || left->length == 0) { - Unref(left); + if (left == nullptr) return right; + if (right == nullptr) return left; + if (left->length == 0) { + CordRep::Unref(left); return right; } - if (right == nullptr || right->length == 0) { - Unref(right); + if (right->length == 0) { + CordRep::Unref(right); return left; } CordRepConcat* rep = new CordRepConcat(); - rep->tag = CONCAT; + rep->tag = cord_internal::CONCAT; SetConcatChildren(rep, left, right); return rep; @@ -453,35 +204,41 @@ static CordRep* MakeBalancedTree(CordRep** reps, size_t n) { return reps[0]; } -// Create a new flat node. -static CordRep* NewFlat(size_t length_hint) { - if (length_hint <= kMinFlatLength) { - length_hint = kMinFlatLength; - } else if (length_hint > kMaxFlatLength) { - length_hint = kMaxFlatLength; - } +static CordRepFlat* CreateFlat(const char* data, size_t length, + size_t alloc_hint) { + CordRepFlat* flat = CordRepFlat::New(length + alloc_hint); + flat->length = length; + memcpy(flat->Data(), data, length); + return flat; +} - // Round size up so it matches a size we can exactly express in a tag. - const size_t size = RoundUpForTag(length_hint + kFlatOverhead); - void* const raw_rep = ::operator new(size); - CordRep* rep = new (raw_rep) CordRep(); - rep->tag = AllocatedSizeToTag(size); - return VerifyTree(rep); +// Creates a new flat or Btree out of the specified array. +// The returned node has a refcount of 1. +static CordRep* NewBtree(const char* data, size_t length, size_t alloc_hint) { + if (length <= kMaxFlatLength) { + return CreateFlat(data, length, alloc_hint); + } + CordRepFlat* flat = CreateFlat(data, kMaxFlatLength, 0); + data += kMaxFlatLength; + length -= kMaxFlatLength; + auto* root = CordRepBtree::Create(flat); + return CordRepBtree::Append(root, {data, length}, alloc_hint); } // Create a new tree out of the specified array. // The returned node has a refcount of 1. -static CordRep* NewTree(const char* data, - size_t length, - size_t alloc_hint) { +static CordRep* NewTree(const char* data, size_t length, size_t alloc_hint) { if (length == 0) return nullptr; + if (btree_enabled()) { + return NewBtree(data, length, alloc_hint); + } absl::FixedArray<CordRep*> reps((length - 1) / kMaxFlatLength + 1); size_t n = 0; do { const size_t len = std::min(length, kMaxFlatLength); - CordRep* rep = NewFlat(len + alloc_hint); + CordRepFlat* rep = CordRepFlat::New(len + alloc_hint); rep->length = len; - memcpy(rep->data, data, len); + memcpy(rep->Data(), data, len); reps[n++] = VerifyTree(rep); data += len; length -= len; @@ -491,18 +248,12 @@ static CordRep* NewTree(const char* data, namespace cord_internal { -ExternalRepReleaserPair NewExternalWithUninitializedReleaser( - absl::string_view data, ExternalReleaserInvoker invoker, - size_t releaser_size) { +void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep) { assert(!data.empty()); - - void* raw_rep = AllocateExternal(releaser_size); - auto* rep = new (raw_rep) CordRepExternal(); rep->length = data.size(); rep->tag = EXTERNAL; rep->base = data.data(); - rep->releaser_invoker = invoker; - return {VerifyTree(rep), GetExternalReleaser(rep)}; + VerifyTree(rep); } } // namespace cord_internal @@ -510,87 +261,155 @@ ExternalRepReleaserPair NewExternalWithUninitializedReleaser( static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) { // Never create empty substring nodes if (length == 0) { - Unref(child); + CordRep::Unref(child); return nullptr; } else { CordRepSubstring* rep = new CordRepSubstring(); assert((offset + length) <= child->length); rep->length = length; - rep->tag = SUBSTRING; + rep->tag = cord_internal::SUBSTRING; rep->start = offset; rep->child = child; return VerifyTree(rep); } } +// Creates a CordRep from the provided string. If the string is large enough, +// and not wasteful, we move the string into an external cord rep, preserving +// the already allocated string contents. +// Requires the provided string length to be larger than `kMaxInline`. +static CordRep* CordRepFromString(std::string&& src) { + assert(src.length() > cord_internal::kMaxInline); + if ( + // String is short: copy data to avoid external block overhead. + src.size() <= kMaxBytesToCopy || + // String is wasteful: copy data to avoid pinning too much unused memory. + src.size() < src.capacity() / 2 + ) { + return NewTree(src.data(), src.size(), 0); + } + + struct StringReleaser { + void operator()(absl::string_view /* data */) {} + std::string data; + }; + const absl::string_view original_data = src; + auto* rep = + static_cast<::absl::cord_internal::CordRepExternalImpl<StringReleaser>*>( + absl::cord_internal::NewExternalRep(original_data, + StringReleaser{std::move(src)})); + // Moving src may have invalidated its data pointer, so adjust it. + rep->base = rep->template get<0>().data.data(); + return rep; +} + // -------------------------------------------------------------------- // Cord::InlineRep functions -// This will trigger LNK2005 in MSVC. -#ifndef COMPILER_MSVC -const unsigned char Cord::InlineRep::kMaxInline; -#endif // COMPILER_MSVC +constexpr unsigned char Cord::InlineRep::kMaxInline; inline void Cord::InlineRep::set_data(const char* data, size_t n, bool nullify_tail) { static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15"); - cord_internal::SmallMemmove(data_, data, n, nullify_tail); - data_[kMaxInline] = static_cast<char>(n); + cord_internal::SmallMemmove(data_.as_chars(), data, n, nullify_tail); + set_inline_size(n); } inline char* Cord::InlineRep::set_data(size_t n) { assert(n <= kMaxInline); - memset(data_, 0, sizeof(data_)); - data_[kMaxInline] = static_cast<char>(n); - return data_; -} - -inline CordRep* Cord::InlineRep::force_tree(size_t extra_hint) { - size_t len = data_[kMaxInline]; - CordRep* result; - if (len > kMaxInline) { - memcpy(&result, data_, sizeof(result)); - } else { - result = NewFlat(len + extra_hint); - result->length = len; - memcpy(result->data, data_, len); - set_tree(result); - } - return result; + ResetToEmpty(); + set_inline_size(n); + return data_.as_chars(); } inline void Cord::InlineRep::reduce_size(size_t n) { - size_t tag = data_[kMaxInline]; + size_t tag = inline_size(); assert(tag <= kMaxInline); assert(tag >= n); tag -= n; - memset(data_ + tag, 0, n); - data_[kMaxInline] = static_cast<char>(tag); + memset(data_.as_chars() + tag, 0, n); + set_inline_size(static_cast<char>(tag)); } inline void Cord::InlineRep::remove_prefix(size_t n) { - cord_internal::SmallMemmove(data_, data_ + n, data_[kMaxInline] - n); + cord_internal::SmallMemmove(data_.as_chars(), data_.as_chars() + n, + inline_size() - n); reduce_size(n); } -void Cord::InlineRep::AppendTree(CordRep* tree) { - if (tree == nullptr) return; - size_t len = data_[kMaxInline]; - if (len == 0) { - set_tree(tree); +// Returns `rep` converted into a CordRepBtree. +// Directly returns `rep` if `rep` is already a CordRepBtree. +static CordRepBtree* ForceBtree(CordRep* rep) { + return rep->IsBtree() ? rep->btree() : CordRepBtree::Create(rep); +} + +void Cord::InlineRep::AppendTreeToInlined(CordRep* tree, + MethodIdentifier method) { + assert(!is_tree()); + if (!data_.is_empty()) { + CordRepFlat* flat = MakeFlatWithExtraCapacity(0); + if (btree_enabled()) { + tree = CordRepBtree::Append(CordRepBtree::Create(flat), tree); + } else { + tree = Concat(flat, tree); + } + } + EmplaceTree(tree, method); +} + +void Cord::InlineRep::AppendTreeToTree(CordRep* tree, MethodIdentifier method) { + assert(is_tree()); + const CordzUpdateScope scope(data_.cordz_info(), method); + if (btree_enabled()) { + tree = CordRepBtree::Append(ForceBtree(data_.as_tree()), tree); } else { - set_tree(Concat(force_tree(0), tree)); + tree = Concat(data_.as_tree(), tree); } + SetTree(tree, scope); } -void Cord::InlineRep::PrependTree(CordRep* tree) { +void Cord::InlineRep::AppendTree(CordRep* tree, MethodIdentifier method) { if (tree == nullptr) return; - size_t len = data_[kMaxInline]; - if (len == 0) { - set_tree(tree); + if (data_.is_tree()) { + AppendTreeToTree(tree, method); } else { - set_tree(Concat(tree, force_tree(0))); + AppendTreeToInlined(tree, method); + } +} + +void Cord::InlineRep::PrependTreeToInlined(CordRep* tree, + MethodIdentifier method) { + assert(!is_tree()); + if (!data_.is_empty()) { + CordRepFlat* flat = MakeFlatWithExtraCapacity(0); + if (btree_enabled()) { + tree = CordRepBtree::Prepend(CordRepBtree::Create(flat), tree); + } else { + tree = Concat(tree, flat); + } + } + EmplaceTree(tree, method); +} + +void Cord::InlineRep::PrependTreeToTree(CordRep* tree, + MethodIdentifier method) { + assert(is_tree()); + const CordzUpdateScope scope(data_.cordz_info(), method); + if (btree_enabled()) { + tree = CordRepBtree::Prepend(ForceBtree(data_.as_tree()), tree); + } else { + tree = Concat(tree, data_.as_tree()); + } + SetTree(tree, scope); +} + +void Cord::InlineRep::PrependTree(CordRep* tree, MethodIdentifier method) { + assert(tree != nullptr); + if (data_.is_tree()) { + PrependTreeToTree(tree, method); + } else { + PrependTreeToInlined(tree, method); } } @@ -600,20 +419,29 @@ void Cord::InlineRep::PrependTree(CordRep* tree) { // written to region and the actual size increase will be written to size. static inline bool PrepareAppendRegion(CordRep* root, char** region, size_t* size, size_t max_length) { + if (root->IsBtree() && root->refcount.IsMutable()) { + Span<char> span = root->btree()->GetAppendBuffer(max_length); + if (!span.empty()) { + *region = span.data(); + *size = span.size(); + return true; + } + } + // Search down the right-hand path for a non-full FLAT node. CordRep* dst = root; - while (dst->tag == CONCAT && dst->refcount.IsOne()) { + while (dst->IsConcat() && dst->refcount.IsMutable()) { dst = dst->concat()->right; } - if (dst->tag < FLAT || !dst->refcount.IsOne()) { + if (!dst->IsFlat() || !dst->refcount.IsMutable()) { *region = nullptr; *size = 0; return false; } const size_t in_use = dst->length; - const size_t capacity = TagToLength(dst->tag); + const size_t capacity = dst->flat()->Capacity(); if (in_use == capacity) { *region = nullptr; *size = 0; @@ -628,214 +456,281 @@ static inline bool PrepareAppendRegion(CordRep* root, char** region, } dst->length += size_increase; - *region = dst->data + in_use; + *region = dst->flat()->Data() + in_use; *size = size_increase; return true; } +template <bool has_length> void Cord::InlineRep::GetAppendRegion(char** region, size_t* size, - size_t max_length) { - if (max_length == 0) { - *region = nullptr; - *size = 0; - return; - } + size_t length) { + auto constexpr method = CordzUpdateTracker::kGetAppendRegion; - // Try to fit in the inline buffer if possible. - size_t inline_length = data_[kMaxInline]; - if (inline_length < kMaxInline && max_length <= kMaxInline - inline_length) { - *region = data_ + inline_length; - *size = max_length; - data_[kMaxInline] = static_cast<char>(inline_length + max_length); - return; + CordRep* root = tree(); + size_t sz = root ? root->length : inline_size(); + if (root == nullptr) { + size_t available = kMaxInline - sz; + if (available >= (has_length ? length : 1)) { + *region = data_.as_chars() + sz; + *size = has_length ? length : available; + set_inline_size(has_length ? sz + length : kMaxInline); + return; + } } - CordRep* root = force_tree(max_length); - - if (PrepareAppendRegion(root, region, size, max_length)) { + size_t extra = has_length ? length : (std::max)(sz, kMinFlatLength); + CordRep* rep = root ? root : MakeFlatWithExtraCapacity(extra); + CordzUpdateScope scope(root ? data_.cordz_info() : nullptr, method); + if (PrepareAppendRegion(rep, region, size, length)) { + CommitTree(root, rep, scope, method); return; } // Allocate new node. - CordRep* new_node = - NewFlat(std::max(static_cast<size_t>(root->length), max_length)); - new_node->length = - std::min(static_cast<size_t>(TagToLength(new_node->tag)), max_length); - *region = new_node->data; + CordRepFlat* new_node = CordRepFlat::New(extra); + new_node->length = std::min(new_node->Capacity(), length); + *region = new_node->Data(); *size = new_node->length; - replace_tree(Concat(root, new_node)); -} - -void Cord::InlineRep::GetAppendRegion(char** region, size_t* size) { - const size_t max_length = std::numeric_limits<size_t>::max(); - // Try to fit in the inline buffer if possible. - size_t inline_length = data_[kMaxInline]; - if (inline_length < kMaxInline) { - *region = data_ + inline_length; - *size = kMaxInline - inline_length; - data_[kMaxInline] = kMaxInline; - return; + if (btree_enabled()) { + rep = CordRepBtree::Append(ForceBtree(rep), new_node); + } else { + rep = Concat(rep, new_node); } + CommitTree(root, rep, scope, method); +} - CordRep* root = force_tree(max_length); - - if (PrepareAppendRegion(root, region, size, max_length)) { - return; +// Computes the memory side of the provided edge which must be a valid data edge +// for a btrtee, i.e., a FLAT, EXTERNAL or SUBSTRING of a FLAT or EXTERNAL node. +static bool RepMemoryUsageDataEdge(const CordRep* rep, + size_t* total_mem_usage) { + size_t maybe_sub_size = 0; + if (ABSL_PREDICT_FALSE(rep->IsSubstring())) { + maybe_sub_size = sizeof(cord_internal::CordRepSubstring); + rep = rep->substring()->child; } - - // Allocate new node. - CordRep* new_node = NewFlat(root->length); - new_node->length = TagToLength(new_node->tag); - *region = new_node->data; - *size = new_node->length; - replace_tree(Concat(root, new_node)); + if (rep->IsFlat()) { + *total_mem_usage += maybe_sub_size + rep->flat()->AllocatedSize(); + return true; + } + if (rep->IsExternal()) { + // We don't know anything about the embedded / bound data, but we can safely + // assume it is 'at least' a word / pointer to data. In the future we may + // choose to use the 'data' byte as a tag to identify the types of some + // well-known externals, such as a std::string instance. + *total_mem_usage += maybe_sub_size + + sizeof(cord_internal::CordRepExternalImpl<intptr_t>) + + rep->length; + return true; + } + return false; } // If the rep is a leaf, this will increment the value at total_mem_usage and // will return true. static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) { - if (rep->tag >= FLAT) { - *total_mem_usage += TagToAllocatedSize(rep->tag); + if (rep->IsFlat()) { + *total_mem_usage += rep->flat()->AllocatedSize(); return true; } - if (rep->tag == EXTERNAL) { - *total_mem_usage += sizeof(CordRepConcat) + rep->length; + if (rep->IsExternal()) { + // We don't know anything about the embedded / bound data, but we can safely + // assume it is 'at least' a word / pointer to data. In the future we may + // choose to use the 'data' byte as a tag to identify the types of some + // well-known externals, such as a std::string instance. + *total_mem_usage += + sizeof(cord_internal::CordRepExternalImpl<intptr_t>) + rep->length; return true; } return false; } void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) { - ClearSlow(); + assert(&src != this); + assert(is_tree() || src.is_tree()); + auto constexpr method = CordzUpdateTracker::kAssignCord; + if (ABSL_PREDICT_TRUE(!is_tree())) { + EmplaceTree(CordRep::Ref(src.as_tree()), src.data_, method); + return; + } - memcpy(data_, src.data_, sizeof(data_)); - if (is_tree()) { - Ref(tree()); + CordRep* tree = as_tree(); + if (CordRep* src_tree = src.tree()) { + // Leave any existing `cordz_info` in place, and let MaybeTrackCord() + // decide if this cord should be (or remains to be) sampled or not. + data_.set_tree(CordRep::Ref(src_tree)); + CordzInfo::MaybeTrackCord(data_, src.data_, method); + } else { + CordzInfo::MaybeUntrackCord(data_.cordz_info()); + data_ = src.data_; } + CordRep::Unref(tree); } -void Cord::InlineRep::ClearSlow() { +void Cord::InlineRep::UnrefTree() { if (is_tree()) { - Unref(tree()); + CordzInfo::MaybeUntrackCord(data_.cordz_info()); + CordRep::Unref(tree()); } - memset(data_, 0, sizeof(data_)); } // -------------------------------------------------------------------- // Constructors and destructors -Cord::Cord(const Cord& src) : contents_(src.contents_) { - Ref(contents_.tree()); // Does nothing if contents_ has embedded data -} - -Cord::Cord(absl::string_view src) { +Cord::Cord(absl::string_view src, MethodIdentifier method) + : contents_(InlineData::kDefaultInit) { const size_t n = src.size(); if (n <= InlineRep::kMaxInline) { - contents_.set_data(src.data(), n, false); + contents_.set_data(src.data(), n, true); + } else { + CordRep* rep = NewTree(src.data(), n, 0); + contents_.EmplaceTree(rep, method); + } +} + +template <typename T, Cord::EnableIfString<T>> +Cord::Cord(T&& src) : contents_(InlineData::kDefaultInit) { + if (src.size() <= InlineRep::kMaxInline) { + contents_.set_data(src.data(), src.size(), true); } else { - contents_.set_tree(NewTree(src.data(), n, 0)); + CordRep* rep = CordRepFromString(std::forward<T>(src)); + contents_.EmplaceTree(rep, CordzUpdateTracker::kConstructorString); } } +template Cord::Cord(std::string&& src); + // The destruction code is separate so that the compiler can determine // that it does not need to call the destructor on a moved-from Cord. void Cord::DestroyCordSlow() { - Unref(VerifyTree(contents_.tree())); + assert(contents_.is_tree()); + CordzInfo::MaybeUntrackCord(contents_.cordz_info()); + CordRep::Unref(VerifyTree(contents_.as_tree())); } // -------------------------------------------------------------------- // Mutators void Cord::Clear() { - Unref(contents_.clear()); + if (CordRep* tree = contents_.clear()) { + CordRep::Unref(tree); + } } -Cord& Cord::operator=(absl::string_view src) { +Cord& Cord::AssignLargeString(std::string&& src) { + auto constexpr method = CordzUpdateTracker::kAssignString; + assert(src.size() > kMaxBytesToCopy); + CordRep* rep = CordRepFromString(std::move(src)); + if (CordRep* tree = contents_.tree()) { + CordzUpdateScope scope(contents_.cordz_info(), method); + contents_.SetTree(rep, scope); + CordRep::Unref(tree); + } else { + contents_.EmplaceTree(rep, method); + } + return *this; +} +Cord& Cord::operator=(absl::string_view src) { + auto constexpr method = CordzUpdateTracker::kAssignString; const char* data = src.data(); size_t length = src.size(); CordRep* tree = contents_.tree(); if (length <= InlineRep::kMaxInline) { - // Embed into this->contents_ + // Embed into this->contents_, which is somewhat subtle: + // - MaybeUntrackCord must be called before Unref(tree). + // - MaybeUntrackCord must be called before set_data() clobbers cordz_info. + // - set_data() must be called before Unref(tree) as it may reference tree. + if (tree != nullptr) CordzInfo::MaybeUntrackCord(contents_.cordz_info()); contents_.set_data(data, length, true); - Unref(tree); + if (tree != nullptr) CordRep::Unref(tree); return *this; } - if (tree != nullptr && tree->tag >= FLAT && - TagToLength(tree->tag) >= length && tree->refcount.IsOne()) { - // Copy in place if the existing FLAT node is reusable. - memmove(tree->data, data, length); - tree->length = length; - VerifyTree(tree); - return *this; + if (tree != nullptr) { + CordzUpdateScope scope(contents_.cordz_info(), method); + if (tree->IsFlat() && tree->flat()->Capacity() >= length && + tree->refcount.IsMutable()) { + // Copy in place if the existing FLAT node is reusable. + memmove(tree->flat()->Data(), data, length); + tree->length = length; + VerifyTree(tree); + return *this; + } + contents_.SetTree(NewTree(data, length, 0), scope); + CordRep::Unref(tree); + } else { + contents_.EmplaceTree(NewTree(data, length, 0), method); } - contents_.set_tree(NewTree(data, length, 0)); - Unref(tree); return *this; } // TODO(sanjay): Move to Cord::InlineRep section of file. For now, // we keep it here to make diffs easier. -void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) { - if (src_size == 0) return; // memcpy(_, nullptr, 0) is undefined. - // Try to fit in the inline buffer if possible. - size_t inline_length = data_[kMaxInline]; - if (inline_length < kMaxInline && src_size <= kMaxInline - inline_length) { - // Append new data to embedded array - data_[kMaxInline] = static_cast<char>(inline_length + src_size); - memcpy(data_ + inline_length, src_data, src_size); - return; - } - - CordRep* root = tree(); +void Cord::InlineRep::AppendArray(absl::string_view src, + MethodIdentifier method) { + if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. size_t appended = 0; - if (root) { + CordRep* rep = tree(); + const CordRep* const root = rep; + CordzUpdateScope scope(root ? cordz_info() : nullptr, method); + if (root != nullptr) { char* region; - if (PrepareAppendRegion(root, ®ion, &appended, src_size)) { - memcpy(region, src_data, appended); + if (PrepareAppendRegion(rep, ®ion, &appended, src.size())) { + memcpy(region, src.data(), appended); } } else { - // It is possible that src_data == data_, but when we transition from an - // InlineRep to a tree we need to assign data_ = root via set_tree. To - // avoid corrupting the source data before we copy it, delay calling - // set_tree until after we've copied data. - // We are going from an inline size to beyond inline size. Make the new size - // either double the inlined size, or the added size + 10%. - const size_t size1 = inline_length * 2 + src_size; - const size_t size2 = inline_length + src_size / 10; - root = NewFlat(std::max<size_t>(size1, size2)); - appended = std::min(src_size, TagToLength(root->tag) - inline_length); - memcpy(root->data, data_, inline_length); - memcpy(root->data + inline_length, src_data, appended); - root->length = inline_length + appended; - set_tree(root); - } - - src_data += appended; - src_size -= appended; - if (src_size == 0) { + // Try to fit in the inline buffer if possible. + size_t inline_length = inline_size(); + if (src.size() <= kMaxInline - inline_length) { + // Append new data to embedded array + memcpy(data_.as_chars() + inline_length, src.data(), src.size()); + set_inline_size(inline_length + src.size()); + return; + } + + // Allocate flat to be a perfect fit on first append exceeding inlined size. + // Subsequent growth will use amortized growth until we reach maximum flat + // size. + rep = CordRepFlat::New(inline_length + src.size()); + appended = std::min(src.size(), rep->flat()->Capacity() - inline_length); + memcpy(rep->flat()->Data(), data_.as_chars(), inline_length); + memcpy(rep->flat()->Data() + inline_length, src.data(), appended); + rep->length = inline_length + appended; + } + + src.remove_prefix(appended); + if (src.empty()) { + CommitTree(root, rep, scope, method); return; } - // Use new block(s) for any remaining bytes that were not handled above. - // Alloc extra memory only if the right child of the root of the new tree is - // going to be a FLAT node, which will permit further inplace appends. - size_t length = src_size; - if (src_size < kMaxFlatLength) { - // The new length is either - // - old size + 10% - // - old_size + src_size - // This will cause a reasonable conservative step-up in size that is still - // large enough to avoid excessive amounts of small fragments being added. - length = std::max<size_t>(root->length / 10, src_size); + if (btree_enabled()) { + // TODO(b/192061034): keep legacy 10% growth rate: consider other rates. + rep = ForceBtree(rep); + const size_t min_growth = std::max<size_t>(rep->length / 10, src.size()); + rep = CordRepBtree::Append(rep->btree(), src, min_growth - src.size()); + } else { + // Use new block(s) for any remaining bytes that were not handled above. + // Alloc extra memory only if the right child of the root of the new tree + // is going to be a FLAT node, which will permit further inplace appends. + size_t length = src.size(); + if (src.size() < kMaxFlatLength) { + // The new length is either + // - old size + 10% + // - old_size + src.size() + // This will cause a reasonable conservative step-up in size that is + // still large enough to avoid excessive amounts of small fragments + // being added. + length = std::max<size_t>(rep->length / 10, src.size()); + } + rep = Concat(rep, NewTree(src.data(), src.size(), length - src.size())); } - set_tree(Concat(root, NewTree(src_data, src_size, length - src_size))); + CommitTree(root, rep, scope, method); } inline CordRep* Cord::TakeRep() const& { - return Ref(contents_.tree()); + return CordRep::Ref(contents_.tree()); } inline CordRep* Cord::TakeRep() && { @@ -846,10 +741,17 @@ inline CordRep* Cord::TakeRep() && { template <typename C> inline void Cord::AppendImpl(C&& src) { + auto constexpr method = CordzUpdateTracker::kAppendCord; if (empty()) { - // In case of an empty destination avoid allocating a new node, do not copy - // data. - *this = std::forward<C>(src); + // Since destination is empty, we can avoid allocating a node, + if (src.contents_.is_tree()) { + // by taking the tree directly + CordRep* rep = std::forward<C>(src).TakeRep(); + contents_.EmplaceTree(rep, method); + } else { + // or copying over inline data + contents_.data_ = src.contents_.data_; + } return; } @@ -859,12 +761,12 @@ inline void Cord::AppendImpl(C&& src) { CordRep* src_tree = src.contents_.tree(); if (src_tree == nullptr) { // src has embedded data. - contents_.AppendArray(src.contents_.data(), src_size); + contents_.AppendArray({src.contents_.data(), src_size}, method); return; } - if (src_tree->tag >= FLAT) { + if (src_tree->IsFlat()) { // src tree just has one flat node. - contents_.AppendArray(src_tree->data, src_size); + contents_.AppendArray({src_tree->flat()->Data(), src_size}, method); return; } if (&src == this) { @@ -879,18 +781,36 @@ inline void Cord::AppendImpl(C&& src) { return; } - contents_.AppendTree(std::forward<C>(src).TakeRep()); + // Guaranteed to be a tree (kMaxBytesToCopy > kInlinedSize) + CordRep* rep = std::forward<C>(src).TakeRep(); + contents_.AppendTree(rep, CordzUpdateTracker::kAppendCord); +} + +void Cord::Append(const Cord& src) { + AppendImpl(src); } -void Cord::Append(const Cord& src) { AppendImpl(src); } +void Cord::Append(Cord&& src) { + AppendImpl(std::move(src)); +} -void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); } +template <typename T, Cord::EnableIfString<T>> +void Cord::Append(T&& src) { + if (src.size() <= kMaxBytesToCopy) { + Append(absl::string_view(src)); + } else { + CordRep* rep = CordRepFromString(std::forward<T>(src)); + contents_.AppendTree(rep, CordzUpdateTracker::kAppendString); + } +} + +template void Cord::Append(std::string&& src); void Cord::Prepend(const Cord& src) { CordRep* src_tree = src.contents_.tree(); if (src_tree != nullptr) { - Ref(src_tree); - contents_.PrependTree(src_tree); + CordRep::Ref(src_tree); + contents_.PrependTree(src_tree, CordzUpdateTracker::kPrependCord); return; } @@ -899,28 +819,42 @@ void Cord::Prepend(const Cord& src) { return Prepend(src_contents); } -void Cord::Prepend(absl::string_view src) { +void Cord::PrependArray(absl::string_view src, MethodIdentifier method) { if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. - size_t cur_size = contents_.size(); - if (!contents_.is_tree() && cur_size + src.size() <= InlineRep::kMaxInline) { - // Use embedded storage. - char data[InlineRep::kMaxInline + 1] = {0}; - data[InlineRep::kMaxInline] = cur_size + src.size(); // set size - memcpy(data, src.data(), src.size()); - memcpy(data + src.size(), contents_.data(), cur_size); - memcpy(reinterpret_cast<void*>(&contents_), data, - InlineRep::kMaxInline + 1); + if (!contents_.is_tree()) { + size_t cur_size = contents_.inline_size(); + if (cur_size + src.size() <= InlineRep::kMaxInline) { + // Use embedded storage. + char data[InlineRep::kMaxInline + 1] = {0}; + memcpy(data, src.data(), src.size()); + memcpy(data + src.size(), contents_.data(), cur_size); + memcpy(contents_.data_.as_chars(), data, InlineRep::kMaxInline + 1); + contents_.set_inline_size(cur_size + src.size()); + return; + } + } + CordRep* rep = NewTree(src.data(), src.size(), 0); + contents_.PrependTree(rep, method); +} + +template <typename T, Cord::EnableIfString<T>> +inline void Cord::Prepend(T&& src) { + if (src.size() <= kMaxBytesToCopy) { + Prepend(absl::string_view(src)); } else { - contents_.PrependTree(NewTree(src.data(), src.size(), 0)); + CordRep* rep = CordRepFromString(std::forward<T>(src)); + contents_.PrependTree(rep, CordzUpdateTracker::kPrependString); } } +template void Cord::Prepend(std::string&& src); + static CordRep* RemovePrefixFrom(CordRep* node, size_t n) { if (n >= node->length) return nullptr; - if (n == 0) return Ref(node); + if (n == 0) return CordRep::Ref(node); absl::InlinedVector<CordRep*, kInlinedVectorSize> rhs_stack; - while (node->tag == CONCAT) { + while (node->IsConcat()) { assert(n <= node->length); if (n < node->concat()->left->length) { // Push right to stack, descend left. @@ -935,19 +869,19 @@ static CordRep* RemovePrefixFrom(CordRep* node, size_t n) { assert(n <= node->length); if (n == 0) { - Ref(node); + CordRep::Ref(node); } else { size_t start = n; size_t len = node->length - n; - if (node->tag == SUBSTRING) { + if (node->IsSubstring()) { // Consider in-place update of node, similar to in RemoveSuffixFrom(). start += node->substring()->start; node = node->substring()->child; } - node = NewSubstring(Ref(node), start, len); + node = NewSubstring(CordRep::Ref(node), start, len); } while (!rhs_stack.empty()) { - node = Concat(node, Ref(rhs_stack.back())); + node = Concat(node, CordRep::Ref(rhs_stack.back())); rhs_stack.pop_back(); } return node; @@ -958,11 +892,11 @@ static CordRep* RemovePrefixFrom(CordRep* node, size_t n) { // edited in place iff that node and all its ancestors have a refcount of 1. static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) { if (n >= node->length) return nullptr; - if (n == 0) return Ref(node); + if (n == 0) return CordRep::Ref(node); absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack; - bool inplace_ok = node->refcount.IsOne(); + bool inplace_ok = node->refcount.IsMutable(); - while (node->tag == CONCAT) { + while (node->IsConcat()) { assert(n <= node->length); if (n < node->concat()->right->length) { // Push left to stack, descend right. @@ -973,28 +907,28 @@ static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) { n -= node->concat()->right->length; node = node->concat()->left; } - inplace_ok = inplace_ok && node->refcount.IsOne(); + inplace_ok = inplace_ok && node->refcount.IsMutable(); } assert(n <= node->length); if (n == 0) { - Ref(node); - } else if (inplace_ok && node->tag != EXTERNAL) { + CordRep::Ref(node); + } else if (inplace_ok && !node->IsExternal()) { // Consider making a new buffer if the current node capacity is much // larger than the new length. - Ref(node); + CordRep::Ref(node); node->length -= n; } else { size_t start = 0; size_t len = node->length - n; - if (node->tag == SUBSTRING) { + if (node->IsSubstring()) { start = node->substring()->start; node = node->substring()->child; } - node = NewSubstring(Ref(node), start, len); + node = NewSubstring(CordRep::Ref(node), start, len); } while (!lhs_stack.empty()) { - node = Concat(Ref(lhs_stack.back()), node); + node = Concat(CordRep::Ref(lhs_stack.back()), node); lhs_stack.pop_back(); } return node; @@ -1008,9 +942,18 @@ void Cord::RemovePrefix(size_t n) { if (tree == nullptr) { contents_.remove_prefix(n); } else { - CordRep* newrep = RemovePrefixFrom(tree, n); - Unref(tree); - contents_.replace_tree(VerifyTree(newrep)); + auto constexpr method = CordzUpdateTracker::kRemovePrefix; + CordzUpdateScope scope(contents_.cordz_info(), method); + if (tree->IsBtree()) { + CordRep* old = tree; + tree = tree->btree()->SubTree(n, tree->length - n); + CordRep::Unref(old); + } else { + CordRep* newrep = RemovePrefixFrom(tree, n); + CordRep::Unref(tree); + tree = VerifyTree(newrep); + } + contents_.SetTreeOrEmpty(tree, scope); } } @@ -1022,9 +965,16 @@ void Cord::RemoveSuffix(size_t n) { if (tree == nullptr) { contents_.reduce_size(n); } else { - CordRep* newrep = RemoveSuffixFrom(tree, n); - Unref(tree); - contents_.replace_tree(VerifyTree(newrep)); + auto constexpr method = CordzUpdateTracker::kRemoveSuffix; + CordzUpdateScope scope(contents_.cordz_info(), method); + if (tree->IsBtree()) { + tree = CordRepBtree::RemoveSuffix(tree->btree(), n); + } else { + CordRep* newrep = RemoveSuffixFrom(tree, n); + CordRep::Unref(tree); + tree = VerifyTree(newrep); + } + contents_.SetTreeOrEmpty(tree, scope); } } @@ -1056,13 +1006,13 @@ static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) { results.pop_back(); results.push_back(Concat(left, right)); } else if (pos == 0 && n == node->length) { - results.push_back(Ref(node)); - } else if (node->tag != CONCAT) { - if (node->tag == SUBSTRING) { + results.push_back(CordRep::Ref(node)); + } else if (!node->IsConcat()) { + if (node->IsSubstring()) { pos += node->substring()->start; node = node->substring()->child; } - results.push_back(NewSubstring(Ref(node), pos, n)); + results.push_back(NewSubstring(CordRep::Ref(node), pos, n)); } else if (pos + n <= node->concat()->left->length) { todo.push_back(SubRange(node->concat()->left, pos, n)); } else if (pos >= node->concat()->left->length) { @@ -1084,17 +1034,20 @@ Cord Cord::Subcord(size_t pos, size_t new_size) const { size_t length = size(); if (pos > length) pos = length; if (new_size > length - pos) new_size = length - pos; + if (new_size == 0) return sub_cord; + CordRep* tree = contents_.tree(); if (tree == nullptr) { // sub_cord is newly constructed, no need to re-zero-out the tail of // contents_ memory. sub_cord.contents_.set_data(contents_.data() + pos, new_size, false); - } else if (new_size == 0) { - // We want to return empty subcord, so nothing to do. - } else if (new_size <= InlineRep::kMaxInline) { + return sub_cord; + } + + if (new_size <= InlineRep::kMaxInline) { + char* dest = sub_cord.contents_.data_.as_chars(); Cord::ChunkIterator it = chunk_begin(); it.AdvanceBytes(pos); - char* dest = sub_cord.contents_.data_; size_t remaining_size = new_size; while (remaining_size > it->size()) { cord_internal::SmallMemmove(dest, it->data(), it->size()); @@ -1103,10 +1056,17 @@ Cord Cord::Subcord(size_t pos, size_t new_size) const { ++it; } cord_internal::SmallMemmove(dest, it->data(), remaining_size); - sub_cord.contents_.data_[InlineRep::kMaxInline] = new_size; + sub_cord.contents_.set_inline_size(new_size); + return sub_cord; + } + + if (tree->IsBtree()) { + tree = tree->btree()->SubTree(pos, new_size); } else { - sub_cord.contents_.set_tree(NewSubRange(tree, pos, new_size)); + tree = NewSubRange(tree, pos, new_size); } + sub_cord.contents_.EmplaceTree(tree, contents_.data_, + CordzUpdateTracker::kSubCord); return sub_cord; } @@ -1125,7 +1085,7 @@ class CordForest { CordRep* node = pending.back(); pending.pop_back(); CheckNode(node); - if (ABSL_PREDICT_FALSE(node->tag != CONCAT)) { + if (ABSL_PREDICT_FALSE(!node->IsConcat())) { AddNode(node); continue; } @@ -1140,9 +1100,9 @@ class CordForest { concat_node->left = concat_freelist_; concat_freelist_ = concat_node; } else { - Ref(concat_node->right); - Ref(concat_node->left); - Unref(concat_node); + CordRep::Ref(concat_node->right); + CordRep::Ref(concat_node->left); + CordRep::Unref(concat_node); } } else { AddNode(node); @@ -1175,7 +1135,7 @@ class CordForest { void AddNode(CordRep* node) { CordRep* sum = nullptr; - // Collect together everything with which we will merge node + // Collect together everything with which we will merge with node int i = 0; for (; node->length > min_length[i + 1]; ++i) { auto& tree_at_i = trees_[i]; @@ -1219,7 +1179,7 @@ class CordForest { static void CheckNode(CordRep* node) { ABSL_INTERNAL_CHECK(node->length != 0u, ""); - if (node->tag == CONCAT) { + if (node->IsConcat()) { ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, ""); ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, ""); ABSL_INTERNAL_CHECK(node->length == (node->concat()->left->length + @@ -1239,7 +1199,7 @@ class CordForest { static CordRep* Rebalance(CordRep* node) { VerifyTree(node); - assert(node->tag == CONCAT); + assert(node->IsConcat()); if (node->length == 0) { return nullptr; @@ -1289,25 +1249,33 @@ bool ComputeCompareResult<bool>(int memcmp_res) { } // namespace -// Helper routine. Locates the first flat chunk of the Cord without -// initializing the iterator. +// Helper routine. Locates the first flat or external chunk of the Cord without +// initializing the iterator, and returns a string_view referencing the data. inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { - size_t n = data_[kMaxInline]; - if (n <= kMaxInline) { - return absl::string_view(data_, n); + if (!is_tree()) { + return absl::string_view(data_.as_chars(), data_.inline_size()); } CordRep* node = tree(); - if (node->tag >= FLAT) { - return absl::string_view(node->data, node->length); + if (node->IsFlat()) { + return absl::string_view(node->flat()->Data(), node->length); } - if (node->tag == EXTERNAL) { + if (node->IsExternal()) { return absl::string_view(node->external()->base, node->length); } + if (node->IsBtree()) { + CordRepBtree* tree = node->btree(); + int height = tree->height(); + while (--height >= 0) { + tree = tree->Edge(CordRepBtree::kFront)->btree(); + } + return tree->Data(tree->begin()); + } + // Walk down the left branches until we hit a non-CONCAT node. - while (node->tag == CONCAT) { + while (node->IsConcat()) { node = node->concat()->left; } @@ -1316,16 +1284,16 @@ inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { size_t length = node->length; assert(length != 0); - if (node->tag == SUBSTRING) { + if (node->IsSubstring()) { offset = node->substring()->start; node = node->substring()->child; } - if (node->tag >= FLAT) { - return absl::string_view(node->data + offset, length); + if (node->IsFlat()) { + return absl::string_view(node->flat()->Data() + offset, length); } - assert((node->tag == EXTERNAL) && "Expect FLAT or EXTERNAL node here"); + assert(node->IsExternal() && "Expect FLAT or EXTERNAL node here"); return absl::string_view(node->external()->base + offset, length); } @@ -1505,48 +1473,47 @@ void Cord::CopyToArraySlowPath(char* dst) const { } } -Cord::ChunkIterator& Cord::ChunkIterator::operator++() { - assert(bytes_remaining_ > 0 && "Attempted to iterate past `end()`"); - assert(bytes_remaining_ >= current_chunk_.size()); - bytes_remaining_ -= current_chunk_.size(); - - if (stack_of_right_children_.empty()) { +Cord::ChunkIterator& Cord::ChunkIterator::AdvanceStack() { + auto& stack_of_right_children = stack_of_right_children_; + if (stack_of_right_children.empty()) { assert(!current_chunk_.empty()); // Called on invalid iterator. // We have reached the end of the Cord. return *this; } // Process the next node on the stack. - CordRep* node = stack_of_right_children_.back(); - stack_of_right_children_.pop_back(); + CordRep* node = stack_of_right_children.back(); + stack_of_right_children.pop_back(); // Walk down the left branches until we hit a non-CONCAT node. Save the // right children to the stack for subsequent traversal. - while (node->tag == CONCAT) { - stack_of_right_children_.push_back(node->concat()->right); + while (node->IsConcat()) { + stack_of_right_children.push_back(node->concat()->right); node = node->concat()->left; } // Get the child node if we encounter a SUBSTRING. size_t offset = 0; size_t length = node->length; - if (node->tag == SUBSTRING) { + if (node->IsSubstring()) { offset = node->substring()->start; node = node->substring()->child; } - assert(node->tag == EXTERNAL || node->tag >= FLAT); + assert(node->IsExternal() || node->IsFlat()); assert(length != 0); const char* data = - node->tag == EXTERNAL ? node->external()->base : node->data; + node->IsExternal() ? node->external()->base : node->flat()->Data(); current_chunk_ = absl::string_view(data + offset, length); current_leaf_ = node; return *this; } Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { - assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`"); + ABSL_HARDENING_ASSERT(bytes_remaining_ >= n && + "Attempted to iterate past `end()`"); Cord subcord; + auto constexpr method = CordzUpdateTracker::kCordReader; if (n <= InlineRep::kMaxInline) { // Range to read fits in inline data. Flatten it. @@ -1565,14 +1532,34 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { } return subcord; } + + if (btree_reader_) { + size_t chunk_size = current_chunk_.size(); + if (n <= chunk_size && n <= kMaxBytesToCopy) { + subcord = Cord(current_chunk_.substr(0, n), method); + if (n < chunk_size) { + current_chunk_.remove_prefix(n); + } else { + current_chunk_ = btree_reader_.Next(); + } + } else { + CordRep* rep; + current_chunk_ = btree_reader_.Read(n, chunk_size, rep); + subcord.contents_.EmplaceTree(rep, method); + } + bytes_remaining_ -= n; + return subcord; + } + + auto& stack_of_right_children = stack_of_right_children_; if (n < current_chunk_.size()) { // Range to read is a proper subrange of the current chunk. assert(current_leaf_ != nullptr); - CordRep* subnode = Ref(current_leaf_); - const char* data = - subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data; + CordRep* subnode = CordRep::Ref(current_leaf_); + const char* data = subnode->IsExternal() ? subnode->external()->base + : subnode->flat()->Data(); subnode = NewSubstring(subnode, current_chunk_.data() - data, n); - subcord.contents_.set_tree(VerifyTree(subnode)); + subcord.contents_.EmplaceTree(VerifyTree(subnode), method); RemoveChunkPrefix(n); return subcord; } @@ -1580,10 +1567,10 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { // Range to read begins with a proper subrange of the current chunk. assert(!current_chunk_.empty()); assert(current_leaf_ != nullptr); - CordRep* subnode = Ref(current_leaf_); + CordRep* subnode = CordRep::Ref(current_leaf_); if (current_chunk_.size() < subnode->length) { - const char* data = - subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data; + const char* data = subnode->IsExternal() ? subnode->external()->base + : subnode->flat()->Data(); subnode = NewSubstring(subnode, current_chunk_.data() - data, current_chunk_.size()); } @@ -1593,20 +1580,20 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { // Process the next node(s) on the stack, reading whole subtrees depending on // their length and how many bytes we are advancing. CordRep* node = nullptr; - while (!stack_of_right_children_.empty()) { - node = stack_of_right_children_.back(); - stack_of_right_children_.pop_back(); + while (!stack_of_right_children.empty()) { + node = stack_of_right_children.back(); + stack_of_right_children.pop_back(); if (node->length > n) break; // TODO(qrczak): This might unnecessarily recreate existing concat nodes. // Avoiding that would need pretty complicated logic (instead of - // current_leaf_, keep current_subtree_ which points to the highest node + // current_leaf, keep current_subtree_ which points to the highest node // such that the current leaf can be found on the path of left children // starting from current_subtree_; delay creating subnode while node is // below current_subtree_; find the proper node along the path of left // children starting from current_subtree_ if this loop exits while staying // below current_subtree_; etc.; alternatively, push parents instead of // right children on the stack). - subnode = Concat(subnode, Ref(node)); + subnode = Concat(subnode, CordRep::Ref(node)); n -= node->length; bytes_remaining_ -= node->length; node = nullptr; @@ -1615,20 +1602,20 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { if (node == nullptr) { // We have reached the end of the Cord. assert(bytes_remaining_ == 0); - subcord.contents_.set_tree(VerifyTree(subnode)); + subcord.contents_.EmplaceTree(VerifyTree(subnode), method); return subcord; } // Walk down the appropriate branches until we hit a non-CONCAT node. Save the // right children to the stack for subsequent traversal. - while (node->tag == CONCAT) { + while (node->IsConcat()) { if (node->concat()->left->length > n) { // Push right, descend left. - stack_of_right_children_.push_back(node->concat()->right); + stack_of_right_children.push_back(node->concat()->right); node = node->concat()->left; } else { // Read left, descend right. - subnode = Concat(subnode, Ref(node->concat()->left)); + subnode = Concat(subnode, CordRep::Ref(node->concat()->left)); n -= node->concat()->left->length; bytes_remaining_ -= node->concat()->left->length; node = node->concat()->right; @@ -1638,22 +1625,24 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { // Get the child node if we encounter a SUBSTRING. size_t offset = 0; size_t length = node->length; - if (node->tag == SUBSTRING) { + if (node->IsSubstring()) { offset = node->substring()->start; node = node->substring()->child; } // Range to read ends with a proper (possibly empty) subrange of the current // chunk. - assert(node->tag == EXTERNAL || node->tag >= FLAT); + assert(node->IsExternal() || node->IsFlat()); assert(length > n); - if (n > 0) subnode = Concat(subnode, NewSubstring(Ref(node), offset, n)); + if (n > 0) { + subnode = Concat(subnode, NewSubstring(CordRep::Ref(node), offset, n)); + } const char* data = - node->tag == EXTERNAL ? node->external()->base : node->data; + node->IsExternal() ? node->external()->base : node->flat()->Data(); current_chunk_ = absl::string_view(data + offset + n, length - n); current_leaf_ = node; bytes_remaining_ -= n; - subcord.contents_.set_tree(VerifyTree(subnode)); + subcord.contents_.EmplaceTree(VerifyTree(subnode), method); return subcord; } @@ -1665,12 +1654,19 @@ void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) { n -= current_chunk_.size(); bytes_remaining_ -= current_chunk_.size(); + if (stack_of_right_children_.empty()) { + // We have reached the end of the Cord. + assert(bytes_remaining_ == 0); + return; + } + // Process the next node(s) on the stack, skipping whole subtrees depending on // their length and how many bytes we are advancing. CordRep* node = nullptr; - while (!stack_of_right_children_.empty()) { - node = stack_of_right_children_.back(); - stack_of_right_children_.pop_back(); + auto& stack_of_right_children = stack_of_right_children_; + while (!stack_of_right_children.empty()) { + node = stack_of_right_children.back(); + stack_of_right_children.pop_back(); if (node->length > n) break; n -= node->length; bytes_remaining_ -= node->length; @@ -1685,10 +1681,10 @@ void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) { // Walk down the appropriate branches until we hit a non-CONCAT node. Save the // right children to the stack for subsequent traversal. - while (node->tag == CONCAT) { + while (node->IsConcat()) { if (node->concat()->left->length > n) { // Push right, descend left. - stack_of_right_children_.push_back(node->concat()->right); + stack_of_right_children.push_back(node->concat()->right); node = node->concat()->left; } else { // Skip left, descend right. @@ -1701,22 +1697,22 @@ void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) { // Get the child node if we encounter a SUBSTRING. size_t offset = 0; size_t length = node->length; - if (node->tag == SUBSTRING) { + if (node->IsSubstring()) { offset = node->substring()->start; node = node->substring()->child; } - assert(node->tag == EXTERNAL || node->tag >= FLAT); + assert(node->IsExternal() || node->IsFlat()); assert(length > n); const char* data = - node->tag == EXTERNAL ? node->external()->base : node->data; + node->IsExternal() ? node->external()->base : node->flat()->Data(); current_chunk_ = absl::string_view(data + offset + n, length - n); current_leaf_ = node; bytes_remaining_ -= n; } char Cord::operator[](size_t i) const { - assert(i < size()); + ABSL_HARDENING_ASSERT(i < size()); size_t offset = i; const CordRep* rep = contents_.tree(); if (rep == nullptr) { @@ -1725,13 +1721,15 @@ char Cord::operator[](size_t i) const { while (true) { assert(rep != nullptr); assert(offset < rep->length); - if (rep->tag >= FLAT) { + if (rep->IsFlat()) { // Get the "i"th character directly from the flat array. - return rep->data[offset]; - } else if (rep->tag == EXTERNAL) { + return rep->flat()->Data()[offset]; + } else if (rep->IsBtree()) { + return rep->btree()->GetCharacter(offset); + } else if (rep->IsExternal()) { // Get the "i"th character from the external array. return rep->external()->base[offset]; - } else if (rep->tag == CONCAT) { + } else if (rep->IsConcat()) { // Recursively branch to the side of the concatenation that the "i"th // character is on. size_t left_length = rep->concat()->left->length; @@ -1743,7 +1741,7 @@ char Cord::operator[](size_t i) const { } } else { // This must be a substring a node, so bypass it to get to the child. - assert(rep->tag == SUBSTRING); + assert(rep->IsSubstring()); offset += rep->substring()->start; rep = rep->substring()->child; } @@ -1751,6 +1749,7 @@ char Cord::operator[](size_t i) const { } absl::string_view Cord::FlattenSlowPath() { + assert(contents_.is_tree()); size_t total_size = size(); CordRep* new_rep; char* new_buffer; @@ -1758,9 +1757,9 @@ absl::string_view Cord::FlattenSlowPath() { // Try to put the contents into a new flat rep. If they won't fit in the // biggest possible flat node, use an external rep instead. if (total_size <= kMaxFlatLength) { - new_rep = NewFlat(total_size); + new_rep = CordRepFlat::New(total_size); new_rep->length = total_size; - new_buffer = new_rep->data; + new_buffer = new_rep->flat()->Data(); CopyToArraySlowPath(new_buffer); } else { new_buffer = std::allocator<char>().allocate(total_size); @@ -1771,29 +1770,35 @@ absl::string_view Cord::FlattenSlowPath() { s.size()); }); } - Unref(contents_.tree()); - contents_.set_tree(new_rep); + CordzUpdateScope scope(contents_.cordz_info(), CordzUpdateTracker::kFlatten); + CordRep::Unref(contents_.as_tree()); + contents_.SetTree(new_rep, scope); return absl::string_view(new_buffer, total_size); } /* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) { assert(rep != nullptr); - if (rep->tag >= FLAT) { - *fragment = absl::string_view(rep->data, rep->length); + if (rep->IsFlat()) { + *fragment = absl::string_view(rep->flat()->Data(), rep->length); return true; - } else if (rep->tag == EXTERNAL) { + } else if (rep->IsExternal()) { *fragment = absl::string_view(rep->external()->base, rep->length); return true; - } else if (rep->tag == SUBSTRING) { + } else if (rep->IsBtree()) { + return rep->btree()->IsFlat(fragment); + } else if (rep->IsSubstring()) { CordRep* child = rep->substring()->child; - if (child->tag >= FLAT) { - *fragment = - absl::string_view(child->data + rep->substring()->start, rep->length); + if (child->IsFlat()) { + *fragment = absl::string_view( + child->flat()->Data() + rep->substring()->start, rep->length); return true; - } else if (child->tag == EXTERNAL) { + } else if (child->IsExternal()) { *fragment = absl::string_view( child->external()->base + rep->substring()->start, rep->length); return true; + } else if (child->IsBtree()) { + return child->btree()->IsFlat(rep->substring()->start, rep->length, + fragment); } } return false; @@ -1802,6 +1807,15 @@ absl::string_view Cord::FlattenSlowPath() { /* static */ void Cord::ForEachChunkAux( absl::cord_internal::CordRep* rep, absl::FunctionRef<void(absl::string_view)> callback) { + if (rep->IsBtree()) { + ChunkIterator it(rep), end; + while (it != end) { + callback(*it); + ++it; + } + return; + } + assert(rep != nullptr); int stack_pos = 0; constexpr int stack_max = 128; @@ -1809,7 +1823,7 @@ absl::string_view Cord::FlattenSlowPath() { absl::cord_internal::CordRep* stack[stack_max]; absl::cord_internal::CordRep* current_node = rep; while (true) { - if (current_node->tag == CONCAT) { + if (current_node->IsConcat()) { if (stack_pos == stack_max) { // There's no more room on our stack array to add another right branch, // and the idea is to avoid allocations, so call this function @@ -1843,9 +1857,9 @@ absl::string_view Cord::FlattenSlowPath() { } } -static void DumpNode(CordRep* rep, bool include_data, std::ostream* os) { +static void DumpNode(CordRep* rep, bool include_data, std::ostream* os, + int indent) { const int kIndentStep = 1; - int indent = 0; absl::InlinedVector<CordRep*, kInlinedVectorSize> stack; absl::InlinedVector<int, kInlinedVectorSize> indents; for (;;) { @@ -1856,27 +1870,29 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os) { *os << "]"; *os << " " << (IsRootBalanced(rep) ? 'b' : 'u'); *os << " " << std::setw(indent) << ""; - if (rep->tag == CONCAT) { + if (rep->IsConcat()) { *os << "CONCAT depth=" << Depth(rep) << "\n"; indent += kIndentStep; indents.push_back(indent); stack.push_back(rep->concat()->right); rep = rep->concat()->left; - } else if (rep->tag == SUBSTRING) { + } else if (rep->IsSubstring()) { *os << "SUBSTRING @ " << rep->substring()->start << "\n"; indent += kIndentStep; rep = rep->substring()->child; - } else { // Leaf - if (rep->tag == EXTERNAL) { + } else { // Leaf or ring + if (rep->IsExternal()) { *os << "EXTERNAL ["; if (include_data) *os << absl::CEscape(std::string(rep->external()->base, rep->length)); *os << "]\n"; - } else { - *os << "FLAT cap=" << TagToLength(rep->tag) << " ["; + } else if (rep->IsFlat()) { + *os << "FLAT cap=" << rep->flat()->Capacity() << " ["; if (include_data) - *os << absl::CEscape(std::string(rep->data, rep->length)); + *os << absl::CEscape(std::string(rep->flat()->Data(), rep->length)); *os << "]\n"; + } else { + CordRepBtree::Dump(rep, /*label=*/ "", include_data, *os); } if (stack.empty()) break; rep = stack.back(); @@ -1908,7 +1924,7 @@ static bool VerifyNode(CordRep* root, CordRep* start_node, ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node)); } - if (node->tag == CONCAT) { + if (node->IsConcat()) { ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, ReportError(root, node)); ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, @@ -1920,13 +1936,13 @@ static bool VerifyNode(CordRep* root, CordRep* start_node, worklist.push_back(node->concat()->right); worklist.push_back(node->concat()->left); } - } else if (node->tag >= FLAT) { - ABSL_INTERNAL_CHECK(node->length <= TagToLength(node->tag), + } else if (node->IsFlat()) { + ABSL_INTERNAL_CHECK(node->length <= node->flat()->Capacity(), ReportError(root, node)); - } else if (node->tag == EXTERNAL) { + } else if (node->IsExternal()) { ABSL_INTERNAL_CHECK(node->external()->base != nullptr, ReportError(root, node)); - } else if (node->tag == SUBSTRING) { + } else if (node->IsSubstring()) { ABSL_INTERNAL_CHECK( node->substring()->start < node->substring()->child->length, ReportError(root, node)); @@ -1955,7 +1971,7 @@ static bool VerifyNode(CordRep* root, CordRep* start_node, while (true) { const CordRep* next_node = nullptr; - if (cur_node->tag == CONCAT) { + if (cur_node->IsConcat()) { total_mem_usage += sizeof(CordRepConcat); const CordRep* left = cur_node->concat()->left; if (!RepMemoryUsageLeaf(left, &total_mem_usage)) { @@ -1969,9 +1985,21 @@ static bool VerifyNode(CordRep* root, CordRep* start_node, } next_node = right; } + } else if (cur_node->IsBtree()) { + total_mem_usage += sizeof(CordRepBtree); + const CordRepBtree* node = cur_node->btree(); + if (node->height() == 0) { + for (const CordRep* edge : node->Edges()) { + RepMemoryUsageDataEdge(edge, &total_mem_usage); + } + } else { + for (const CordRep* edge : node->Edges()) { + tree_stack.push_back(edge); + } + } } else { // Since cur_node is not a leaf or a concat node it must be a substring. - assert(cur_node->tag == SUBSTRING); + assert(cur_node->IsSubstring()); total_mem_usage += sizeof(CordRepSubstring); next_node = cur_node->substring()->child; if (RepMemoryUsageLeaf(next_node, &total_mem_usage)) { @@ -1998,14 +2026,14 @@ std::ostream& operator<<(std::ostream& out, const Cord& cord) { } namespace strings_internal { -size_t CordTestAccess::FlatOverhead() { return kFlatOverhead; } -size_t CordTestAccess::MaxFlatLength() { return kMaxFlatLength; } +size_t CordTestAccess::FlatOverhead() { return cord_internal::kFlatOverhead; } +size_t CordTestAccess::MaxFlatLength() { return cord_internal::kMaxFlatLength; } size_t CordTestAccess::FlatTagToLength(uint8_t tag) { - return TagToLength(tag); + return cord_internal::TagToLength(tag); } uint8_t CordTestAccess::LengthToTag(size_t s) { ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, absl::StrCat("Invalid length ", s)); - return AllocatedSizeToTag(s + kFlatOverhead); + return cord_internal::AllocatedSizeToTag(s + cord_internal::kFlatOverhead); } size_t CordTestAccess::SizeofCordRepConcat() { return sizeof(CordRepConcat); } size_t CordTestAccess::SizeofCordRepExternal() { diff --git a/third_party/abseil-cpp/absl/strings/cord.h b/third_party/abseil-cpp/absl/strings/cord.h index 40566cbaa0..f0a1991471 100644 --- a/third_party/abseil-cpp/absl/strings/cord.h +++ b/third_party/abseil-cpp/absl/strings/cord.h @@ -11,25 +11,52 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. - -// A Cord is a sequence of characters with some unusual access propreties. -// A Cord supports efficient insertions and deletions at the start and end of -// the byte sequence, but random access reads are slower, and random access -// modifications are not supported by the API. Cord also provides cheap copies -// (using a copy-on-write strategy) and cheap substring operations. // -// Thread safety -// ------------- +// ----------------------------------------------------------------------------- +// File: cord.h +// ----------------------------------------------------------------------------- +// +// This file defines the `absl::Cord` data structure and operations on that data +// structure. A Cord is a string-like sequence of characters optimized for +// specific use cases. Unlike a `std::string`, which stores an array of +// contiguous characters, Cord data is stored in a structure consisting of +// separate, reference-counted "chunks." (Currently, this implementation is a +// tree structure, though that implementation may change.) +// +// Because a Cord consists of these chunks, data can be added to or removed from +// a Cord during its lifetime. Chunks may also be shared between Cords. Unlike a +// `std::string`, a Cord can therefore accommodate data that changes over its +// lifetime, though it's not quite "mutable"; it can change only in the +// attachment, detachment, or rearrangement of chunks of its constituent data. +// +// A Cord provides some benefit over `std::string` under the following (albeit +// narrow) circumstances: +// +// * Cord data is designed to grow and shrink over a Cord's lifetime. Cord +// provides efficient insertions and deletions at the start and end of the +// character sequences, avoiding copies in those cases. Static data should +// generally be stored as strings. +// * External memory consisting of string-like data can be directly added to +// a Cord without requiring copies or allocations. +// * Cord data may be shared and copied cheaply. Cord provides a copy-on-write +// implementation and cheap sub-Cord operations. Copying a Cord is an O(1) +// operation. +// +// As a consequence to the above, Cord data is generally large. Small data +// should generally use strings, as construction of a Cord requires some +// overhead. Small Cords (<= 15 bytes) are represented inline, but most small +// Cords are expected to grow over their lifetimes. +// +// Note that because a Cord is made up of separate chunked data, random access +// to character data within a Cord is slower than within a `std::string`. +// +// Thread Safety +// // Cord has the same thread-safety properties as many other types like // std::string, std::vector<>, int, etc -- it is thread-compatible. In -// particular, if no thread may call a non-const method, then it is safe to -// concurrently call const methods. Copying a Cord produces a new instance that -// can be used concurrently with the original in arbitrary ways. -// -// Implementation is similar to the "Ropes" described in: -// Ropes: An alternative to strings -// Hans J. Boehm, Russ Atkinson, Michael Plass -// Software Practice and Experience, December 1995 +// particular, if threads do not call non-const methods, then it is safe to call +// const methods without synchronization. Copying a Cord produces a new instance +// that can be used concurrently with the original in arbitrary ways. #ifndef ABSL_STRINGS_CORD_H_ #define ABSL_STRINGS_CORD_H_ @@ -38,12 +65,13 @@ #include <cstddef> #include <cstdint> #include <cstring> -#include <iostream> +#include <iosfwd> #include <iterator> #include <string> +#include <type_traits> +#include "absl/base/config.h" #include "absl/base/internal/endian.h" -#include "absl/base/internal/invoke.h" #include "absl/base/internal/per_thread_tls.h" #include "absl/base/macros.h" #include "absl/base/port.h" @@ -51,8 +79,18 @@ #include "absl/functional/function_ref.h" #include "absl/meta/type_traits.h" #include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_btree_reader.h" +#include "absl/strings/internal/cord_rep_ring.h" +#include "absl/strings/internal/cordz_functions.h" +#include "absl/strings/internal/cordz_info.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_scope.h" +#include "absl/strings/internal/cordz_update_tracker.h" #include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/internal/string_constant.h" #include "absl/strings/string_view.h" +#include "absl/types/optional.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -61,12 +99,35 @@ class CordTestPeer; template <typename Releaser> Cord MakeCordFromExternal(absl::string_view, Releaser&&); void CopyCordToString(const Cord& src, std::string* dst); -namespace hash_internal { -template <typename H> -H HashFragmentedCord(H, const Cord&); -} -// A Cord is a sequence of characters. +// Cord +// +// A Cord is a sequence of characters, designed to be more efficient than a +// `std::string` in certain circumstances: namely, large string data that needs +// to change over its lifetime or shared, especially when such data is shared +// across API boundaries. +// +// A Cord stores its character data in a structure that allows efficient prepend +// and append operations. This makes a Cord useful for large string data sent +// over in a wire format that may need to be prepended or appended at some point +// during the data exchange (e.g. HTTP, protocol buffers). For example, a +// Cord is useful for storing an HTTP request, and prepending an HTTP header to +// such a request. +// +// Cords should not be used for storing general string data, however. They +// require overhead to construct and are slower than strings for random access. +// +// The Cord API provides the following common API operations: +// +// * Create or assign Cords out of existing string data, memory, or other Cords +// * Append and prepend data to an existing Cord +// * Create new Sub-Cords from existing Cord data +// * Swap Cord data and compare Cord equality +// * Write out Cord data by constructing a `std::string` +// +// Additionally, the API provides iterator utilities to iterate through Cord +// data via chunks or character bytes. +// class Cord { private: template <typename T> @@ -74,51 +135,53 @@ class Cord { absl::enable_if_t<std::is_same<T, std::string>::value, int>; public: - // -------------------------------------------------------------------- - // Constructors, destructors and helper factories + // Cord::Cord() Constructors. - // Create an empty cord + // Creates an empty Cord. constexpr Cord() noexcept; - // Cord is copyable and efficiently movable. - // The moved-from state is valid but unspecified. + // Creates a Cord from an existing Cord. Cord is copyable and efficiently + // movable. The moved-from state is valid but unspecified. Cord(const Cord& src); Cord(Cord&& src) noexcept; Cord& operator=(const Cord& x); Cord& operator=(Cord&& x) noexcept; - // Create a cord out of "src". This constructor is explicit on - // purpose so that people do not get automatic type conversions. + // Creates a Cord from a `src` string. This constructor is marked explicit to + // prevent implicit Cord constructions from arguments convertible to an + // `absl::string_view`. explicit Cord(absl::string_view src); Cord& operator=(absl::string_view src); - // These are templated to avoid ambiguities for types that are convertible to - // both `absl::string_view` and `std::string`, such as `const char*`. - // - // Note that these functions reserve the right to reuse the `string&&`'s - // memory and that they will do so in the future. + // Creates a Cord from a `std::string&&` rvalue. These constructors are + // templated to avoid ambiguities for types that are convertible to both + // `absl::string_view` and `std::string`, such as `const char*`. template <typename T, EnableIfString<T> = 0> - explicit Cord(T&& src) : Cord(absl::string_view(src)) {} + explicit Cord(T&& src); template <typename T, EnableIfString<T> = 0> Cord& operator=(T&& src); - // Destroy the cord + // Cord::~Cord() + // + // Destructs the Cord. ~Cord() { if (contents_.is_tree()) DestroyCordSlow(); } - // Creates a Cord that takes ownership of external memory. The contents of - // `data` are not copied. + // MakeCordFromExternal() + // + // Creates a Cord that takes ownership of external string memory. The + // contents of `data` are not copied to the Cord; instead, the external + // memory is added to the Cord and reference-counted. This data may not be + // changed for the life of the Cord, though it may be prepended or appended + // to. // - // This function takes a callable that is invoked when all Cords are - // finished with `data`. The data must remain live and unchanging until the - // releaser is called. The requirements for the releaser are that it: - // * is move constructible, - // * supports `void operator()(absl::string_view) const`, - // * does not have alignment requirement greater than what is guaranteed by - // ::operator new. This is dictated by alignof(std::max_align_t) before - // C++17 and __STDCPP_DEFAULT_NEW_ALIGNMENT__ if compiling with C++17 or - // it is supported by the implementation. + // `MakeCordFromExternal()` takes a callable "releaser" that is invoked when + // the reference count for `data` reaches zero. As noted above, this data must + // remain live until the releaser is invoked. The callable releaser also must: + // + // * be move constructible + // * support `void operator()(absl::string_view) const` or `void operator()` // // Example: // @@ -127,13 +190,13 @@ class Cord { // FillBlock(block); // return absl::MakeCordFromExternal( // block->ToStringView(), - // [pool, block](absl::string_view /*ignored*/) { - // pool->FreeBlock(block); + // [pool, block](absl::string_view v) { + // pool->FreeBlock(block, v); // }); // } // - // WARNING: It's likely a bug if your releaser doesn't do anything. - // For example, consider the following: + // WARNING: Because a Cord can be reference-counted, it's likely a bug if your + // releaser doesn't do anything. For example, consider the following: // // void Foo(const char* buffer, int len) { // auto c = absl::MakeCordFromExternal(absl::string_view(buffer, len), @@ -147,97 +210,141 @@ class Cord { template <typename Releaser> friend Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser); - // -------------------------------------------------------------------- - // Mutations - + // Cord::Clear() + // + // Releases the Cord data. Any nodes that share data with other Cords, if + // applicable, will have their reference counts reduced by 1. void Clear(); + // Cord::Append() + // + // Appends data to the Cord, which may come from another Cord or other string + // data. void Append(const Cord& src); void Append(Cord&& src); void Append(absl::string_view src); template <typename T, EnableIfString<T> = 0> void Append(T&& src); + // Cord::Prepend() + // + // Prepends data to the Cord, which may come from another Cord or other string + // data. void Prepend(const Cord& src); void Prepend(absl::string_view src); template <typename T, EnableIfString<T> = 0> void Prepend(T&& src); + // Cord::RemovePrefix() + // + // Removes the first `n` bytes of a Cord. void RemovePrefix(size_t n); void RemoveSuffix(size_t n); - // Returns a new cord representing the subrange [pos, pos + new_size) of + // Cord::Subcord() + // + // Returns a new Cord representing the subrange [pos, pos + new_size) of // *this. If pos >= size(), the result is empty(). If // (pos + new_size) >= size(), the result is the subrange [pos, size()). Cord Subcord(size_t pos, size_t new_size) const; - friend void swap(Cord& x, Cord& y) noexcept; + // Cord::swap() + // + // Swaps the contents of the Cord with `other`. + void swap(Cord& other) noexcept; - // -------------------------------------------------------------------- - // Accessors + // swap() + // + // Swaps the contents of two Cords. + friend void swap(Cord& x, Cord& y) noexcept { x.swap(y); } + // Cord::size() + // + // Returns the size of the Cord. size_t size() const; + + // Cord::empty() + // + // Determines whether the given Cord is empty, returning `true` is so. bool empty() const; - // Returns the approximate number of bytes pinned by this Cord. Note that - // Cords that share memory could each be "charged" independently for the same - // shared memory. + // Cord::EstimatedMemoryUsage() + // + // Returns the *approximate* number of bytes held in full or in part by this + // Cord (which may not remain the same between invocations). Note that Cords + // that share memory could each be "charged" independently for the same shared + // memory. size_t EstimatedMemoryUsage() const; - // -------------------------------------------------------------------- - // Comparators - - // Compares 'this' Cord with rhs. This function and its relatives - // treat Cords as sequences of unsigned bytes. The comparison is a - // straightforward lexicographic comparison. Return value: + // Cord::Compare() + // + // Compares 'this' Cord with rhs. This function and its relatives treat Cords + // as sequences of unsigned bytes. The comparison is a straightforward + // lexicographic comparison. `Cord::Compare()` returns values as follows: + // // -1 'this' Cord is smaller // 0 two Cords are equal // 1 'this' Cord is larger int Compare(absl::string_view rhs) const; int Compare(const Cord& rhs) const; - // Does 'this' cord start/end with rhs + // Cord::StartsWith() + // + // Determines whether the Cord starts with the passed string data `rhs`. bool StartsWith(const Cord& rhs) const; bool StartsWith(absl::string_view rhs) const; + + // Cord::EndsWith() + // + // Determines whether the Cord ends with the passed string data `rhs`. bool EndsWith(absl::string_view rhs) const; bool EndsWith(const Cord& rhs) const; - // -------------------------------------------------------------------- - // Conversion to other types - + // Cord::operator std::string() + // + // Converts a Cord into a `std::string()`. This operator is marked explicit to + // prevent unintended Cord usage in functions that take a string. explicit operator std::string() const; - // Copies the contents from `src` to `*dst`. + // CopyCordToString() // - // This function optimizes the case of reusing the destination std::string since it + // Copies the contents of a `src` Cord into a `*dst` string. + // + // This function optimizes the case of reusing the destination string since it // can reuse previously allocated capacity. However, this function does not // guarantee that pointers previously returned by `dst->data()` remain valid // even if `*dst` had enough capacity to hold `src`. If `*dst` is a new // object, prefer to simply use the conversion operator to `std::string`. friend void CopyCordToString(const Cord& src, std::string* dst); - // -------------------------------------------------------------------- - // Iteration - class CharIterator; - // Type for iterating over the chunks of a `Cord`. See comments for - // `Cord::chunk_begin()`, `Cord::chunk_end()` and `Cord::Chunks()` below for - // preferred usage. + //---------------------------------------------------------------------------- + // Cord::ChunkIterator + //---------------------------------------------------------------------------- + // + // A `Cord::ChunkIterator` allows iteration over the constituent chunks of its + // Cord. Such iteration allows you to perform non-const operatons on the data + // of a Cord without modifying it. + // + // Generally, you do not instantiate a `Cord::ChunkIterator` directly; + // instead, you create one implicitly through use of the `Cord::Chunks()` + // member function. + // + // The `Cord::ChunkIterator` has the following properties: // - // Additional notes: + // * The iterator is invalidated after any non-const operation on the + // Cord object over which it iterates. // * The `string_view` returned by dereferencing a valid, non-`end()` // iterator is guaranteed to be non-empty. - // * A `ChunkIterator` object is invalidated after any non-const - // operation on the `Cord` object over which it iterates. - // * Two `ChunkIterator` objects can be equality compared if and only if - // they remain valid and iterate over the same `Cord`. - // * This is a proxy iterator. This means the `string_view` returned by the - // iterator does not live inside the Cord, and its lifetime is limited to - // the lifetime of the iterator itself. To help prevent issues, - // `ChunkIterator::reference` is not a true reference type and is - // equivalent to `value_type`. - // * The iterator keeps state that can grow for `Cord`s that contain many + // * Two `ChunkIterator` objects can be compared equal if and only if they + // remain valid and iterate over the same Cord. + // * The iterator in this case is a proxy iterator; the `string_view` + // returned by the iterator does not live inside the Cord, and its + // lifetime is limited to the lifetime of the iterator itself. To help + // prevent lifetime issues, `ChunkIterator::reference` is not a true + // reference type and is equivalent to `value_type`. + // * The iterator keeps state that can grow for Cords that contain many // nodes and are imbalanced due to sharing. Prefer to pass this type by // const reference instead of by value. class ChunkIterator { @@ -261,14 +368,38 @@ class Cord { friend class CharIterator; private: + using CordRep = absl::cord_internal::CordRep; + using CordRepBtree = absl::cord_internal::CordRepBtree; + using CordRepBtreeReader = absl::cord_internal::CordRepBtreeReader; + + // Stack of right children of concat nodes that we have to visit. + // Keep this at the end of the structure to avoid cache-thrashing. + // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for + // the inlined vector size (47 exists for backward compatibility). + using Stack = absl::InlinedVector<absl::cord_internal::CordRep*, 47>; + + // Constructs a `begin()` iterator from `tree`. `tree` must not be null. + explicit ChunkIterator(cord_internal::CordRep* tree); + // Constructs a `begin()` iterator from `cord`. explicit ChunkIterator(const Cord* cord); + // Initializes this instance from a tree. Invoked by constructors. + void InitTree(cord_internal::CordRep* tree); + // Removes `n` bytes from `current_chunk_`. Expects `n` to be smaller than // `current_chunk_.size()`. void RemoveChunkPrefix(size_t n); Cord AdvanceAndReadBytes(size_t n); void AdvanceBytes(size_t n); + + // Stack specific operator++ + ChunkIterator& AdvanceStack(); + + // Btree specific operator++ + ChunkIterator& AdvanceBtree(); + void AdvanceBytesBtree(size_t n); + // Iterates `n` bytes, where `n` is expected to be greater than or equal to // `current_chunk_.size()`. void AdvanceBytesSlowPath(size_t n); @@ -282,14 +413,21 @@ class Cord { absl::cord_internal::CordRep* current_leaf_ = nullptr; // The number of bytes left in the `Cord` over which we are iterating. size_t bytes_remaining_ = 0; - absl::InlinedVector<absl::cord_internal::CordRep*, 4> - stack_of_right_children_; + + // Cord reader for cord btrees. Empty if not traversing a btree. + CordRepBtreeReader btree_reader_; + + // See 'Stack' alias definition. + Stack stack_of_right_children_; }; + // Cord::ChunkIterator::chunk_begin() + // // Returns an iterator to the first chunk of the `Cord`. // - // This is useful for getting a `ChunkIterator` outside the context of a - // range-based for-loop (in which case see `Cord::Chunks()` below). + // Generally, prefer using `Cord::Chunks()` within a range-based for loop for + // iterating over the chunks of a Cord. This method may be useful for getting + // a `ChunkIterator` where range-based for-loops are not useful. // // Example: // @@ -298,15 +436,40 @@ class Cord { // return std::find(c.chunk_begin(), c.chunk_end(), s); // } ChunkIterator chunk_begin() const; + + // Cord::ChunkItertator::chunk_end() + // // Returns an iterator one increment past the last chunk of the `Cord`. + // + // Generally, prefer using `Cord::Chunks()` within a range-based for loop for + // iterating over the chunks of a Cord. This method may be useful for getting + // a `ChunkIterator` where range-based for-loops may not be available. ChunkIterator chunk_end() const; - // Convenience wrapper over `Cord::chunk_begin()` and `Cord::chunk_end()` to - // enable range-based for-loop iteration over `Cord` chunks. + //---------------------------------------------------------------------------- + // Cord::ChunkIterator::ChunkRange + //---------------------------------------------------------------------------- + // + // `ChunkRange` is a helper class for iterating over the chunks of the `Cord`, + // producing an iterator which can be used within a range-based for loop. + // Construction of a `ChunkRange` will return an iterator pointing to the + // first chunk of the Cord. Generally, do not construct a `ChunkRange` + // directly; instead, prefer to use the `Cord::Chunks()` method. // - // Prefer to use `Cord::Chunks()` below instead of constructing this directly. + // Implementation note: `ChunkRange` is simply a convenience wrapper over + // `Cord::chunk_begin()` and `Cord::chunk_end()`. class ChunkRange { public: + // Fulfill minimum c++ container requirements [container.requirements] + // Theses (partial) container type definitions allow ChunkRange to be used + // in various utilities expecting a subset of [container.requirements]. + // For example, the below enables using `::testing::ElementsAre(...)` + using value_type = absl::string_view; + using reference = value_type&; + using const_reference = const value_type&; + using iterator = ChunkIterator; + using const_iterator = ChunkIterator; + explicit ChunkRange(const Cord* cord) : cord_(cord) {} ChunkIterator begin() const; @@ -316,8 +479,11 @@ class Cord { const Cord* cord_; }; - // Returns a range for iterating over the chunks of a `Cord` with a - // range-based for-loop. + // Cord::Chunks() + // + // Returns a `Cord::ChunkIterator::ChunkRange` for iterating over the chunks + // of a `Cord` with a range-based for-loop. For most iteration tasks on a + // Cord, use `Cord::Chunks()` to retrieve this iterator. // // Example: // @@ -334,22 +500,30 @@ class Cord { // } ChunkRange Chunks() const; - // Type for iterating over the characters of a `Cord`. See comments for - // `Cord::char_begin()`, `Cord::char_end()` and `Cord::Chars()` below for - // preferred usage. + //---------------------------------------------------------------------------- + // Cord::CharIterator + //---------------------------------------------------------------------------- + // + // A `Cord::CharIterator` allows iteration over the constituent characters of + // a `Cord`. // - // Additional notes: - // * A `CharIterator` object is invalidated after any non-const - // operation on the `Cord` object over which it iterates. - // * Two `CharIterator` objects can be equality compared if and only if - // they remain valid and iterate over the same `Cord`. - // * The iterator keeps state that can grow for `Cord`s that contain many + // Generally, you do not instantiate a `Cord::CharIterator` directly; instead, + // you create one implicitly through use of the `Cord::Chars()` member + // function. + // + // A `Cord::CharIterator` has the following properties: + // + // * The iterator is invalidated after any non-const operation on the + // Cord object over which it iterates. + // * Two `CharIterator` objects can be compared equal if and only if they + // remain valid and iterate over the same Cord. + // * The iterator keeps state that can grow for Cords that contain many // nodes and are imbalanced due to sharing. Prefer to pass this type by // const reference instead of by value. - // * This type cannot be a forward iterator because a `Cord` can reuse - // sections of memory. This violates the requirement that if dereferencing - // two iterators returns the same object, the iterators must compare - // equal. + // * This type cannot act as a forward iterator because a `Cord` can reuse + // sections of memory. This fact violates the requirement for forward + // iterators to compare equal if dereferencing them returns the same + // object. class CharIterator { public: using iterator_category = std::input_iterator_tag; @@ -375,36 +549,68 @@ class Cord { ChunkIterator chunk_iterator_; }; - // Advances `*it` by `n_bytes` and returns the bytes passed as a `Cord`. + // Cord::CharIterator::AdvanceAndRead() // - // `n_bytes` must be less than or equal to the number of bytes remaining for - // iteration. Otherwise the behavior is undefined. It is valid to pass - // `char_end()` and 0. + // Advances the `Cord::CharIterator` by `n_bytes` and returns the bytes + // advanced as a separate `Cord`. `n_bytes` must be less than or equal to the + // number of bytes within the Cord; otherwise, behavior is undefined. It is + // valid to pass `char_end()` and `0`. static Cord AdvanceAndRead(CharIterator* it, size_t n_bytes); - // Advances `*it` by `n_bytes`. + // Cord::CharIterator::Advance() // - // `n_bytes` must be less than or equal to the number of bytes remaining for - // iteration. Otherwise the behavior is undefined. It is valid to pass - // `char_end()` and 0. + // Advances the `Cord::CharIterator` by `n_bytes`. `n_bytes` must be less than + // or equal to the number of bytes remaining within the Cord; otherwise, + // behavior is undefined. It is valid to pass `char_end()` and `0`. static void Advance(CharIterator* it, size_t n_bytes); + // Cord::CharIterator::ChunkRemaining() + // // Returns the longest contiguous view starting at the iterator's position. // // `it` must be dereferenceable. static absl::string_view ChunkRemaining(const CharIterator& it); + // Cord::CharIterator::char_begin() + // // Returns an iterator to the first character of the `Cord`. + // + // Generally, prefer using `Cord::Chars()` within a range-based for loop for + // iterating over the chunks of a Cord. This method may be useful for getting + // a `CharIterator` where range-based for-loops may not be available. CharIterator char_begin() const; + + // Cord::CharIterator::char_end() + // // Returns an iterator to one past the last character of the `Cord`. + // + // Generally, prefer using `Cord::Chars()` within a range-based for loop for + // iterating over the chunks of a Cord. This method may be useful for getting + // a `CharIterator` where range-based for-loops are not useful. CharIterator char_end() const; - // Convenience wrapper over `Cord::char_begin()` and `Cord::char_end()` to - // enable range-based for-loop iterator over the characters of a `Cord`. + // Cord::CharIterator::CharRange // - // Prefer to use `Cord::Chars()` below instead of constructing this directly. + // `CharRange` is a helper class for iterating over the characters of a + // producing an iterator which can be used within a range-based for loop. + // Construction of a `CharRange` will return an iterator pointing to the first + // character of the Cord. Generally, do not construct a `CharRange` directly; + // instead, prefer to use the `Cord::Chars()` method show below. + // + // Implementation note: `CharRange` is simply a convenience wrapper over + // `Cord::char_begin()` and `Cord::char_end()`. class CharRange { public: + // Fulfill minimum c++ container requirements [container.requirements] + // Theses (partial) container type definitions allow CharRange to be used + // in various utilities expecting a subset of [container.requirements]. + // For example, the below enables using `::testing::ElementsAre(...)` + using value_type = char; + using reference = value_type&; + using const_reference = const value_type&; + using iterator = CharIterator; + using const_iterator = CharIterator; + explicit CharRange(const Cord* cord) : cord_(cord) {} CharIterator begin() const; @@ -414,8 +620,11 @@ class Cord { const Cord* cord_; }; - // Returns a range for iterating over the characters of a `Cord` with a - // range-based for-loop. + // Cord::CharIterator::Chars() + // + // Returns a `Cord::CharIterator` for iterating over the characters of a + // `Cord` with a range-based for-loop. For most character-based iteration + // tasks on a Cord, use `Cord::Chars()` to retrieve this iterator. // // Example: // @@ -432,32 +641,73 @@ class Cord { // } CharRange Chars() const; - // -------------------------------------------------------------------- - // Miscellaneous - - // Get the "i"th character of 'this' and return it. - // NOTE: This routine is reasonably efficient. It is roughly - // logarithmic in the number of nodes that make up the cord. Still, - // if you need to iterate over the contents of a cord, you should - // use a CharIterator/CordIterator rather than call operator[] or Get() - // repeatedly in a loop. + // Cord::operator[] // - // REQUIRES: 0 <= i < size() + // Gets the "i"th character of the Cord and returns it, provided that + // 0 <= i < Cord.size(). + // + // NOTE: This routine is reasonably efficient. It is roughly + // logarithmic based on the number of chunks that make up the cord. Still, + // if you need to iterate over the contents of a cord, you should + // use a CharIterator/ChunkIterator rather than call operator[] or Get() + // repeatedly in a loop. char operator[](size_t i) const; + // Cord::TryFlat() + // + // If this cord's representation is a single flat array, returns a + // string_view referencing that array. Otherwise returns nullopt. + absl::optional<absl::string_view> TryFlat() const; + + // Cord::Flatten() + // // Flattens the cord into a single array and returns a view of the data. // // If the cord was already flat, the contents are not modified. absl::string_view Flatten(); + // Supports absl::Cord as a sink object for absl::Format(). + friend void AbslFormatFlush(absl::Cord* cord, absl::string_view part) { + cord->Append(part); + } + + template <typename H> + friend H AbslHashValue(H hash_state, const absl::Cord& c) { + absl::optional<absl::string_view> maybe_flat = c.TryFlat(); + if (maybe_flat.has_value()) { + return H::combine(std::move(hash_state), *maybe_flat); + } + return c.HashFragmented(std::move(hash_state)); + } + + // Create a Cord with the contents of StringConstant<T>::value. + // No allocations will be done and no data will be copied. + // This is an INTERNAL API and subject to change or removal. This API can only + // be used by spelling absl::strings_internal::MakeStringConstant, which is + // also an internal API. + template <typename T> + explicit constexpr Cord(strings_internal::StringConstant<T>); + private: + using CordRep = absl::cord_internal::CordRep; + using CordRepFlat = absl::cord_internal::CordRepFlat; + using CordzInfo = cord_internal::CordzInfo; + using CordzUpdateScope = cord_internal::CordzUpdateScope; + using CordzUpdateTracker = cord_internal::CordzUpdateTracker; + using InlineData = cord_internal::InlineData; + using MethodIdentifier = CordzUpdateTracker::MethodIdentifier; + + // Creates a cord instance with `method` representing the originating + // public API call causing the cord to be created. + explicit Cord(absl::string_view src, MethodIdentifier method); + friend class CordTestPeer; - template <typename H> - friend H absl::hash_internal::HashFragmentedCord(H, const Cord&); friend bool operator==(const Cord& lhs, const Cord& rhs); friend bool operator==(const Cord& lhs, absl::string_view rhs); - // Call the provided function once for each cord chunk, in order. Unlike + friend const CordzInfo* GetCordzInfoForTesting(const Cord& cord); + + // Calls the provided function once for each cord chunk, in order. Unlike // Chunks(), this API will not allocate memory. void ForEachChunk(absl::FunctionRef<void(absl::string_view)>) const; @@ -469,60 +719,92 @@ class Cord { // class so that we can isolate the bulk of cord.cc from changes // to the representation. // - // InlineRep holds either either a tree pointer, or an array of kMaxInline - // bytes. + // InlineRep holds either a tree pointer, or an array of kMaxInline bytes. class InlineRep { public: - static const unsigned char kMaxInline = 15; + static constexpr unsigned char kMaxInline = cord_internal::kMaxInline; static_assert(kMaxInline >= sizeof(absl::cord_internal::CordRep*), ""); - // Tag byte & kMaxInline means we are storing a pointer. - static const unsigned char kTreeFlag = 1 << 4; - // Tag byte & kProfiledFlag means we are profiling the Cord. - static const unsigned char kProfiledFlag = 1 << 5; - constexpr InlineRep() : data_{} {} + constexpr InlineRep() : data_() {} + explicit InlineRep(InlineData::DefaultInitType init) : data_(init) {} InlineRep(const InlineRep& src); InlineRep(InlineRep&& src); InlineRep& operator=(const InlineRep& src); InlineRep& operator=(InlineRep&& src) noexcept; + explicit constexpr InlineRep(cord_internal::InlineData data); + void Swap(InlineRep* rhs); bool empty() const; size_t size() const; const char* data() const; // Returns nullptr if holding pointer void set_data(const char* data, size_t n, bool nullify_tail); // Discards pointer, if any - char* set_data(size_t n); // Write data to the result + char* set_data(size_t n); // Write data to the result // Returns nullptr if holding bytes absl::cord_internal::CordRep* tree() const; - // Discards old pointer, if any - void set_tree(absl::cord_internal::CordRep* rep); - // Replaces a tree with a new root. This is faster than set_tree, but it - // should only be used when it's clear that the old rep was a tree. - void replace_tree(absl::cord_internal::CordRep* rep); + absl::cord_internal::CordRep* as_tree() const; // Returns non-null iff was holding a pointer absl::cord_internal::CordRep* clear(); - // Convert to pointer if necessary - absl::cord_internal::CordRep* force_tree(size_t extra_hint); - void reduce_size(size_t n); // REQUIRES: holding data + // Converts to pointer if necessary. + void reduce_size(size_t n); // REQUIRES: holding data void remove_prefix(size_t n); // REQUIRES: holding data - void AppendArray(const char* src_data, size_t src_size); + void AppendArray(absl::string_view src, MethodIdentifier method); absl::string_view FindFlatStartPiece() const; - void AppendTree(absl::cord_internal::CordRep* tree); - void PrependTree(absl::cord_internal::CordRep* tree); - void GetAppendRegion(char** region, size_t* size, size_t max_length); - void GetAppendRegion(char** region, size_t* size); + + // Creates a CordRepFlat instance from the current inlined data with `extra' + // bytes of desired additional capacity. + CordRepFlat* MakeFlatWithExtraCapacity(size_t extra); + + // Sets the tree value for this instance. `rep` must not be null. + // Requires the current instance to hold a tree, and a lock to be held on + // any CordzInfo referenced by this instance. The latter is enforced through + // the CordzUpdateScope argument. If the current instance is sampled, then + // the CordzInfo instance is updated to reference the new `rep` value. + void SetTree(CordRep* rep, const CordzUpdateScope& scope); + + // Identical to SetTree(), except that `rep` is allowed to be null, in + // which case the current instance is reset to an empty value. + void SetTreeOrEmpty(CordRep* rep, const CordzUpdateScope& scope); + + // Sets the tree value for this instance, and randomly samples this cord. + // This function disregards existing contents in `data_`, and should be + // called when a Cord is 'promoted' from an 'uninitialized' or 'inlined' + // value to a non-inlined (tree / ring) value. + void EmplaceTree(CordRep* rep, MethodIdentifier method); + + // Identical to EmplaceTree, except that it copies the parent stack from + // the provided `parent` data if the parent is sampled. + void EmplaceTree(CordRep* rep, const InlineData& parent, + MethodIdentifier method); + + // Commits the change of a newly created, or updated `rep` root value into + // this cord. `old_rep` indicates the old (inlined or tree) value of the + // cord, and determines if the commit invokes SetTree() or EmplaceTree(). + void CommitTree(const CordRep* old_rep, CordRep* rep, + const CordzUpdateScope& scope, MethodIdentifier method); + + void AppendTreeToInlined(CordRep* tree, MethodIdentifier method); + void AppendTreeToTree(CordRep* tree, MethodIdentifier method); + void AppendTree(CordRep* tree, MethodIdentifier method); + void PrependTreeToInlined(CordRep* tree, MethodIdentifier method); + void PrependTreeToTree(CordRep* tree, MethodIdentifier method); + void PrependTree(CordRep* tree, MethodIdentifier method); + + template <bool has_length> + void GetAppendRegion(char** region, size_t* size, size_t length); + bool IsSame(const InlineRep& other) const { - return memcmp(data_, other.data_, sizeof(data_)) == 0; + return memcmp(&data_, &other.data_, sizeof(data_)) == 0; } int BitwiseCompare(const InlineRep& other) const { uint64_t x, y; - // Use memcpy to avoid anti-aliasing issues. - memcpy(&x, data_, sizeof(x)); - memcpy(&y, other.data_, sizeof(y)); + // Use memcpy to avoid aliasing issues. + memcpy(&x, &data_, sizeof(x)); + memcpy(&y, &other.data_, sizeof(y)); if (x == y) { - memcpy(&x, data_ + 8, sizeof(x)); - memcpy(&y, other.data_ + 8, sizeof(y)); + memcpy(&x, reinterpret_cast<const char*>(&data_) + 8, sizeof(x)); + memcpy(&y, reinterpret_cast<const char*>(&other.data_) + 8, sizeof(y)); if (x == y) return 0; } return absl::big_endian::FromHost64(x) < absl::big_endian::FromHost64(y) @@ -531,43 +813,62 @@ class Cord { } void CopyTo(std::string* dst) const { // memcpy is much faster when operating on a known size. On most supported - // platforms, the small std::string optimization is large enough that resizing + // platforms, the small string optimization is large enough that resizing // to 15 bytes does not cause a memory allocation. absl::strings_internal::STLStringResizeUninitialized(dst, sizeof(data_) - 1); - memcpy(&(*dst)[0], data_, sizeof(data_) - 1); + memcpy(&(*dst)[0], &data_, sizeof(data_) - 1); // erase is faster than resize because the logic for memory allocation is // not needed. - dst->erase(data_[kMaxInline]); + dst->erase(inline_size()); } // Copies the inline contents into `dst`. Assumes the cord is not empty. void CopyToArray(char* dst) const; - bool is_tree() const { return data_[kMaxInline] > kMaxInline; } + bool is_tree() const { return data_.is_tree(); } + + // Returns true if the Cord is being profiled by cordz. + bool is_profiled() const { return data_.is_tree() && data_.is_profiled(); } + + // Returns the profiled CordzInfo, or nullptr if not sampled. + absl::cord_internal::CordzInfo* cordz_info() const { + return data_.cordz_info(); + } + + // Sets the profiled CordzInfo. `cordz_info` must not be null. + void set_cordz_info(cord_internal::CordzInfo* cordz_info) { + assert(cordz_info != nullptr); + data_.set_cordz_info(cordz_info); + } + + // Resets the current cordz_info to null / empty. + void clear_cordz_info() { data_.clear_cordz_info(); } private: friend class Cord; void AssignSlow(const InlineRep& src); - // Unrefs the tree, stops profiling, and zeroes the contents - void ClearSlow(); + // Unrefs the tree and stops profiling. + void UnrefTree(); + + void ResetToEmpty() { data_ = {}; } - // If the data has length <= kMaxInline, we store it in data_[0..len-1], - // and store the length in data_[kMaxInline]. Else we store it in a tree - // and store a pointer to that tree in data_[0..sizeof(CordRep*)-1]. - alignas(absl::cord_internal::CordRep*) char data_[kMaxInline + 1]; + void set_inline_size(size_t size) { data_.set_inline_size(size); } + size_t inline_size() const { return data_.inline_size(); } + + cord_internal::InlineData data_; }; InlineRep contents_; - // Helper for MemoryUsage() + // Helper for MemoryUsage(). static size_t MemoryUsageAux(const absl::cord_internal::CordRep* rep); - // Helper for GetFlat() + // Helper for GetFlat() and TryFlat(). static bool GetFlatAux(absl::cord_internal::CordRep* rep, absl::string_view* fragment); - // Helper for ForEachChunk() + // Helper for ForEachChunk(). static void ForEachChunkAux( absl::cord_internal::CordRep* rep, absl::FunctionRef<void(absl::string_view)> callback); @@ -596,9 +897,28 @@ class Cord { absl::cord_internal::CordRep* TakeRep() const&; absl::cord_internal::CordRep* TakeRep() &&; - // Helper for Append() + // Helper for Append(). template <typename C> void AppendImpl(C&& src); + + // Prepends the provided data to this instance. `method` contains the public + // API method for this action which is tracked for Cordz sampling purposes. + void PrependArray(absl::string_view src, MethodIdentifier method); + + // Assigns the value in 'src' to this instance, 'stealing' its contents. + // Requires src.length() > kMaxBytesToCopy. + Cord& AssignLargeString(std::string&& src); + + // Helper for AbslHashValue(). + template <typename H> + H HashFragmented(H hash_state) const { + typename H::AbslInternalPiecewiseCombiner combiner; + ForEachChunk([&combiner, &hash_state](absl::string_view chunk) { + hash_state = combiner.add_buffer(std::move(hash_state), chunk.data(), + chunk.size()); + }); + return H::combine(combiner.finalize(std::move(hash_state)), size()); + } }; ABSL_NAMESPACE_END @@ -655,52 +975,27 @@ inline void SmallMemmove(char* dst, const char* src, size_t n, } } -struct ExternalRepReleaserPair { - CordRep* rep; - void* releaser_address; -}; - -// Allocates a new external `CordRep` and returns a pointer to it and a pointer -// to `releaser_size` bytes where the desired releaser can be constructed. +// Does non-template-specific `CordRepExternal` initialization. // Expects `data` to be non-empty. -ExternalRepReleaserPair NewExternalWithUninitializedReleaser( - absl::string_view data, ExternalReleaserInvoker invoker, - size_t releaser_size); +void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep); // Creates a new `CordRep` that owns `data` and `releaser` and returns a pointer // to it, or `nullptr` if `data` was empty. template <typename Releaser> // NOLINTNEXTLINE - suppress clang-tidy raw pointer return. CordRep* NewExternalRep(absl::string_view data, Releaser&& releaser) { - static_assert( -#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__) - alignof(Releaser) <= __STDCPP_DEFAULT_NEW_ALIGNMENT__, -#else - alignof(Releaser) <= alignof(max_align_t), -#endif - "Releasers with alignment requirement greater than what is returned by " - "default `::operator new()` are not supported."); - using ReleaserType = absl::decay_t<Releaser>; if (data.empty()) { // Never create empty external nodes. - ::absl::base_internal::Invoke( - ReleaserType(std::forward<Releaser>(releaser)), data); + InvokeReleaser(Rank0{}, ReleaserType(std::forward<Releaser>(releaser)), + data); return nullptr; } - auto releaser_invoker = [](void* type_erased_releaser, absl::string_view d) { - auto* my_releaser = static_cast<ReleaserType*>(type_erased_releaser); - ::absl::base_internal::Invoke(std::move(*my_releaser), d); - my_releaser->~ReleaserType(); - return sizeof(Releaser); - }; - - ExternalRepReleaserPair external = NewExternalWithUninitializedReleaser( - data, releaser_invoker, sizeof(releaser)); - ::new (external.releaser_address) - ReleaserType(std::forward<Releaser>(releaser)); - return external.rep; + CordRepExternal* rep = new CordRepExternalImpl<ReleaserType>( + std::forward<Releaser>(releaser), 0); + InitializeCordRepExternal(data, rep); + return rep; } // Overload for function reference types that dispatches using a function @@ -716,18 +1011,29 @@ inline CordRep* NewExternalRep(absl::string_view data, template <typename Releaser> Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser) { Cord cord; - cord.contents_.set_tree(::absl::cord_internal::NewExternalRep( - data, std::forward<Releaser>(releaser))); + if (auto* rep = ::absl::cord_internal::NewExternalRep( + data, std::forward<Releaser>(releaser))) { + cord.contents_.EmplaceTree(rep, + Cord::MethodIdentifier::kMakeCordFromExternal); + } return cord; } -inline Cord::InlineRep::InlineRep(const Cord::InlineRep& src) { - cord_internal::SmallMemmove(data_, src.data_, sizeof(data_)); +constexpr Cord::InlineRep::InlineRep(cord_internal::InlineData data) + : data_(data) {} + +inline Cord::InlineRep::InlineRep(const Cord::InlineRep& src) + : data_(InlineData::kDefaultInit) { + if (CordRep* tree = src.tree()) { + EmplaceTree(CordRep::Ref(tree), src.data_, + CordzUpdateTracker::kConstructorCord); + } else { + data_ = src.data_; + } } -inline Cord::InlineRep::InlineRep(Cord::InlineRep&& src) { - memcpy(data_, src.data_, sizeof(data_)); - memset(src.data_, 0, sizeof(data_)); +inline Cord::InlineRep::InlineRep(Cord::InlineRep&& src) : data_(src.data_) { + src.ResetToEmpty(); } inline Cord::InlineRep& Cord::InlineRep::operator=(const Cord::InlineRep& src) { @@ -735,7 +1041,7 @@ inline Cord::InlineRep& Cord::InlineRep::operator=(const Cord::InlineRep& src) { return *this; } if (!is_tree() && !src.is_tree()) { - cord_internal::SmallMemmove(data_, src.data_, sizeof(data_)); + data_ = src.data_; return *this; } AssignSlow(src); @@ -745,10 +1051,10 @@ inline Cord::InlineRep& Cord::InlineRep::operator=(const Cord::InlineRep& src) { inline Cord::InlineRep& Cord::InlineRep::operator=( Cord::InlineRep&& src) noexcept { if (is_tree()) { - ClearSlow(); + UnrefTree(); } - memcpy(data_, src.data_, sizeof(data_)); - memset(src.data_, 0, sizeof(data_)); + data_ = src.data_; + src.ResetToEmpty(); return *this; } @@ -756,94 +1062,143 @@ inline void Cord::InlineRep::Swap(Cord::InlineRep* rhs) { if (rhs == this) { return; } - - Cord::InlineRep tmp; - cord_internal::SmallMemmove(tmp.data_, data_, sizeof(data_)); - cord_internal::SmallMemmove(data_, rhs->data_, sizeof(data_)); - cord_internal::SmallMemmove(rhs->data_, tmp.data_, sizeof(data_)); + std::swap(data_, rhs->data_); } inline const char* Cord::InlineRep::data() const { - return is_tree() ? nullptr : data_; + return is_tree() ? nullptr : data_.as_chars(); +} + +inline absl::cord_internal::CordRep* Cord::InlineRep::as_tree() const { + assert(data_.is_tree()); + return data_.as_tree(); } inline absl::cord_internal::CordRep* Cord::InlineRep::tree() const { if (is_tree()) { - absl::cord_internal::CordRep* rep; - memcpy(&rep, data_, sizeof(rep)); - return rep; + return as_tree(); } else { return nullptr; } } -inline bool Cord::InlineRep::empty() const { return data_[kMaxInline] == 0; } +inline bool Cord::InlineRep::empty() const { return data_.is_empty(); } inline size_t Cord::InlineRep::size() const { - const char tag = data_[kMaxInline]; - if (tag <= kMaxInline) return tag; - return static_cast<size_t>(tree()->length); + return is_tree() ? as_tree()->length : inline_size(); } -inline void Cord::InlineRep::set_tree(absl::cord_internal::CordRep* rep) { - if (rep == nullptr) { - memset(data_, 0, sizeof(data_)); +inline cord_internal::CordRepFlat* Cord::InlineRep::MakeFlatWithExtraCapacity( + size_t extra) { + static_assert(cord_internal::kMinFlatLength >= sizeof(data_), ""); + size_t len = data_.inline_size(); + auto* result = CordRepFlat::New(len + extra); + result->length = len; + memcpy(result->Data(), data_.as_chars(), sizeof(data_)); + return result; +} + +inline void Cord::InlineRep::EmplaceTree(CordRep* rep, + MethodIdentifier method) { + assert(rep); + data_.make_tree(rep); + CordzInfo::MaybeTrackCord(data_, method); +} + +inline void Cord::InlineRep::EmplaceTree(CordRep* rep, const InlineData& parent, + MethodIdentifier method) { + data_.make_tree(rep); + CordzInfo::MaybeTrackCord(data_, parent, method); +} + +inline void Cord::InlineRep::SetTree(CordRep* rep, + const CordzUpdateScope& scope) { + assert(rep); + assert(data_.is_tree()); + data_.set_tree(rep); + scope.SetCordRep(rep); +} + +inline void Cord::InlineRep::SetTreeOrEmpty(CordRep* rep, + const CordzUpdateScope& scope) { + assert(data_.is_tree()); + if (rep) { + data_.set_tree(rep); } else { - bool was_tree = is_tree(); - memcpy(data_, &rep, sizeof(rep)); - memset(data_ + sizeof(rep), 0, sizeof(data_) - sizeof(rep) - 1); - if (!was_tree) { - data_[kMaxInline] = kTreeFlag; - } + data_ = {}; } + scope.SetCordRep(rep); } -inline void Cord::InlineRep::replace_tree(absl::cord_internal::CordRep* rep) { - ABSL_ASSERT(is_tree()); - if (ABSL_PREDICT_FALSE(rep == nullptr)) { - set_tree(rep); - return; +inline void Cord::InlineRep::CommitTree(const CordRep* old_rep, CordRep* rep, + const CordzUpdateScope& scope, + MethodIdentifier method) { + if (old_rep) { + SetTree(rep, scope); + } else { + EmplaceTree(rep, method); } - memcpy(data_, &rep, sizeof(rep)); - memset(data_ + sizeof(rep), 0, sizeof(data_) - sizeof(rep) - 1); } inline absl::cord_internal::CordRep* Cord::InlineRep::clear() { - const char tag = data_[kMaxInline]; - absl::cord_internal::CordRep* result = nullptr; - if (tag > kMaxInline) { - memcpy(&result, data_, sizeof(result)); + if (is_tree()) { + CordzInfo::MaybeUntrackCord(cordz_info()); } - memset(data_, 0, sizeof(data_)); // Clear the cord + absl::cord_internal::CordRep* result = tree(); + ResetToEmpty(); return result; } inline void Cord::InlineRep::CopyToArray(char* dst) const { assert(!is_tree()); - size_t n = data_[kMaxInline]; + size_t n = inline_size(); assert(n != 0); - cord_internal::SmallMemmove(dst, data_, n); + cord_internal::SmallMemmove(dst, data_.as_chars(), n); } constexpr inline Cord::Cord() noexcept {} +inline Cord::Cord(absl::string_view src) + : Cord(src, CordzUpdateTracker::kConstructorString) {} + +template <typename T> +constexpr Cord::Cord(strings_internal::StringConstant<T>) + : contents_(strings_internal::StringConstant<T>::value.size() <= + cord_internal::kMaxInline + ? cord_internal::InlineData( + strings_internal::StringConstant<T>::value) + : cord_internal::InlineData( + &cord_internal::ConstInitExternalStorage< + strings_internal::StringConstant<T>>::value)) {} + inline Cord& Cord::operator=(const Cord& x) { contents_ = x.contents_; return *this; } +template <typename T, Cord::EnableIfString<T>> +Cord& Cord::operator=(T&& src) { + if (src.size() <= cord_internal::kMaxBytesToCopy) { + return operator=(absl::string_view(src)); + } else { + return AssignLargeString(std::forward<T>(src)); + } +} + +inline Cord::Cord(const Cord& src) : contents_(src.contents_) {} + inline Cord::Cord(Cord&& src) noexcept : contents_(std::move(src.contents_)) {} +inline void Cord::swap(Cord& other) noexcept { + contents_.Swap(&other.contents_); +} + inline Cord& Cord::operator=(Cord&& x) noexcept { contents_ = std::move(x.contents_); return *this; } -template <typename T, Cord::EnableIfString<T>> -inline Cord& Cord::operator=(T&& src) { - *this = absl::string_view(src); - return *this; -} +extern template Cord::Cord(std::string&& src); inline size_t Cord::size() const { // Length is 1st field in str.rep_ @@ -860,6 +1215,18 @@ inline size_t Cord::EstimatedMemoryUsage() const { return result; } +inline absl::optional<absl::string_view> Cord::TryFlat() const { + absl::cord_internal::CordRep* rep = contents_.tree(); + if (rep == nullptr) { + return absl::string_view(contents_.data(), contents_.size()); + } + absl::string_view fragment; + if (GetFlatAux(rep, &fragment)) { + return fragment; + } + return absl::nullopt; +} + inline absl::string_view Cord::Flatten() { absl::cord_internal::CordRep* rep = contents_.tree(); if (rep == nullptr) { @@ -874,22 +1241,15 @@ inline absl::string_view Cord::Flatten() { } inline void Cord::Append(absl::string_view src) { - contents_.AppendArray(src.data(), src.size()); + contents_.AppendArray(src, CordzUpdateTracker::kAppendString); } -template <typename T, Cord::EnableIfString<T>> -inline void Cord::Append(T&& src) { - // Note that this function reserves the right to reuse the `string&&`'s - // memory and that it will do so in the future. - Append(absl::string_view(src)); +inline void Cord::Prepend(absl::string_view src) { + PrependArray(src, CordzUpdateTracker::kPrependString); } -template <typename T, Cord::EnableIfString<T>> -inline void Cord::Prepend(T&& src) { - // Note that this function reserves the right to reuse the `string&&`'s - // memory and that it will do so in the future. - Prepend(absl::string_view(src)); -} +extern template void Cord::Append(std::string&& src); +extern template void Cord::Prepend(std::string&& src); inline int Cord::Compare(const Cord& rhs) const { if (!contents_.is_tree() && !rhs.contents_.is_tree()) { @@ -913,17 +1273,64 @@ inline bool Cord::StartsWith(absl::string_view rhs) const { return EqualsImpl(rhs, rhs_size); } +inline void Cord::ChunkIterator::InitTree(cord_internal::CordRep* tree) { + if (tree->tag == cord_internal::BTREE) { + current_chunk_ = btree_reader_.Init(tree->btree()); + return; + } + + stack_of_right_children_.push_back(tree); + operator++(); +} + +inline Cord::ChunkIterator::ChunkIterator(cord_internal::CordRep* tree) + : bytes_remaining_(tree->length) { + InitTree(tree); +} + inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) : bytes_remaining_(cord->size()) { - if (cord->empty()) return; if (cord->contents_.is_tree()) { - stack_of_right_children_.push_back(cord->contents_.tree()); - operator++(); + InitTree(cord->contents_.as_tree()); } else { - current_chunk_ = absl::string_view(cord->contents_.data(), cord->size()); + current_chunk_ = + absl::string_view(cord->contents_.data(), bytes_remaining_); } } +inline Cord::ChunkIterator& Cord::ChunkIterator::AdvanceBtree() { + current_chunk_ = btree_reader_.Next(); + return *this; +} + +inline void Cord::ChunkIterator::AdvanceBytesBtree(size_t n) { + assert(n >= current_chunk_.size()); + bytes_remaining_ -= n; + if (bytes_remaining_) { + if (n == current_chunk_.size()) { + current_chunk_ = btree_reader_.Next(); + } else { + size_t offset = btree_reader_.length() - bytes_remaining_; + current_chunk_ = btree_reader_.Seek(offset); + } + } else { + current_chunk_ = {}; + } +} + +inline Cord::ChunkIterator& Cord::ChunkIterator::operator++() { + ABSL_HARDENING_ASSERT(bytes_remaining_ > 0 && + "Attempted to iterate past `end()`"); + assert(bytes_remaining_ >= current_chunk_.size()); + bytes_remaining_ -= current_chunk_.size(); + if (bytes_remaining_ > 0) { + return btree_reader_ ? AdvanceBtree() : AdvanceStack(); + } else { + current_chunk_ = {}; + } + return *this; +} + inline Cord::ChunkIterator Cord::ChunkIterator::operator++(int) { ChunkIterator tmp(*this); operator++(); @@ -939,12 +1346,12 @@ inline bool Cord::ChunkIterator::operator!=(const ChunkIterator& other) const { } inline Cord::ChunkIterator::reference Cord::ChunkIterator::operator*() const { - assert(bytes_remaining_ != 0); + ABSL_HARDENING_ASSERT(bytes_remaining_ != 0); return current_chunk_; } inline Cord::ChunkIterator::pointer Cord::ChunkIterator::operator->() const { - assert(bytes_remaining_ != 0); + ABSL_HARDENING_ASSERT(bytes_remaining_ != 0); return ¤t_chunk_; } @@ -955,10 +1362,11 @@ inline void Cord::ChunkIterator::RemoveChunkPrefix(size_t n) { } inline void Cord::ChunkIterator::AdvanceBytes(size_t n) { + assert(bytes_remaining_ >= n); if (ABSL_PREDICT_TRUE(n < current_chunk_.size())) { RemoveChunkPrefix(n); } else if (n != 0) { - AdvanceBytesSlowPath(n); + btree_reader_ ? AdvanceBytesBtree(n) : AdvanceBytesSlowPath(n); } } @@ -1058,12 +1466,8 @@ inline bool operator==(const Cord& lhs, const Cord& rhs) { } inline bool operator!=(const Cord& x, const Cord& y) { return !(x == y); } -inline bool operator<(const Cord& x, const Cord& y) { - return x.Compare(y) < 0; -} -inline bool operator>(const Cord& x, const Cord& y) { - return x.Compare(y) > 0; -} +inline bool operator<(const Cord& x, const Cord& y) { return x.Compare(y) < 0; } +inline bool operator>(const Cord& x, const Cord& y) { return x.Compare(y) > 0; } inline bool operator<=(const Cord& x, const Cord& y) { return x.Compare(y) <= 0; } @@ -1098,10 +1502,6 @@ inline bool operator<=(absl::string_view x, const Cord& y) { return !(y < x); } inline bool operator>=(const Cord& x, absl::string_view y) { return !(x < y); } inline bool operator>=(absl::string_view x, const Cord& y) { return !(x < y); } -// Overload of swap for Cord. The use of non-const references is -// required. :( -inline void swap(Cord& x, Cord& y) noexcept { y.contents_.Swap(&x.contents_); } - // Some internals exposed to test code. namespace strings_internal { class CordTestAccess { diff --git a/third_party/abseil-cpp/absl/strings/cord_ring_reader_test.cc b/third_party/abseil-cpp/absl/strings/cord_ring_reader_test.cc new file mode 100644 index 0000000000..d9a9a76d1e --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/cord_ring_reader_test.cc @@ -0,0 +1,180 @@ +// Copyright 2020 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <cstdlib> +#include <ctime> +#include <memory> +#include <random> +#include <sstream> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/debugging/leak_check.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_ring.h" +#include "absl/strings/internal/cord_rep_ring_reader.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using testing::Eq; + +// Creates a flat for testing +CordRep* MakeFlat(absl::string_view s) { + CordRepFlat* flat = CordRepFlat::New(s.length()); + memcpy(flat->Data(), s.data(), s.length()); + flat->length = s.length(); + return flat; +} + +CordRepRing* FromFlats(Span<absl::string_view const> flats) { + CordRepRing* ring = CordRepRing::Create(MakeFlat(flats[0]), flats.size() - 1); + for (int i = 1; i < flats.size(); ++i) { + ring = CordRepRing::Append(ring, MakeFlat(flats[i])); + } + return ring; +} + +std::array<absl::string_view, 12> TestFlats() { + return {"abcdefghij", "klmnopqrst", "uvwxyz", "ABCDEFGHIJ", + "KLMNOPQRST", "UVWXYZ", "1234567890", "~!@#$%^&*()_", + "+-=", "[]\\{}|;':", ",/<>?", "."}; +} + +TEST(CordRingReaderTest, DefaultInstance) { + CordRepRingReader reader; + EXPECT_FALSE(static_cast<bool>(reader)); + EXPECT_THAT(reader.ring(), Eq(nullptr)); +#ifndef NDEBUG + EXPECT_DEATH_IF_SUPPORTED(reader.length(), ".*"); + EXPECT_DEATH_IF_SUPPORTED(reader.consumed(), ".*"); + EXPECT_DEATH_IF_SUPPORTED(reader.remaining(), ".*"); + EXPECT_DEATH_IF_SUPPORTED(reader.Next(), ".*"); + EXPECT_DEATH_IF_SUPPORTED(reader.Seek(0), ".*"); +#endif +} + +TEST(CordRingReaderTest, Reset) { + CordRepRingReader reader; + auto flats = TestFlats(); + CordRepRing* ring = FromFlats(flats); + + absl::string_view first = reader.Reset(ring); + EXPECT_THAT(first, Eq(flats[0])); + EXPECT_TRUE(static_cast<bool>(reader)); + EXPECT_THAT(reader.ring(), Eq(ring)); + EXPECT_THAT(reader.index(), Eq(ring->head())); + EXPECT_THAT(reader.node(), Eq(ring->entry_child(ring->head()))); + EXPECT_THAT(reader.length(), Eq(ring->length)); + EXPECT_THAT(reader.consumed(), Eq(flats[0].length())); + EXPECT_THAT(reader.remaining(), Eq(ring->length - reader.consumed())); + + reader.Reset(); + EXPECT_FALSE(static_cast<bool>(reader)); + EXPECT_THAT(reader.ring(), Eq(nullptr)); + + CordRep::Unref(ring); +} + +TEST(CordRingReaderTest, Next) { + CordRepRingReader reader; + auto flats = TestFlats(); + CordRepRing* ring = FromFlats(flats); + CordRepRing::index_type head = ring->head(); + + reader.Reset(ring); + size_t consumed = reader.consumed(); + size_t remaining = reader.remaining(); + for (int i = 1; i < flats.size(); ++i) { + CordRepRing::index_type index = ring->advance(head, i); + consumed += flats[i].length(); + remaining -= flats[i].length(); + absl::string_view next = reader.Next(); + ASSERT_THAT(next, Eq(flats[i])); + ASSERT_THAT(reader.index(), Eq(index)); + ASSERT_THAT(reader.node(), Eq(ring->entry_child(index))); + ASSERT_THAT(reader.consumed(), Eq(consumed)); + ASSERT_THAT(reader.remaining(), Eq(remaining)); + } + +#ifndef NDEBUG + EXPECT_DEATH_IF_SUPPORTED(reader.Next(), ".*"); +#endif + + CordRep::Unref(ring); +} + +TEST(CordRingReaderTest, SeekForward) { + CordRepRingReader reader; + auto flats = TestFlats(); + CordRepRing* ring = FromFlats(flats); + CordRepRing::index_type head = ring->head(); + + reader.Reset(ring); + size_t consumed = 0; + size_t remaining = ring->length;; + for (int i = 0; i < flats.size(); ++i) { + CordRepRing::index_type index = ring->advance(head, i); + size_t offset = consumed; + consumed += flats[i].length(); + remaining -= flats[i].length(); + for (int off = 0; off < flats[i].length(); ++off) { + absl::string_view chunk = reader.Seek(offset + off); + ASSERT_THAT(chunk, Eq(flats[i].substr(off))); + ASSERT_THAT(reader.index(), Eq(index)); + ASSERT_THAT(reader.node(), Eq(ring->entry_child(index))); + ASSERT_THAT(reader.consumed(), Eq(consumed)); + ASSERT_THAT(reader.remaining(), Eq(remaining)); + } + } + + CordRep::Unref(ring); +} + +TEST(CordRingReaderTest, SeekBackward) { + CordRepRingReader reader; + auto flats = TestFlats(); + CordRepRing* ring = FromFlats(flats); + CordRepRing::index_type head = ring->head(); + + reader.Reset(ring); + size_t consumed = ring->length; + size_t remaining = 0; + for (int i = flats.size() - 1; i >= 0; --i) { + CordRepRing::index_type index = ring->advance(head, i); + size_t offset = consumed - flats[i].length(); + for (int off = 0; off < flats[i].length(); ++off) { + absl::string_view chunk = reader.Seek(offset + off); + ASSERT_THAT(chunk, Eq(flats[i].substr(off))); + ASSERT_THAT(reader.index(), Eq(index)); + ASSERT_THAT(reader.node(), Eq(ring->entry_child(index))); + ASSERT_THAT(reader.consumed(), Eq(consumed)); + ASSERT_THAT(reader.remaining(), Eq(remaining)); + } + consumed -= flats[i].length(); + remaining += flats[i].length(); + } +#ifndef NDEBUG + EXPECT_DEATH_IF_SUPPORTED(reader.Seek(ring->length), ".*"); +#endif + CordRep::Unref(ring); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/cord_ring_test.cc b/third_party/abseil-cpp/absl/strings/cord_ring_test.cc new file mode 100644 index 0000000000..f131859532 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/cord_ring_test.cc @@ -0,0 +1,1495 @@ +// Copyright 2020 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <cstdlib> +#include <ctime> +#include <memory> +#include <random> +#include <sstream> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/macros.h" +#include "absl/debugging/leak_check.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_ring.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +extern thread_local bool cord_ring; + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace { + +using RandomEngine = std::mt19937_64; + +using ::absl::cord_internal::CordRep; +using ::absl::cord_internal::CordRepConcat; +using ::absl::cord_internal::CordRepExternal; +using ::absl::cord_internal::CordRepFlat; +using ::absl::cord_internal::CordRepRing; +using ::absl::cord_internal::CordRepSubstring; + +using ::absl::cord_internal::CONCAT; +using ::absl::cord_internal::EXTERNAL; +using ::absl::cord_internal::SUBSTRING; + +using testing::ElementsAre; +using testing::ElementsAreArray; +using testing::Eq; +using testing::Ge; +using testing::Le; +using testing::Lt; +using testing::Ne; +using testing::SizeIs; + +using index_type = CordRepRing::index_type; + +enum InputShareMode { kPrivate, kShared, kSharedIndirect }; + +// TestParam class used by all test fixtures. +// Not all fixtures use all possible input combinations +struct TestParam { + TestParam() = default; + explicit TestParam(InputShareMode input_share_mode) + : input_share_mode(input_share_mode) {} + + // Run the test with the 'rep under test' to be privately owned. + // Otherwise, the rep has a shared ref count of 2 or higher. + bool refcount_is_one = true; + + // Run the test with the 'rep under test' being allocated with enough capacity + // to accommodate any modifications made to it. Otherwise, the rep has zero + // extra (reserve) capacity. + bool with_capacity = true; + + // For test providing possibly shared input such as Append(.., CordpRep*), + // this field defines if that input is adopted with a refcount of one + // (privately owned / donated), or shared. For composite inputs such as + // 'substring of flat', we also have the 'shared indirect' value which means + // the top level node is not shared, but the contained child node is shared. + InputShareMode input_share_mode = kPrivate; + + std::string ToString() const { + return absl::StrCat(refcount_is_one ? "Private" : "Shared", + with_capacity ? "" : "_NoCapacity", + (input_share_mode == kPrivate) ? "" + : (input_share_mode == kShared) + ? "_SharedInput" + : "_IndirectSharedInput"); + } +}; +using TestParams = std::vector<TestParam>; + +// Matcher validating when mutable copies are required / performed. +MATCHER_P2(EqIfPrivate, param, rep, + absl::StrCat("Equal 0x", absl::Hex(rep), " if private")) { + return param.refcount_is_one ? arg == rep : true; +} + +// Matcher validating when mutable copies are required / performed. +MATCHER_P2(EqIfPrivateAndCapacity, param, rep, + absl::StrCat("Equal 0x", absl::Hex(rep), + " if private and capacity")) { + return (param.refcount_is_one && param.with_capacity) ? arg == rep : true; +} + +// Matcher validating a shared ring was re-allocated. Should only be used for +// tests doing exactly one update as subsequent updates could return the +// original (freed and re-used) pointer. +MATCHER_P2(NeIfShared, param, rep, + absl::StrCat("Not equal 0x", absl::Hex(rep), " if shared")) { + return param.refcount_is_one ? true : arg != rep; +} + +MATCHER_P2(EqIfInputPrivate, param, rep, "Equal if input is private") { + return param.input_share_mode == kPrivate ? arg == rep : arg != rep; +} + +// Matcher validating the core in-variants of the CordRepRing instance. +MATCHER(IsValidRingBuffer, "RingBuffer is valid") { + std::stringstream ss; + if (!arg->IsValid(ss)) { + *result_listener << "\nERROR: " << ss.str() << "\nRING = " << *arg; + return false; + } + return true; +} + +// Returns the flats contained in the provided CordRepRing +std::vector<string_view> ToFlats(const CordRepRing* r) { + std::vector<string_view> flats; + flats.reserve(r->entries()); + index_type pos = r->head(); + do { + flats.push_back(r->entry_data(pos)); + } while ((pos = r->advance(pos)) != r->tail()); + return flats; +} + +class not_a_string_view { + public: + explicit not_a_string_view(absl::string_view s) + : data_(s.data()), size_(s.size()) {} + explicit not_a_string_view(const void* data, size_t size) + : data_(data), size_(size) {} + + not_a_string_view remove_prefix(size_t n) const { + return not_a_string_view(static_cast<const char*>(data_) + n, size_ - n); + } + + not_a_string_view remove_suffix(size_t n) const { + return not_a_string_view(data_, size_ - n); + } + + const void* data() const { return data_; } + size_t size() const { return size_; } + + private: + const void* data_; + size_t size_; +}; + +bool operator==(not_a_string_view lhs, not_a_string_view rhs) { + return lhs.data() == rhs.data() && lhs.size() == rhs.size(); +} + +std::ostream& operator<<(std::ostream& s, not_a_string_view rhs) { + return s << "{ data: " << rhs.data() << " size: " << rhs.size() << "}"; +} + +std::vector<not_a_string_view> ToRawFlats(const CordRepRing* r) { + std::vector<not_a_string_view> flats; + flats.reserve(r->entries()); + index_type pos = r->head(); + do { + flats.emplace_back(r->entry_data(pos)); + } while ((pos = r->advance(pos)) != r->tail()); + return flats; +} + +// Returns the value contained in the provided CordRepRing +std::string ToString(const CordRepRing* r) { + std::string value; + value.reserve(r->length); + index_type pos = r->head(); + do { + absl::string_view sv = r->entry_data(pos); + value.append(sv.data(), sv.size()); + } while ((pos = r->advance(pos)) != r->tail()); + return value; +} + +// Creates a flat for testing +CordRep* MakeFlat(absl::string_view s, size_t extra = 0) { + CordRepFlat* flat = CordRepFlat::New(s.length() + extra); + memcpy(flat->Data(), s.data(), s.length()); + flat->length = s.length(); + return flat; +} + +// Creates an external node for testing +CordRepExternal* MakeExternal(absl::string_view s) { + struct Rep : public CordRepExternal { + std::string s; + explicit Rep(absl::string_view s) : s(s) { + this->tag = EXTERNAL; + this->base = s.data(); + this->length = s.length(); + this->releaser_invoker = [](CordRepExternal* self) { + delete static_cast<Rep*>(self); + }; + } + }; + return new Rep(s); +} + +CordRepExternal* MakeFakeExternal(size_t length) { + struct Rep : public CordRepExternal { + std::string s; + explicit Rep(size_t len) { + this->tag = EXTERNAL; + this->base = reinterpret_cast<const char*>(this->storage); + this->length = len; + this->releaser_invoker = [](CordRepExternal* self) { + delete static_cast<Rep*>(self); + }; + } + }; + return new Rep(length); +} + +// Creates a flat or an external node for testing depending on the size. +CordRep* MakeLeaf(absl::string_view s, size_t extra = 0) { + if (s.size() <= absl::cord_internal::kMaxFlatLength) { + return MakeFlat(s, extra); + } else { + return MakeExternal(s); + } +} + +// Creates a substring node +CordRepSubstring* MakeSubstring(size_t start, size_t len, CordRep* rep) { + auto* sub = new CordRepSubstring; + sub->tag = SUBSTRING; + sub->start = start; + sub->length = (len <= 0) ? rep->length - start + len : len; + sub->child = rep; + return sub; +} + +// Creates a substring node removing the specified prefix +CordRepSubstring* RemovePrefix(size_t start, CordRep* rep) { + return MakeSubstring(start, rep->length - start, rep); +} + +// Creates a substring node removing the specified suffix +CordRepSubstring* RemoveSuffix(size_t length, CordRep* rep) { + return MakeSubstring(0, rep->length - length, rep); +} + +CordRepConcat* MakeConcat(CordRep* left, CordRep* right, int depth = 0) { + auto* concat = new CordRepConcat; + concat->tag = CONCAT; + concat->length = left->length + right->length; + concat->left = left; + concat->right = right; + concat->set_depth(depth); + return concat; +} + +enum Composition { kMix, kAppend, kPrepend }; + +Composition RandomComposition() { + RandomEngine rng(GTEST_FLAG_GET(random_seed)); + return (rng() & 1) ? kMix : ((rng() & 1) ? kAppend : kPrepend); +} + +absl::string_view ToString(Composition composition) { + switch (composition) { + case kAppend: + return "Append"; + case kPrepend: + return "Prepend"; + case kMix: + return "Mix"; + } + assert(false); + return "???"; +} + +constexpr const char* kFox = "The quick brown fox jumps over the lazy dog"; +constexpr const char* kFoxFlats[] = {"The ", "quick ", "brown ", + "fox ", "jumps ", "over ", + "the ", "lazy ", "dog"}; +constexpr const char* kAlphabet = "abcdefghijklmnopqrstuvwxyz"; + +CordRepRing* FromFlats(Span<const char* const> flats, + Composition composition = kAppend) { + if (flats.empty()) return nullptr; + CordRepRing* ring = nullptr; + switch (composition) { + case kAppend: + ring = CordRepRing::Create(MakeLeaf(flats.front()), flats.size() - 1); + for (int i = 1; i < flats.size(); ++i) { + ring = CordRepRing::Append(ring, MakeLeaf(flats[i])); + } + break; + case kPrepend: + ring = CordRepRing::Create(MakeLeaf(flats.back()), flats.size() - 1); + for (int i = static_cast<int>(flats.size() - 2); i >= 0; --i) { + ring = CordRepRing::Prepend(ring, MakeLeaf(flats[i])); + } + break; + case kMix: + size_t middle1 = flats.size() / 2, middle2 = middle1; + ring = CordRepRing::Create(MakeLeaf(flats[middle1]), flats.size() - 1); + if (!flats.empty()) { + if ((flats.size() & 1) == 0) { + ring = CordRepRing::Prepend(ring, MakeLeaf(flats[--middle1])); + } + for (int i = 1; i <= middle1; ++i) { + ring = CordRepRing::Prepend(ring, MakeLeaf(flats[middle1 - i])); + ring = CordRepRing::Append(ring, MakeLeaf(flats[middle2 + i])); + } + } + break; + } + EXPECT_THAT(ToFlats(ring), ElementsAreArray(flats)); + return ring; +} + +std::ostream& operator<<(std::ostream& s, const TestParam& param) { + return s << param.ToString(); +} + +std::string TestParamToString(const testing::TestParamInfo<TestParam>& info) { + return info.param.ToString(); +} + +class CordRingTest : public testing::Test { + public: + ~CordRingTest() override { + for (CordRep* rep : unrefs_) { + CordRep::Unref(rep); + } + } + + template <typename CordRepType> + CordRepType* NeedsUnref(CordRepType* rep) { + assert(rep); + unrefs_.push_back(rep); + return rep; + } + + template <typename CordRepType> + CordRepType* Ref(CordRepType* rep) { + CordRep::Ref(rep); + return NeedsUnref(rep); + } + + private: + std::vector<CordRep*> unrefs_; +}; + +class CordRingTestWithParam : public testing::TestWithParam<TestParam> { + public: + ~CordRingTestWithParam() override { + for (CordRep* rep : unrefs_) { + CordRep::Unref(rep); + } + } + + CordRepRing* CreateWithCapacity(CordRep* child, size_t extra_capacity) { + if (!GetParam().with_capacity) extra_capacity = 0; + CordRepRing* ring = CordRepRing::Create(child, extra_capacity); + ring->SetCapacityForTesting(1 + extra_capacity); + return RefIfShared(ring); + } + + bool Shared() const { return !GetParam().refcount_is_one; } + bool InputShared() const { return GetParam().input_share_mode == kShared; } + bool InputSharedIndirect() const { + return GetParam().input_share_mode == kSharedIndirect; + } + + template <typename CordRepType> + CordRepType* NeedsUnref(CordRepType* rep) { + assert(rep); + unrefs_.push_back(rep); + return rep; + } + + template <typename CordRepType> + CordRepType* Ref(CordRepType* rep) { + CordRep::Ref(rep); + return NeedsUnref(rep); + } + + template <typename CordRepType> + CordRepType* RefIfShared(CordRepType* rep) { + return Shared() ? Ref(rep) : rep; + } + + template <typename CordRepType> + CordRepType* RefIfInputShared(CordRepType* rep) { + return InputShared() ? Ref(rep) : rep; + } + + template <typename CordRepType> + CordRepType* RefIfInputSharedIndirect(CordRepType* rep) { + return InputSharedIndirect() ? Ref(rep) : rep; + } + + private: + std::vector<CordRep*> unrefs_; +}; + +class CordRingCreateTest : public CordRingTestWithParam { + public: + static TestParams CreateTestParams() { + TestParams params; + params.emplace_back(InputShareMode::kPrivate); + params.emplace_back(InputShareMode::kShared); + return params; + } +}; + +class CordRingSubTest : public CordRingTestWithParam { + public: + static TestParams CreateTestParams() { + TestParams params; + for (bool refcount_is_one : {true, false}) { + TestParam param; + param.refcount_is_one = refcount_is_one; + params.push_back(param); + } + return params; + } +}; + +class CordRingBuildTest : public CordRingTestWithParam { + public: + static TestParams CreateTestParams() { + TestParams params; + for (bool refcount_is_one : {true, false}) { + for (bool with_capacity : {true, false}) { + TestParam param; + param.refcount_is_one = refcount_is_one; + param.with_capacity = with_capacity; + params.push_back(param); + } + } + return params; + } +}; + +class CordRingCreateFromTreeTest : public CordRingTestWithParam { + public: + static TestParams CreateTestParams() { + TestParams params; + params.emplace_back(InputShareMode::kPrivate); + params.emplace_back(InputShareMode::kShared); + params.emplace_back(InputShareMode::kSharedIndirect); + return params; + } +}; + +class CordRingBuildInputTest : public CordRingTestWithParam { + public: + static TestParams CreateTestParams() { + TestParams params; + for (bool refcount_is_one : {true, false}) { + for (bool with_capacity : {true, false}) { + for (InputShareMode share_mode : {kPrivate, kShared, kSharedIndirect}) { + TestParam param; + param.refcount_is_one = refcount_is_one; + param.with_capacity = with_capacity; + param.input_share_mode = share_mode; + params.push_back(param); + } + } + } + return params; + } +}; + +INSTANTIATE_TEST_SUITE_P(WithParam, CordRingSubTest, + testing::ValuesIn(CordRingSubTest::CreateTestParams()), + TestParamToString); + +INSTANTIATE_TEST_SUITE_P( + WithParam, CordRingCreateTest, + testing::ValuesIn(CordRingCreateTest::CreateTestParams()), + TestParamToString); + +INSTANTIATE_TEST_SUITE_P( + WithParam, CordRingCreateFromTreeTest, + testing::ValuesIn(CordRingCreateFromTreeTest::CreateTestParams()), + TestParamToString); + +INSTANTIATE_TEST_SUITE_P( + WithParam, CordRingBuildTest, + testing::ValuesIn(CordRingBuildTest::CreateTestParams()), + TestParamToString); + +INSTANTIATE_TEST_SUITE_P( + WithParam, CordRingBuildInputTest, + testing::ValuesIn(CordRingBuildInputTest::CreateTestParams()), + TestParamToString); + +TEST_P(CordRingCreateTest, CreateFromFlat) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + CordRepRing* result = NeedsUnref(CordRepRing::Create(MakeFlat(str1))); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(str1.size())); + EXPECT_THAT(ToFlats(result), ElementsAre(str1)); +} + +TEST_P(CordRingCreateTest, CreateFromRing) { + CordRepRing* ring = RefIfShared(FromFlats(kFoxFlats)); + CordRepRing* result = NeedsUnref(CordRepRing::Create(ring)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAreArray(kFoxFlats)); +} + +TEST_P(CordRingCreateFromTreeTest, CreateFromSubstringRing) { + CordRepRing* ring = RefIfInputSharedIndirect(FromFlats(kFoxFlats)); + CordRep* sub = RefIfInputShared(MakeSubstring(2, 11, ring)); + CordRepRing* result = NeedsUnref(CordRepRing::Create(sub)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfInputPrivate(GetParam(), ring)); + EXPECT_THAT(ToString(result), string_view(kFox).substr(2, 11)); +} + +TEST_F(CordRingTest, CreateWithIllegalExtraCapacity) { +#if defined(ABSL_HAVE_EXCEPTIONS) + CordRep* flat = NeedsUnref(MakeFlat("Hello world")); + try { + CordRepRing::Create(flat, CordRepRing::kMaxCapacity); + GTEST_FAIL() << "expected std::length_error exception"; + } catch (const std::length_error&) { + } +#elif defined(GTEST_HAS_DEATH_TEST) + CordRep* flat = NeedsUnref(MakeFlat("Hello world")); + EXPECT_DEATH(CordRepRing::Create(flat, CordRepRing::kMaxCapacity), ".*"); +#endif +} + +TEST_P(CordRingCreateFromTreeTest, CreateFromSubstringOfFlat) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + auto* flat = RefIfInputShared(MakeFlat(str1)); + auto* child = RefIfInputSharedIndirect(MakeSubstring(4, 20, flat)); + CordRepRing* result = NeedsUnref(CordRepRing::Create(child)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(20)); + EXPECT_THAT(ToFlats(result), ElementsAre(str1.substr(4, 20))); +} + +TEST_P(CordRingCreateTest, CreateFromExternal) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + auto* child = RefIfInputShared(MakeExternal(str1)); + CordRepRing* result = NeedsUnref(CordRepRing::Create(child)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(str1.size())); + EXPECT_THAT(ToFlats(result), ElementsAre(str1)); +} + +TEST_P(CordRingCreateFromTreeTest, CreateFromSubstringOfExternal) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + auto* external = RefIfInputShared(MakeExternal(str1)); + auto* child = RefIfInputSharedIndirect(MakeSubstring(1, 24, external)); + CordRepRing* result = NeedsUnref(CordRepRing::Create(child)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(24)); + EXPECT_THAT(ToFlats(result), ElementsAre(str1.substr(1, 24))); +} + +TEST_P(CordRingCreateFromTreeTest, CreateFromSubstringOfLargeExternal) { + auto* external = RefIfInputShared(MakeFakeExternal(1 << 20)); + auto str = not_a_string_view(external->base, 1 << 20) + .remove_prefix(1 << 19) + .remove_suffix(6); + auto* child = + RefIfInputSharedIndirect(MakeSubstring(1 << 19, (1 << 19) - 6, external)); + CordRepRing* result = NeedsUnref(CordRepRing::Create(child)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(str.size())); + EXPECT_THAT(ToRawFlats(result), ElementsAre(str)); +} + +TEST_P(CordRingBuildInputTest, CreateFromConcat) { + CordRep* flats[] = {MakeFlat("abcdefgh"), MakeFlat("ijklm"), + MakeFlat("nopqrstuv"), MakeFlat("wxyz")}; + auto* left = MakeConcat(RefIfInputSharedIndirect(flats[0]), flats[1]); + auto* right = MakeConcat(flats[2], RefIfInputSharedIndirect(flats[3])); + auto* concat = RefIfInputShared(MakeConcat(left, right)); + CordRepRing* result = NeedsUnref(CordRepRing::Create(concat)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(26)); + EXPECT_THAT(ToString(result), Eq(kAlphabet)); +} + +TEST_P(CordRingBuildInputTest, CreateFromSubstringConcat) { + for (size_t off = 0; off < 26; ++off) { + for (size_t len = 1; len < 26 - off; ++len) { + CordRep* flats[] = {MakeFlat("abcdefgh"), MakeFlat("ijklm"), + MakeFlat("nopqrstuv"), MakeFlat("wxyz")}; + auto* left = MakeConcat(RefIfInputSharedIndirect(flats[0]), flats[1]); + auto* right = MakeConcat(flats[2], RefIfInputSharedIndirect(flats[3])); + auto* concat = MakeConcat(left, right); + auto* child = RefIfInputShared(MakeSubstring(off, len, concat)); + CordRepRing* result = NeedsUnref(CordRepRing::Create(child)); + ASSERT_THAT(result, IsValidRingBuffer()); + ASSERT_THAT(result->length, Eq(len)); + ASSERT_THAT(ToString(result), string_view(kAlphabet).substr(off, len)); + } + } +} + +TEST_P(CordRingCreateTest, Properties) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + CordRepRing* result = NeedsUnref(CordRepRing::Create(MakeFlat(str1), 120)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->head(), Eq(0)); + EXPECT_THAT(result->tail(), Eq(1)); + EXPECT_THAT(result->capacity(), Ge(120 + 1)); + EXPECT_THAT(result->capacity(), Le(2 * 120 + 1)); + EXPECT_THAT(result->entries(), Eq(1)); + EXPECT_THAT(result->begin_pos(), Eq(0)); +} + +TEST_P(CordRingCreateTest, EntryForNewFlat) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + CordRep* child = MakeFlat(str1); + CordRepRing* result = NeedsUnref(CordRepRing::Create(child, 120)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->entry_child(0), Eq(child)); + EXPECT_THAT(result->entry_end_pos(0), Eq(str1.length())); + EXPECT_THAT(result->entry_data_offset(0), Eq(0)); +} + +TEST_P(CordRingCreateTest, EntryForNewFlatSubstring) { + absl::string_view str1 = "1234567890abcdefghijklmnopqrstuvwxyz"; + CordRep* child = MakeFlat(str1); + CordRep* substring = MakeSubstring(10, 26, child); + CordRepRing* result = NeedsUnref(CordRepRing::Create(substring, 1)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->entry_child(0), Eq(child)); + EXPECT_THAT(result->entry_end_pos(0), Eq(26)); + EXPECT_THAT(result->entry_data_offset(0), Eq(10)); +} + +TEST_P(CordRingBuildTest, AppendFlat) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + absl::string_view str2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + CordRepRing* ring = CreateWithCapacity(MakeExternal(str1), 1); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, MakeFlat(str2))); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(result->length, Eq(str1.size() + str2.size())); + EXPECT_THAT(ToFlats(result), ElementsAre(str1, str2)); +} + +TEST_P(CordRingBuildTest, PrependFlat) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + absl::string_view str2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + CordRepRing* ring = CreateWithCapacity(MakeExternal(str1), 1); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, MakeFlat(str2))); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(result->length, Eq(str1.size() + str2.size())); + EXPECT_THAT(ToFlats(result), ElementsAre(str2, str1)); +} + +TEST_P(CordRingBuildTest, AppendString) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + absl::string_view str2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + CordRepRing* ring = CreateWithCapacity(MakeExternal(str1), 1); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, str2)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(result->length, Eq(str1.size() + str2.size())); + EXPECT_THAT(ToFlats(result), ElementsAre(str1, str2)); +} + +TEST_P(CordRingBuildTest, AppendStringHavingExtra) { + absl::string_view str1 = "1234"; + absl::string_view str2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + CordRepRing* ring = CreateWithCapacity(MakeFlat(str1, 26), 0); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, str2)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(str1.size() + str2.size())); + EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); +} + +TEST_P(CordRingBuildTest, AppendStringHavingPartialExtra) { + absl::string_view str1 = "1234"; + absl::string_view str2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + // Create flat with at least one extra byte. We don't expect to have sized + // alloc and capacity rounding to grant us enough to not make it partial. + auto* flat = MakeFlat(str1, 1); + size_t avail = flat->flat()->Capacity() - flat->length; + ASSERT_THAT(avail, Lt(str2.size())) << " adjust test for larger flats!"; + + // Construct the flats we do expect using all of `avail`. + absl::string_view str1a = str2.substr(0, avail); + absl::string_view str2a = str2.substr(avail); + + CordRepRing* ring = CreateWithCapacity(flat, 1); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, str2)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(str1.size() + str2.size())); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + if (GetParam().refcount_is_one) { + EXPECT_THAT(ToFlats(result), ElementsAre(StrCat(str1, str1a), str2a)); + } else { + EXPECT_THAT(ToFlats(result), ElementsAre(str1, str2)); + } +} + +TEST_P(CordRingBuildTest, AppendStringHavingExtraInSubstring) { + absl::string_view str1 = "123456789_1234"; + absl::string_view str2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + CordRep* flat = RemovePrefix(10, MakeFlat(str1, 26)); + CordRepRing* ring = CreateWithCapacity(flat, 0); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, str2)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(result->length, Eq(4 + str2.size())); + if (GetParam().refcount_is_one) { + EXPECT_THAT(ToFlats(result), ElementsAre(StrCat("1234", str2))); + } else { + EXPECT_THAT(ToFlats(result), ElementsAre("1234", str2)); + } +} + +TEST_P(CordRingBuildTest, AppendStringHavingSharedExtra) { + absl::string_view str1 = "123456789_1234"; + absl::string_view str2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + for (int shared_type = 0; shared_type < 2; ++shared_type) { + SCOPED_TRACE(absl::StrCat("Shared extra type ", shared_type)); + + // Create a flat that is shared in some way. + CordRep* flat = nullptr; + CordRep* flat1 = nullptr; + if (shared_type == 0) { + // Shared flat + flat = CordRep::Ref(MakeFlat(str1.substr(10), 100)); + } else if (shared_type == 1) { + // Shared flat inside private substring + flat1 = CordRep::Ref(MakeFlat(str1)); + flat = RemovePrefix(10, flat1); + } else { + // Private flat inside shared substring + flat = CordRep::Ref(RemovePrefix(10, MakeFlat(str1, 100))); + } + + CordRepRing* ring = CreateWithCapacity(flat, 1); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, str2)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(result->length, Eq(4 + str2.size())); + EXPECT_THAT(ToFlats(result), ElementsAre("1234", str2)); + + CordRep::Unref(shared_type == 1 ? flat1 : flat); + } +} + +TEST_P(CordRingBuildTest, AppendStringWithExtra) { + absl::string_view str1 = "1234"; + absl::string_view str2 = "1234567890"; + absl::string_view str3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + CordRepRing* ring = CreateWithCapacity(MakeExternal(str1), 1); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, str2, 26)); + result = CordRepRing::Append(result, str3); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(str1.size() + str2.size() + str3.size())); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre(str1, StrCat(str2, str3))); +} + +TEST_P(CordRingBuildTest, PrependString) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + absl::string_view str2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + // Use external rep to avoid appending to first flat + CordRepRing* ring = CreateWithCapacity(MakeExternal(str1), 1); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, str2)); + ASSERT_THAT(result, IsValidRingBuffer()); + if (GetParam().with_capacity && GetParam().refcount_is_one) { + EXPECT_THAT(result, Eq(ring)); + } else { + EXPECT_THAT(result, Ne(ring)); + } + EXPECT_THAT(result->length, Eq(str1.size() + str2.size())); + EXPECT_THAT(ToFlats(result), ElementsAre(str2, str1)); +} + +TEST_P(CordRingBuildTest, PrependStringHavingExtra) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz1234"; + absl::string_view str2 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + CordRep* flat = RemovePrefix(26, MakeFlat(str1)); + CordRepRing* ring = CreateWithCapacity(flat, 0); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, str2)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(result->length, Eq(4 + str2.size())); + if (GetParam().refcount_is_one) { + EXPECT_THAT(ToFlats(result), ElementsAre(StrCat(str2, "1234"))); + } else { + EXPECT_THAT(ToFlats(result), ElementsAre(str2, "1234")); + } +} + +TEST_P(CordRingBuildTest, PrependStringHavingSharedExtra) { + absl::string_view str1 = "123456789_ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + absl::string_view str2 = "abcdefghij"; + absl::string_view str1a = str1.substr(10); + for (int shared_type = 1; shared_type < 2; ++shared_type) { + SCOPED_TRACE(absl::StrCat("Shared extra type ", shared_type)); + + // Create a flat that is shared in some way. + CordRep* flat = nullptr; + CordRep* flat1 = nullptr; + if (shared_type == 1) { + // Shared flat inside private substring + flat = RemovePrefix(10, flat1 = CordRep::Ref(MakeFlat(str1))); + } else { + // Private flat inside shared substring + flat = CordRep::Ref(RemovePrefix(10, MakeFlat(str1, 100))); + } + + CordRepRing* ring = CreateWithCapacity(flat, 1); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, str2)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result->length, Eq(str1a.size() + str2.size())); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre(str2, str1a)); + CordRep::Unref(shared_type == 1 ? flat1 : flat); + } +} + +TEST_P(CordRingBuildTest, PrependStringWithExtra) { + absl::string_view str1 = "1234"; + absl::string_view str2 = "1234567890"; + absl::string_view str3 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + CordRepRing* ring = CreateWithCapacity(MakeExternal(str1), 1); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, str2, 26)); + ASSERT_THAT(result, IsValidRingBuffer()); + result = CordRepRing::Prepend(result, str3); + EXPECT_THAT(result->length, Eq(str1.size() + str2.size() + str3.size())); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre(StrCat(str3, str2), str1)); +} + +TEST_P(CordRingBuildTest, AppendPrependStringMix) { + const auto& flats = kFoxFlats; + CordRepRing* ring = CreateWithCapacity(MakeFlat(flats[4]), 8); + CordRepRing* result = ring; + for (int i = 1; i <= 4; ++i) { + result = CordRepRing::Prepend(result, flats[4 - i]); + result = CordRepRing::Append(result, flats[4 + i]); + } + NeedsUnref(result); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(ToString(result), kFox); +} + +TEST_P(CordRingBuildTest, AppendPrependStringMixWithExtra) { + const auto& flats = kFoxFlats; + CordRepRing* ring = CreateWithCapacity(MakeFlat(flats[4], 100), 8); + CordRepRing* result = ring; + for (int i = 1; i <= 4; ++i) { + result = CordRepRing::Prepend(result, flats[4 - i], 100); + result = CordRepRing::Append(result, flats[4 + i], 100); + } + NeedsUnref(result); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + if (GetParam().refcount_is_one) { + EXPECT_THAT(ToFlats(result), + ElementsAre("The quick brown fox ", "jumps over the lazy dog")); + } else { + EXPECT_THAT(ToFlats(result), ElementsAre("The quick brown fox ", "jumps ", + "over the lazy dog")); + } +} + +TEST_P(CordRingBuildTest, AppendPrependStringMixWithPrependedExtra) { + const auto& flats = kFoxFlats; + CordRep* flat = MakeFlat(StrCat(std::string(50, '.'), flats[4]), 50); + CordRepRing* ring = CreateWithCapacity(RemovePrefix(50, flat), 0); + CordRepRing* result = ring; + for (int i = 1; i <= 4; ++i) { + result = CordRepRing::Prepend(result, flats[4 - i], 100); + result = CordRepRing::Append(result, flats[4 + i], 100); + } + result = NeedsUnref(result); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + if (GetParam().refcount_is_one) { + EXPECT_THAT(ToFlats(result), ElementsAre(kFox)); + } else { + EXPECT_THAT(ToFlats(result), ElementsAre("The quick brown fox ", "jumps ", + "over the lazy dog")); + } +} + +TEST_P(CordRingSubTest, SubRing) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + string_view all = kFox; + for (size_t offset = 0; offset < all.size() - 1; ++offset) { + CordRepRing* ring = RefIfShared(FromFlats(flats, composition)); + CordRepRing* result = CordRepRing::SubRing(ring, offset, 0); + EXPECT_THAT(result, nullptr); + + for (size_t len = 1; len < all.size() - offset; ++len) { + ring = RefIfShared(FromFlats(flats, composition)); + result = NeedsUnref(CordRepRing::SubRing(ring, offset, len)); + ASSERT_THAT(result, IsValidRingBuffer()); + ASSERT_THAT(result, EqIfPrivate(GetParam(), ring)); + ASSERT_THAT(result, NeIfShared(GetParam(), ring)); + ASSERT_THAT(ToString(result), Eq(all.substr(offset, len))); + } + } +} + +TEST_P(CordRingSubTest, SubRingFromLargeExternal) { + auto composition = RandomComposition(); + std::string large_string(1 << 20, '.'); + const char* flats[] = { + "abcdefghijklmnopqrstuvwxyz", + large_string.c_str(), + "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + }; + std::string buffer = absl::StrCat(flats[0], flats[1], flats[2]); + absl::string_view all = buffer; + for (size_t offset = 0; offset < 30; ++offset) { + CordRepRing* ring = RefIfShared(FromFlats(flats, composition)); + CordRepRing* result = CordRepRing::SubRing(ring, offset, 0); + EXPECT_THAT(result, nullptr); + + for (size_t len = all.size() - 30; len < all.size() - offset; ++len) { + ring = RefIfShared(FromFlats(flats, composition)); + result = NeedsUnref(CordRepRing::SubRing(ring, offset, len)); + ASSERT_THAT(result, IsValidRingBuffer()); + ASSERT_THAT(result, EqIfPrivate(GetParam(), ring)); + ASSERT_THAT(result, NeIfShared(GetParam(), ring)); + auto str = ToString(result); + ASSERT_THAT(str, SizeIs(len)); + ASSERT_THAT(str, Eq(all.substr(offset, len))); + } + } +} + +TEST_P(CordRingSubTest, RemovePrefix) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + string_view all = kFox; + CordRepRing* ring = RefIfShared(FromFlats(flats, composition)); + CordRepRing* result = CordRepRing::RemovePrefix(ring, all.size()); + EXPECT_THAT(result, nullptr); + + for (size_t len = 1; len < all.size(); ++len) { + ring = RefIfShared(FromFlats(flats, composition)); + result = NeedsUnref(CordRepRing::RemovePrefix(ring, len)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + ASSERT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToString(result), Eq(all.substr(len))); + } +} + +TEST_P(CordRingSubTest, RemovePrefixFromLargeExternal) { + CordRepExternal* external1 = MakeFakeExternal(1 << 20); + CordRepExternal* external2 = MakeFakeExternal(1 << 20); + CordRepRing* ring = CordRepRing::Create(external1, 1); + ring = CordRepRing::Append(ring, external2); + CordRepRing* result = NeedsUnref(CordRepRing::RemovePrefix(ring, 1 << 16)); + EXPECT_THAT( + ToRawFlats(result), + ElementsAre( + not_a_string_view(external1->base, 1 << 20).remove_prefix(1 << 16), + not_a_string_view(external2->base, 1 << 20))); +} + +TEST_P(CordRingSubTest, RemoveSuffix) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + string_view all = kFox; + CordRepRing* ring = RefIfShared(FromFlats(flats, composition)); + CordRepRing* result = CordRepRing::RemoveSuffix(ring, all.size()); + EXPECT_THAT(result, nullptr); + + for (size_t len = 1; len < all.size(); ++len) { + ring = RefIfShared(FromFlats(flats, composition)); + result = NeedsUnref(CordRepRing::RemoveSuffix(ring, len)); + ASSERT_THAT(result, IsValidRingBuffer()); + ASSERT_THAT(result, EqIfPrivate(GetParam(), ring)); + ASSERT_THAT(result, NeIfShared(GetParam(), ring)); + ASSERT_THAT(ToString(result), Eq(all.substr(0, all.size() - len))); + } +} + +TEST_P(CordRingSubTest, AppendRing) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats).subspan(1); + CordRepRing* ring = CreateWithCapacity(MakeFlat(kFoxFlats[0]), flats.size()); + CordRepRing* child = FromFlats(flats, composition); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, child)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAreArray(kFoxFlats)); +} + +TEST_P(CordRingBuildInputTest, AppendRingWithFlatOffset) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Head"), flats.size()); + CordRep* child = RefIfInputSharedIndirect(FromFlats(flats, composition)); + CordRep* stripped = RemovePrefix(10, child); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("Head", "brown ", "fox ", "jumps ", + "over ", "the ", "lazy ", "dog")); +} + +TEST_P(CordRingBuildInputTest, AppendRingWithBrokenOffset) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Head"), flats.size()); + CordRep* child = RefIfInputSharedIndirect(FromFlats(flats, composition)); + CordRep* stripped = RemovePrefix(21, child); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), + ElementsAre("Head", "umps ", "over ", "the ", "lazy ", "dog")); +} + +TEST_P(CordRingBuildInputTest, AppendRingWithFlatLength) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Head"), flats.size()); + CordRep* child = RefIfInputSharedIndirect(FromFlats(flats, composition)); + CordRep* stripped = RemoveSuffix(8, child); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("Head", "The ", "quick ", "brown ", + "fox ", "jumps ", "over ", "the ")); +} + +TEST_P(CordRingBuildTest, AppendRingWithBrokenFlatLength) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Head"), flats.size()); + CordRep* child = RefIfInputSharedIndirect(FromFlats(flats, composition)); + CordRep* stripped = RemoveSuffix(15, child); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("Head", "The ", "quick ", "brown ", + "fox ", "jumps ", "ov")); +} + +TEST_P(CordRingBuildTest, AppendRingMiddlePiece) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Head"), flats.size()); + CordRep* child = RefIfInputSharedIndirect(FromFlats(flats, composition)); + CordRep* stripped = MakeSubstring(7, child->length - 27, child); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), + ElementsAre("Head", "ck ", "brown ", "fox ", "jum")); +} + +TEST_P(CordRingBuildTest, AppendRingSinglePiece) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Head"), flats.size()); + CordRep* child = RefIfInputSharedIndirect(FromFlats(flats, composition)); + CordRep* stripped = RefIfInputShared(MakeSubstring(11, 3, child)); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("Head", "row")); +} + +TEST_P(CordRingBuildInputTest, AppendRingSinglePieceWithPrefix) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + size_t extra_capacity = 1 + (GetParam().with_capacity ? flats.size() : 0); + CordRepRing* ring = CordRepRing::Create(MakeFlat("Head"), extra_capacity); + ring->SetCapacityForTesting(1 + extra_capacity); + ring = RefIfShared(CordRepRing::Prepend(ring, MakeFlat("Prepend"))); + assert(ring->IsValid(std::cout)); + CordRepRing* child = RefIfInputSharedIndirect(FromFlats(flats, composition)); + CordRep* stripped = RefIfInputShared(MakeSubstring(11, 3, child)); + CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("Prepend", "Head", "row")); +} + +TEST_P(CordRingBuildInputTest, PrependRing) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto fox = MakeSpan(kFoxFlats); + auto flats = MakeSpan(fox).subspan(0, fox.size() - 1); + CordRepRing* ring = CreateWithCapacity(MakeFlat(fox.back()), flats.size()); + CordRepRing* child = RefIfInputShared(FromFlats(flats, composition)); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, child)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAreArray(kFoxFlats)); +} + +TEST_P(CordRingBuildInputTest, PrependRingWithFlatOffset) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Tail"), flats.size()); + CordRep* child = RefIfInputShared(FromFlats(flats, composition)); + CordRep* stripped = RefIfInputSharedIndirect(RemovePrefix(10, child)); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("brown ", "fox ", "jumps ", "over ", + "the ", "lazy ", "dog", "Tail")); +} + +TEST_P(CordRingBuildInputTest, PrependRingWithBrokenOffset) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Tail"), flats.size()); + CordRep* child = RefIfInputShared(FromFlats(flats, composition)); + CordRep* stripped = RefIfInputSharedIndirect(RemovePrefix(21, child)); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), + ElementsAre("umps ", "over ", "the ", "lazy ", "dog", "Tail")); +} + +TEST_P(CordRingBuildInputTest, PrependRingWithFlatLength) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Tail"), flats.size()); + CordRep* child = RefIfInputShared(FromFlats(flats, composition)); + CordRep* stripped = RefIfInputSharedIndirect(RemoveSuffix(8, child)); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("The ", "quick ", "brown ", "fox ", + "jumps ", "over ", "the ", "Tail")); +} + +TEST_P(CordRingBuildInputTest, PrependRingWithBrokenFlatLength) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Tail"), flats.size()); + CordRep* child = RefIfInputShared(FromFlats(flats, composition)); + CordRep* stripped = RefIfInputSharedIndirect(RemoveSuffix(15, child)); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("The ", "quick ", "brown ", "fox ", + "jumps ", "ov", "Tail")); +} + +TEST_P(CordRingBuildInputTest, PrependRingMiddlePiece) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Tail"), flats.size()); + CordRep* child = RefIfInputShared(FromFlats(flats, composition)); + CordRep* stripped = + RefIfInputSharedIndirect(MakeSubstring(7, child->length - 27, child)); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), + ElementsAre("ck ", "brown ", "fox ", "jum", "Tail")); +} + +TEST_P(CordRingBuildInputTest, PrependRingSinglePiece) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CreateWithCapacity(MakeFlat("Tail"), flats.size()); + CordRep* child = RefIfInputShared(FromFlats(flats, composition)); + CordRep* stripped = RefIfInputSharedIndirect(MakeSubstring(11, 3, child)); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("row", "Tail")); +} + +TEST_P(CordRingBuildInputTest, PrependRingSinglePieceWithPrefix) { + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + auto flats = MakeSpan(kFoxFlats); + size_t extra_capacity = 1 + (GetParam().with_capacity ? flats.size() : 0); + CordRepRing* ring = CordRepRing::Create(MakeFlat("Tail"), extra_capacity); + ring->SetCapacityForTesting(1 + extra_capacity); + ring = RefIfShared(CordRepRing::Prepend(ring, MakeFlat("Prepend"))); + CordRep* child = RefIfInputShared(FromFlats(flats, composition)); + CordRep* stripped = RefIfInputSharedIndirect(MakeSubstring(11, 3, child)); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); + ASSERT_THAT(result, IsValidRingBuffer()); + EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); + EXPECT_THAT(ToFlats(result), ElementsAre("row", "Prepend", "Tail")); +} + +TEST_F(CordRingTest, Find) { + constexpr const char* flats[] = { + "abcdefghij", "klmnopqrst", "uvwxyz", "ABCDEFGHIJ", + "KLMNOPQRST", "UVWXYZ", "1234567890", "~!@#$%^&*()_", + "+-=", "[]\\{}|;':", ",/<>?", "."}; + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + CordRepRing* ring = NeedsUnref(FromFlats(flats, composition)); + std::string value = ToString(ring); + for (int i = 0; i < value.length(); ++i) { + CordRepRing::Position found = ring->Find(i); + auto data = ring->entry_data(found.index); + ASSERT_THAT(found.offset, Lt(data.length())); + ASSERT_THAT(data[found.offset], Eq(value[i])); + } +} + +TEST_F(CordRingTest, FindWithHint) { + constexpr const char* flats[] = { + "abcdefghij", "klmnopqrst", "uvwxyz", "ABCDEFGHIJ", + "KLMNOPQRST", "UVWXYZ", "1234567890", "~!@#$%^&*()_", + "+-=", "[]\\{}|;':", ",/<>?", "."}; + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + CordRepRing* ring = NeedsUnref(FromFlats(flats, composition)); + std::string value = ToString(ring); + +#if defined(GTEST_HAS_DEATH_TEST) + // Test hint beyond valid position + index_type head = ring->head(); + EXPECT_DEBUG_DEATH(ring->Find(ring->advance(head), 0), ".*"); + EXPECT_DEBUG_DEATH(ring->Find(ring->advance(head), 9), ".*"); + EXPECT_DEBUG_DEATH(ring->Find(ring->advance(head, 3), 24), ".*"); +#endif + + int flat_pos = 0; + size_t flat_offset = 0; + for (auto sflat : flats) { + string_view flat(sflat); + for (int offset = 0; offset < flat.length(); ++offset) { + for (int start = 0; start <= flat_pos; ++start) { + index_type hint = ring->advance(ring->head(), start); + CordRepRing::Position found = ring->Find(hint, flat_offset + offset); + ASSERT_THAT(found.index, Eq(ring->advance(ring->head(), flat_pos))); + ASSERT_THAT(found.offset, Eq(offset)); + } + } + ++flat_pos; + flat_offset += flat.length(); + } +} + +TEST_F(CordRingTest, FindInLargeRing) { + constexpr const char* flats[] = { + "abcdefghij", "klmnopqrst", "uvwxyz", "ABCDEFGHIJ", + "KLMNOPQRST", "UVWXYZ", "1234567890", "~!@#$%^&*()_", + "+-=", "[]\\{}|;':", ",/<>?", "."}; + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + CordRepRing* ring = FromFlats(flats, composition); + for (int i = 0; i < 13; ++i) { + ring = CordRepRing::Append(ring, FromFlats(flats, composition)); + } + NeedsUnref(ring); + std::string value = ToString(ring); + for (int i = 0; i < value.length(); ++i) { + CordRepRing::Position pos = ring->Find(i); + auto data = ring->entry_data(pos.index); + ASSERT_THAT(pos.offset, Lt(data.length())); + ASSERT_THAT(data[pos.offset], Eq(value[i])); + } +} + +TEST_F(CordRingTest, FindTail) { + constexpr const char* flats[] = { + "abcdefghij", "klmnopqrst", "uvwxyz", "ABCDEFGHIJ", + "KLMNOPQRST", "UVWXYZ", "1234567890", "~!@#$%^&*()_", + "+-=", "[]\\{}|;':", ",/<>?", "."}; + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + CordRepRing* ring = NeedsUnref(FromFlats(flats, composition)); + std::string value = ToString(ring); + + for (int i = 0; i < value.length(); ++i) { + CordRepRing::Position pos = ring->FindTail(i + 1); + auto data = ring->entry_data(ring->retreat(pos.index)); + ASSERT_THAT(pos.offset, Lt(data.length())); + ASSERT_THAT(data[data.length() - pos.offset - 1], Eq(value[i])); + } +} + +TEST_F(CordRingTest, FindTailWithHint) { + constexpr const char* flats[] = { + "abcdefghij", "klmnopqrst", "uvwxyz", "ABCDEFGHIJ", + "KLMNOPQRST", "UVWXYZ", "1234567890", "~!@#$%^&*()_", + "+-=", "[]\\{}|;':", ",/<>?", "."}; + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + CordRepRing* ring = NeedsUnref(FromFlats(flats, composition)); + std::string value = ToString(ring); + + // Test hint beyond valid position +#if defined(GTEST_HAS_DEATH_TEST) + index_type head = ring->head(); + EXPECT_DEBUG_DEATH(ring->FindTail(ring->advance(head), 1), ".*"); + EXPECT_DEBUG_DEATH(ring->FindTail(ring->advance(head), 10), ".*"); + EXPECT_DEBUG_DEATH(ring->FindTail(ring->advance(head, 3), 26), ".*"); +#endif + + for (int i = 0; i < value.length(); ++i) { + CordRepRing::Position pos = ring->FindTail(i + 1); + auto data = ring->entry_data(ring->retreat(pos.index)); + ASSERT_THAT(pos.offset, Lt(data.length())); + ASSERT_THAT(data[data.length() - pos.offset - 1], Eq(value[i])); + } +} + +TEST_F(CordRingTest, FindTailInLargeRing) { + constexpr const char* flats[] = { + "abcdefghij", "klmnopqrst", "uvwxyz", "ABCDEFGHIJ", + "KLMNOPQRST", "UVWXYZ", "1234567890", "~!@#$%^&*()_", + "+-=", "[]\\{}|;':", ",/<>?", "."}; + auto composition = RandomComposition(); + SCOPED_TRACE(ToString(composition)); + CordRepRing* ring = FromFlats(flats, composition); + for (int i = 0; i < 13; ++i) { + ring = CordRepRing::Append(ring, FromFlats(flats, composition)); + } + NeedsUnref(ring); + std::string value = ToString(ring); + for (int i = 0; i < value.length(); ++i) { + CordRepRing::Position pos = ring->FindTail(i + 1); + auto data = ring->entry_data(ring->retreat(pos.index)); + ASSERT_THAT(pos.offset, Lt(data.length())); + ASSERT_THAT(data[data.length() - pos.offset - 1], Eq(value[i])); + } +} + +TEST_F(CordRingTest, GetCharacter) { + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = CordRepRing::Create(MakeFlat("Tail"), flats.size()); + CordRep* child = FromFlats(flats, kAppend); + CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, child)); + std::string value = ToString(result); + for (int i = 0; i < value.length(); ++i) { + ASSERT_THAT(result->GetCharacter(i), Eq(value[i])); + } +} + +TEST_F(CordRingTest, GetCharacterWithSubstring) { + absl::string_view str1 = "abcdefghijklmnopqrstuvwxyz"; + auto* child = MakeSubstring(4, 20, MakeFlat(str1)); + CordRepRing* result = NeedsUnref(CordRepRing::Create(child)); + ASSERT_THAT(result, IsValidRingBuffer()); + std::string value = ToString(result); + for (int i = 0; i < value.length(); ++i) { + ASSERT_THAT(result->GetCharacter(i), Eq(value[i])); + } +} + +TEST_F(CordRingTest, IsFlatSingleFlat) { + for (bool external : {false, true}) { + SCOPED_TRACE(external ? "With External" : "With Flat"); + absl::string_view str = "Hello world"; + CordRep* rep = external ? MakeExternal(str) : MakeFlat(str); + CordRepRing* ring = NeedsUnref(CordRepRing::Create(rep)); + + // The ring is a single non-fragmented flat: + absl::string_view fragment; + EXPECT_TRUE(ring->IsFlat(nullptr)); + EXPECT_TRUE(ring->IsFlat(&fragment)); + EXPECT_THAT(fragment, Eq("Hello world")); + fragment = ""; + EXPECT_TRUE(ring->IsFlat(0, 11, nullptr)); + EXPECT_TRUE(ring->IsFlat(0, 11, &fragment)); + EXPECT_THAT(fragment, Eq("Hello world")); + + // Arbitrary ranges must check true as well. + EXPECT_TRUE(ring->IsFlat(1, 4, &fragment)); + EXPECT_THAT(fragment, Eq("ello")); + EXPECT_TRUE(ring->IsFlat(6, 5, &fragment)); + EXPECT_THAT(fragment, Eq("world")); + } +} + +TEST_F(CordRingTest, IsFlatMultiFlat) { + for (bool external : {false, true}) { + SCOPED_TRACE(external ? "With External" : "With Flat"); + absl::string_view str1 = "Hello world"; + absl::string_view str2 = "Halt and catch fire"; + CordRep* rep1 = external ? MakeExternal(str1) : MakeFlat(str1); + CordRep* rep2 = external ? MakeExternal(str2) : MakeFlat(str2); + CordRepRing* ring = CordRepRing::Append(CordRepRing::Create(rep1), rep2); + NeedsUnref(ring); + + // The ring is fragmented, IsFlat() on the entire cord must be false. + EXPECT_FALSE(ring->IsFlat(nullptr)); + absl::string_view fragment = "Don't touch this"; + EXPECT_FALSE(ring->IsFlat(&fragment)); + EXPECT_THAT(fragment, Eq("Don't touch this")); + + // Check for ranges exactly within both flats. + EXPECT_TRUE(ring->IsFlat(0, 11, &fragment)); + EXPECT_THAT(fragment, Eq("Hello world")); + EXPECT_TRUE(ring->IsFlat(11, 19, &fragment)); + EXPECT_THAT(fragment, Eq("Halt and catch fire")); + + // Check for arbitrary partial range inside each flat. + EXPECT_TRUE(ring->IsFlat(1, 4, &fragment)); + EXPECT_THAT(fragment, "ello"); + EXPECT_TRUE(ring->IsFlat(26, 4, &fragment)); + EXPECT_THAT(fragment, "fire"); + + // Check ranges spanning across both flats + fragment = "Don't touch this"; + EXPECT_FALSE(ring->IsFlat(1, 18, &fragment)); + EXPECT_FALSE(ring->IsFlat(10, 2, &fragment)); + EXPECT_THAT(fragment, Eq("Don't touch this")); + } +} + +TEST_F(CordRingTest, Dump) { + std::stringstream ss; + auto flats = MakeSpan(kFoxFlats); + CordRepRing* ring = NeedsUnref(FromFlats(flats, kPrepend)); + ss << *ring; +} + +} // namespace +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/cord_test.cc b/third_party/abseil-cpp/absl/strings/cord_test.cc index 434f3a247e..cced9bba39 100644 --- a/third_party/abseil-cpp/absl/strings/cord_test.cc +++ b/third_party/abseil-cpp/absl/strings/cord_test.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "absl/strings/cord.h" #include <algorithm> @@ -18,9 +32,13 @@ #include "absl/base/config.h" #include "absl/base/internal/endian.h" #include "absl/base/internal/raw_logging.h" +#include "absl/base/macros.h" #include "absl/container/fixed_array.h" +#include "absl/random/random.h" #include "absl/strings/cord_test_helpers.h" +#include "absl/strings/cordz_test_helpers.h" #include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" #include "absl/strings/string_view.h" typedef std::mt19937_64 RandomEngine; @@ -70,9 +88,8 @@ static std::string RandomLowercaseString(RandomEngine* rng) { static std::string RandomLowercaseString(RandomEngine* rng, size_t length) { std::string result(length, '\0'); std::uniform_int_distribution<int> chars('a', 'z'); - std::generate(result.begin(), result.end(), [&]() { - return static_cast<char>(chars(*rng)); - }); + std::generate(result.begin(), result.end(), + [&]() { return static_cast<char>(chars(*rng)); }); return result; } @@ -166,16 +183,60 @@ class CordTestPeer { const Cord& c, absl::FunctionRef<void(absl::string_view)> callback) { c.ForEachChunk(callback); } + + static bool IsTree(const Cord& c) { return c.contents_.is_tree(); } + + static cord_internal::CordzInfo* GetCordzInfo(const Cord& c) { + return c.contents_.cordz_info(); + } + + static Cord MakeSubstring(Cord src, size_t offset, size_t length) { + ABSL_RAW_CHECK(src.contents_.is_tree(), "Can not be inlined"); + Cord cord; + auto* rep = new cord_internal::CordRepSubstring; + rep->tag = cord_internal::SUBSTRING; + rep->child = cord_internal::CordRep::Ref(src.contents_.tree()); + rep->start = offset; + rep->length = length; + cord.contents_.EmplaceTree(rep, + cord_internal::CordzUpdateTracker::kSubCord); + return cord; + } }; ABSL_NAMESPACE_END } // namespace absl -TEST(Cord, AllFlatSizes) { +// The CordTest fixture runs all tests with and without Cord Btree enabled. +class CordTest : public testing::TestWithParam<bool> { + public: + CordTest() : was_btree_(absl::cord_internal::cord_btree_enabled.load()) { + absl::cord_internal::cord_btree_enabled.store(UseBtree()); + } + ~CordTest() override { + absl::cord_internal::cord_btree_enabled.store(was_btree_); + } + + // Returns true if test is running with btree enabled. + bool UseBtree() const { return GetParam(); } + + // Returns human readable string representation of the test parameter. + static std::string ToString(testing::TestParamInfo<bool> param) { + return param.param ? "Btree" : "Concat"; + } + + private: + const bool was_btree_; +}; + +INSTANTIATE_TEST_SUITE_P(WithParam, CordTest, testing::Bool(), + CordTest::ToString); + +TEST_P(CordTest, AllFlatSizes) { using absl::strings_internal::CordTestAccess; for (size_t s = 0; s < CordTestAccess::MaxFlatLength(); s++) { - // Make a std::string of length s. + // Make a string of length s. std::string src; while (src.size() < s) { src.push_back('a' + (src.size() % 26)); @@ -189,7 +250,7 @@ TEST(Cord, AllFlatSizes) { // We create a Cord at least 128GB in size using the fact that Cords can // internally reference-count; thus the Cord is enormous without actually // consuming very much memory. -TEST(GigabyteCord, FromExternal) { +TEST_P(CordTest, GigabyteCordFromExternal) { const size_t one_gig = 1024U * 1024U * 1024U; size_t max_size = 2 * one_gig; if (sizeof(max_size) > 4) max_size = 128 * one_gig; @@ -206,7 +267,6 @@ TEST(GigabyteCord, FromExternal) { // caused crashes in production. We grow exponentially so that the code will // execute in a reasonable amount of time. absl::Cord c; - ABSL_RAW_LOG(INFO, "Made a Cord with %zu bytes!", c.size()); c.Append(from); while (c.size() < max_size) { c.Append(c); @@ -239,7 +299,7 @@ static absl::Cord MakeExternalCord(int size) { extern bool my_unique_true_boolean; bool my_unique_true_boolean = true; -TEST(Cord, Assignment) { +TEST_P(CordTest, Assignment) { absl::Cord x(absl::string_view("hi there")); absl::Cord y(x); ASSERT_EQ(std::string(x), "hi there"); @@ -293,7 +353,7 @@ TEST(Cord, Assignment) { } } -TEST(Cord, StartsEndsWith) { +TEST_P(CordTest, StartsEndsWith) { absl::Cord x(absl::string_view("abcde")); absl::Cord empty(""); @@ -326,13 +386,13 @@ TEST(Cord, StartsEndsWith) { ASSERT_TRUE(!empty.EndsWith("xyz")); } -TEST(Cord, Subcord) { - RandomEngine rng(testing::GTEST_FLAG(random_seed)); +TEST_P(CordTest, Subcord) { + RandomEngine rng(GTEST_FLAG_GET(random_seed)); const std::string s = RandomLowercaseString(&rng, 1024); absl::Cord a; AppendWithFragments(s, &rng, &a); - ASSERT_EQ(s.size(), a.size()); + ASSERT_EQ(s, std::string(a)); // Check subcords of a, from a variety of interesting points. std::set<size_t> positions; @@ -350,7 +410,7 @@ TEST(Cord, Subcord) { for (size_t end_pos : positions) { if (end_pos < pos || end_pos > a.size()) continue; absl::Cord sa = a.Subcord(pos, end_pos - pos); - EXPECT_EQ(absl::string_view(s).substr(pos, end_pos - pos), + ASSERT_EQ(absl::string_view(s).substr(pos, end_pos - pos), std::string(sa)) << a; } @@ -362,7 +422,7 @@ TEST(Cord, Subcord) { for (size_t pos = 0; pos <= sh.size(); ++pos) { for (size_t n = 0; n <= sh.size() - pos; ++n) { absl::Cord sc = c.Subcord(pos, n); - EXPECT_EQ(sh.substr(pos, n), std::string(sc)) << c; + ASSERT_EQ(sh.substr(pos, n), std::string(sc)) << c; } } @@ -372,7 +432,7 @@ TEST(Cord, Subcord) { while (sa.size() > 1) { sa = sa.Subcord(1, sa.size() - 2); ss = ss.substr(1, ss.size() - 2); - EXPECT_EQ(ss, std::string(sa)) << a; + ASSERT_EQ(ss, std::string(sa)) << a; if (HasFailure()) break; // halt cascade } @@ -387,7 +447,7 @@ TEST(Cord, Subcord) { EXPECT_TRUE(sa.empty()); } -TEST(Cord, Swap) { +TEST_P(CordTest, Swap) { absl::string_view a("Dexter"); absl::string_view b("Mandark"); absl::Cord x(a); @@ -395,6 +455,9 @@ TEST(Cord, Swap) { swap(x, y); ASSERT_EQ(x, absl::Cord(b)); ASSERT_EQ(y, absl::Cord(a)); + x.swap(y); + ASSERT_EQ(x, absl::Cord(a)); + ASSERT_EQ(y, absl::Cord(b)); } static void VerifyCopyToString(const absl::Cord& cord) { @@ -410,13 +473,13 @@ static void VerifyCopyToString(const absl::Cord& cord) { if (cord.size() <= kInitialLength) { EXPECT_EQ(has_initial_contents.data(), address_before_copy) - << "CopyCordToString allocated new std::string storage; " + << "CopyCordToString allocated new string storage; " "has_initial_contents = \"" << has_initial_contents << "\""; } } -TEST(Cord, CopyToString) { +TEST_P(CordTest, CopyToString) { VerifyCopyToString(absl::Cord()); VerifyCopyToString(absl::Cord("small cord")); VerifyCopyToString( @@ -424,6 +487,80 @@ TEST(Cord, CopyToString) { "copying ", "to ", "a ", "string."})); } +TEST_P(CordTest, TryFlatEmpty) { + absl::Cord c; + EXPECT_EQ(c.TryFlat(), ""); +} + +TEST_P(CordTest, TryFlatFlat) { + absl::Cord c("hello"); + EXPECT_EQ(c.TryFlat(), "hello"); +} + +TEST_P(CordTest, TryFlatSubstrInlined) { + absl::Cord c("hello"); + c.RemovePrefix(1); + EXPECT_EQ(c.TryFlat(), "ello"); +} + +TEST_P(CordTest, TryFlatSubstrFlat) { + absl::Cord c("longer than 15 bytes"); + absl::Cord sub = absl::CordTestPeer::MakeSubstring(c, 1, c.size() - 1); + EXPECT_EQ(sub.TryFlat(), "onger than 15 bytes"); +} + +TEST_P(CordTest, TryFlatConcat) { + absl::Cord c = absl::MakeFragmentedCord({"hel", "lo"}); + EXPECT_EQ(c.TryFlat(), absl::nullopt); +} + +TEST_P(CordTest, TryFlatExternal) { + absl::Cord c = absl::MakeCordFromExternal("hell", [](absl::string_view) {}); + EXPECT_EQ(c.TryFlat(), "hell"); +} + +TEST_P(CordTest, TryFlatSubstrExternal) { + absl::Cord c = absl::MakeCordFromExternal("hell", [](absl::string_view) {}); + absl::Cord sub = absl::CordTestPeer::MakeSubstring(c, 1, c.size() - 1); + EXPECT_EQ(sub.TryFlat(), "ell"); +} + +TEST_P(CordTest, TryFlatSubstrConcat) { + absl::Cord c = absl::MakeFragmentedCord({"hello", " world"}); + absl::Cord sub = absl::CordTestPeer::MakeSubstring(c, 1, c.size() - 1); + EXPECT_EQ(sub.TryFlat(), absl::nullopt); + c.RemovePrefix(1); + EXPECT_EQ(c.TryFlat(), absl::nullopt); +} + +TEST_P(CordTest, TryFlatCommonlyAssumedInvariants) { + // The behavior tested below is not part of the API contract of Cord, but it's + // something we intend to be true in our current implementation. This test + // exists to detect and prevent accidental breakage of the implementation. + absl::string_view fragments[] = {"A fragmented test", + " cord", + " to test subcords", + " of ", + "a", + " cord for", + " each chunk " + "returned by the ", + "iterator"}; + absl::Cord c = absl::MakeFragmentedCord(fragments); + int fragment = 0; + int offset = 0; + absl::Cord::CharIterator itc = c.char_begin(); + for (absl::string_view sv : c.Chunks()) { + absl::string_view expected = fragments[fragment]; + absl::Cord subcord1 = c.Subcord(offset, sv.length()); + absl::Cord subcord2 = absl::Cord::AdvanceAndRead(&itc, sv.size()); + EXPECT_EQ(subcord1.TryFlat(), expected); + EXPECT_EQ(subcord2.TryFlat(), expected); + ++fragment; + offset += sv.length(); + } +} + static bool IsFlat(const absl::Cord& c) { return c.chunk_begin() == c.chunk_end() || ++c.chunk_begin() == c.chunk_end(); } @@ -452,14 +589,14 @@ static void VerifyFlatten(absl::Cord c) { EXPECT_TRUE(IsFlat(c)); } -TEST(Cord, Flatten) { +TEST_P(CordTest, Flatten) { VerifyFlatten(absl::Cord()); VerifyFlatten(absl::Cord("small cord")); VerifyFlatten(absl::Cord("larger than small buffer optimization")); VerifyFlatten(absl::MakeFragmentedCord({"small ", "fragmented ", "cord"})); // Test with a cord that is longer than the largest flat buffer - RandomEngine rng(testing::GTEST_FLAG(random_seed)); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); VerifyFlatten(absl::Cord(RandomLowercaseString(&rng, 8192))); } @@ -506,29 +643,29 @@ class TestData { }; } // namespace -TEST(Cord, MultipleLengths) { +TEST_P(CordTest, MultipleLengths) { TestData d; for (size_t i = 0; i < d.size(); i++) { std::string a = d.data(i); - { // Construct from Cord + { // Construct from Cord absl::Cord tmp(a); absl::Cord x(tmp); EXPECT_EQ(a, std::string(x)) << "'" << a << "'"; } - { // Construct from absl::string_view + { // Construct from absl::string_view absl::Cord x(a); EXPECT_EQ(a, std::string(x)) << "'" << a << "'"; } - { // Append cord to self + { // Append cord to self absl::Cord self(a); self.Append(self); EXPECT_EQ(a + a, std::string(self)) << "'" << a << "' + '" << a << "'"; } - { // Prepend cord to self + { // Prepend cord to self absl::Cord self(a); self.Prepend(self); EXPECT_EQ(a + a, std::string(self)) << "'" << a << "' + '" << a << "'"; @@ -538,40 +675,40 @@ TEST(Cord, MultipleLengths) { for (size_t j = 0; j < d.size(); j++) { std::string b = d.data(j); - { // CopyFrom Cord + { // CopyFrom Cord absl::Cord x(a); absl::Cord y(b); x = y; EXPECT_EQ(b, std::string(x)) << "'" << a << "' + '" << b << "'"; } - { // CopyFrom absl::string_view + { // CopyFrom absl::string_view absl::Cord x(a); x = b; EXPECT_EQ(b, std::string(x)) << "'" << a << "' + '" << b << "'"; } - { // Cord::Append(Cord) + { // Cord::Append(Cord) absl::Cord x(a); absl::Cord y(b); x.Append(y); EXPECT_EQ(a + b, std::string(x)) << "'" << a << "' + '" << b << "'"; } - { // Cord::Append(absl::string_view) + { // Cord::Append(absl::string_view) absl::Cord x(a); x.Append(b); EXPECT_EQ(a + b, std::string(x)) << "'" << a << "' + '" << b << "'"; } - { // Cord::Prepend(Cord) + { // Cord::Prepend(Cord) absl::Cord x(a); absl::Cord y(b); x.Prepend(y); EXPECT_EQ(b + a, std::string(x)) << "'" << b << "' + '" << a << "'"; } - { // Cord::Prepend(absl::string_view) + { // Cord::Prepend(absl::string_view) absl::Cord x(a); x.Prepend(b); EXPECT_EQ(b + a, std::string(x)) << "'" << b << "' + '" << a << "'"; @@ -582,7 +719,7 @@ TEST(Cord, MultipleLengths) { namespace { -TEST(Cord, RemoveSuffixWithExternalOrSubstring) { +TEST_P(CordTest, RemoveSuffixWithExternalOrSubstring) { absl::Cord cord = absl::MakeCordFromExternal( "foo bar baz", [](absl::string_view s) { DoNothing(s, nullptr); }); @@ -597,7 +734,7 @@ TEST(Cord, RemoveSuffixWithExternalOrSubstring) { EXPECT_EQ("foo", std::string(cord)); } -TEST(Cord, RemoveSuffixMakesZeroLengthNode) { +TEST_P(CordTest, RemoveSuffixMakesZeroLengthNode) { absl::Cord c; c.Append(absl::Cord(std::string(100, 'x'))); absl::Cord other_ref = c; // Prevent inplace appends @@ -624,7 +761,7 @@ absl::Cord CordWithZedBlock(size_t size) { } // Establish that ZedBlock does what we think it does. -TEST(CordSpliceTest, ZedBlock) { +TEST_P(CordTest, CordSpliceTestZedBlock) { absl::Cord blob = CordWithZedBlock(10); EXPECT_EQ(10, blob.size()); std::string s; @@ -632,7 +769,7 @@ TEST(CordSpliceTest, ZedBlock) { EXPECT_EQ("zzzzzzzzzz", s); } -TEST(CordSpliceTest, ZedBlock0) { +TEST_P(CordTest, CordSpliceTestZedBlock0) { absl::Cord blob = CordWithZedBlock(0); EXPECT_EQ(0, blob.size()); std::string s; @@ -640,7 +777,7 @@ TEST(CordSpliceTest, ZedBlock0) { EXPECT_EQ("", s); } -TEST(CordSpliceTest, ZedBlockSuffix1) { +TEST_P(CordTest, CordSpliceTestZedBlockSuffix1) { absl::Cord blob = CordWithZedBlock(10); EXPECT_EQ(10, blob.size()); absl::Cord suffix(blob); @@ -652,7 +789,7 @@ TEST(CordSpliceTest, ZedBlockSuffix1) { } // Remove all of a prefix block -TEST(CordSpliceTest, ZedBlockSuffix0) { +TEST_P(CordTest, CordSpliceTestZedBlockSuffix0) { absl::Cord blob = CordWithZedBlock(10); EXPECT_EQ(10, blob.size()); absl::Cord suffix(blob); @@ -684,7 +821,7 @@ absl::Cord SpliceCord(const absl::Cord& blob, int64_t offset, } // Taking an empty suffix of a block breaks appending. -TEST(CordSpliceTest, RemoveEntireBlock1) { +TEST_P(CordTest, CordSpliceTestRemoveEntireBlock1) { absl::Cord zero = CordWithZedBlock(10); absl::Cord suffix(zero); suffix.RemovePrefix(10); @@ -692,7 +829,7 @@ TEST(CordSpliceTest, RemoveEntireBlock1) { result.Append(suffix); } -TEST(CordSpliceTest, RemoveEntireBlock2) { +TEST_P(CordTest, CordSpliceTestRemoveEntireBlock2) { absl::Cord zero = CordWithZedBlock(10); absl::Cord prefix(zero); prefix.RemoveSuffix(10); @@ -702,7 +839,7 @@ TEST(CordSpliceTest, RemoveEntireBlock2) { result.Append(suffix); } -TEST(CordSpliceTest, RemoveEntireBlock3) { +TEST_P(CordTest, CordSpliceTestRemoveEntireBlock3) { absl::Cord blob = CordWithZedBlock(10); absl::Cord block = BigCord(10, 'b'); blob = SpliceCord(blob, 0, block); @@ -733,7 +870,7 @@ void VerifyComparison(const CordCompareTestCase& test_case) { << "LHS=" << rhs_string << "; RHS=" << lhs_string; } -TEST(Cord, Compare) { +TEST_P(CordTest, Compare) { absl::Cord subcord("aaaaaBBBBBcccccDDDDD"); subcord = subcord.Subcord(3, 10); @@ -796,7 +933,7 @@ TEST(Cord, Compare) { } } -TEST(Cord, CompareAfterAssign) { +TEST_P(CordTest, CompareAfterAssign) { absl::Cord a("aaaaaa1111111"); absl::Cord b("aaaaaa2222222"); a = "cccccc"; @@ -813,7 +950,7 @@ TEST(Cord, CompareAfterAssign) { } // Test CompareTo() and ComparePrefix() against string and substring -// comparison methods from std::basic_string. +// comparison methods from basic_string. static void TestCompare(const absl::Cord& c, const absl::Cord& d, RandomEngine* rng) { typedef std::basic_string<uint8_t> ustring; @@ -825,8 +962,8 @@ static void TestCompare(const absl::Cord& c, const absl::Cord& d, EXPECT_EQ(expected, sign(c.Compare(d))) << c << ", " << d; } -TEST(Compare, ComparisonIsUnsigned) { - RandomEngine rng(testing::GTEST_FLAG(random_seed)); +TEST_P(CordTest, CompareComparisonIsUnsigned) { + RandomEngine rng(GTEST_FLAG_GET(random_seed)); std::uniform_int_distribution<uint32_t> uniform_uint8(0, 255); char x = static_cast<char>(uniform_uint8(rng)); TestCompare( @@ -834,9 +971,9 @@ TEST(Compare, ComparisonIsUnsigned) { absl::Cord(std::string(GetUniformRandomUpTo(&rng, 100), x ^ 0x80)), &rng); } -TEST(Compare, RandomComparisons) { +TEST_P(CordTest, CompareRandomComparisons) { const int kIters = 5000; - RandomEngine rng(testing::GTEST_FLAG(random_seed)); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); int n = GetUniformRandomUpTo(&rng, 5000); absl::Cord a[] = {MakeExternalCord(n), @@ -869,7 +1006,7 @@ void CompareOperators() { EXPECT_TRUE(a == a); // For pointer type (i.e. `const char*`), operator== compares the address - // instead of the std::string, so `a == const char*("a")` isn't necessarily true. + // instead of the string, so `a == const char*("a")` isn't necessarily true. EXPECT_TRUE(std::is_pointer<T1>::value || a == T1("a")); EXPECT_TRUE(std::is_pointer<T2>::value || a == T2("a")); EXPECT_FALSE(a == b); @@ -892,43 +1029,43 @@ void CompareOperators() { EXPECT_FALSE(b <= a); } -TEST(ComparisonOperators, Cord_Cord) { +TEST_P(CordTest, ComparisonOperators_Cord_Cord) { CompareOperators<absl::Cord, absl::Cord>(); } -TEST(ComparisonOperators, Cord_StringPiece) { +TEST_P(CordTest, ComparisonOperators_Cord_StringPiece) { CompareOperators<absl::Cord, absl::string_view>(); } -TEST(ComparisonOperators, StringPiece_Cord) { +TEST_P(CordTest, ComparisonOperators_StringPiece_Cord) { CompareOperators<absl::string_view, absl::Cord>(); } -TEST(ComparisonOperators, Cord_string) { +TEST_P(CordTest, ComparisonOperators_Cord_string) { CompareOperators<absl::Cord, std::string>(); } -TEST(ComparisonOperators, string_Cord) { +TEST_P(CordTest, ComparisonOperators_string_Cord) { CompareOperators<std::string, absl::Cord>(); } -TEST(ComparisonOperators, stdstring_Cord) { +TEST_P(CordTest, ComparisonOperators_stdstring_Cord) { CompareOperators<std::string, absl::Cord>(); } -TEST(ComparisonOperators, Cord_stdstring) { +TEST_P(CordTest, ComparisonOperators_Cord_stdstring) { CompareOperators<absl::Cord, std::string>(); } -TEST(ComparisonOperators, charstar_Cord) { +TEST_P(CordTest, ComparisonOperators_charstar_Cord) { CompareOperators<const char*, absl::Cord>(); } -TEST(ComparisonOperators, Cord_charstar) { +TEST_P(CordTest, ComparisonOperators_Cord_charstar) { CompareOperators<absl::Cord, const char*>(); } -TEST(ConstructFromExternal, ReleaserInvoked) { +TEST_P(CordTest, ConstructFromExternalReleaserInvoked) { // Empty external memory means the releaser should be called immediately. { bool invoked = false; @@ -970,8 +1107,8 @@ TEST(ConstructFromExternal, ReleaserInvoked) { } } -TEST(ConstructFromExternal, CompareContents) { - RandomEngine rng(testing::GTEST_FLAG(random_seed)); +TEST_P(CordTest, ConstructFromExternalCompareContents) { + RandomEngine rng(GTEST_FLAG_GET(random_seed)); for (int length = 1; length <= 2048; length *= 2) { std::string data = RandomLowercaseString(&rng, length); @@ -986,8 +1123,8 @@ TEST(ConstructFromExternal, CompareContents) { } } -TEST(ConstructFromExternal, LargeReleaser) { - RandomEngine rng(testing::GTEST_FLAG(random_seed)); +TEST_P(CordTest, ConstructFromExternalLargeReleaser) { + RandomEngine rng(GTEST_FLAG_GET(random_seed)); constexpr size_t kLength = 256; std::string data = RandomLowercaseString(&rng, kLength); std::array<char, kLength> data_array; @@ -1001,7 +1138,7 @@ TEST(ConstructFromExternal, LargeReleaser) { EXPECT_TRUE(invoked); } -TEST(ConstructFromExternal, FunctionPointerReleaser) { +TEST_P(CordTest, ConstructFromExternalFunctionPointerReleaser) { static absl::string_view data("hello world"); static bool invoked; auto* releaser = @@ -1018,7 +1155,7 @@ TEST(ConstructFromExternal, FunctionPointerReleaser) { EXPECT_TRUE(invoked); } -TEST(ConstructFromExternal, MoveOnlyReleaser) { +TEST_P(CordTest, ConstructFromExternalMoveOnlyReleaser) { struct Releaser { explicit Releaser(bool* invoked) : invoked(invoked) {} Releaser(Releaser&& other) noexcept : invoked(other.invoked) {} @@ -1032,7 +1169,20 @@ TEST(ConstructFromExternal, MoveOnlyReleaser) { EXPECT_TRUE(invoked); } -TEST(ConstructFromExternal, NonTrivialReleaserDestructor) { +TEST_P(CordTest, ConstructFromExternalNoArgLambda) { + bool invoked = false; + (void)absl::MakeCordFromExternal("dummy", [&invoked]() { invoked = true; }); + EXPECT_TRUE(invoked); +} + +TEST_P(CordTest, ConstructFromExternalStringViewArgLambda) { + bool invoked = false; + (void)absl::MakeCordFromExternal( + "dummy", [&invoked](absl::string_view) { invoked = true; }); + EXPECT_TRUE(invoked); +} + +TEST_P(CordTest, ConstructFromExternalNonTrivialReleaserDestructor) { struct Releaser { explicit Releaser(bool* destroyed) : destroyed(destroyed) {} ~Releaser() { *destroyed = true; } @@ -1047,7 +1197,7 @@ TEST(ConstructFromExternal, NonTrivialReleaserDestructor) { EXPECT_TRUE(destroyed); } -TEST(ConstructFromExternal, ReferenceQualifierOverloads) { +TEST_P(CordTest, ConstructFromExternalReferenceQualifierOverloads) { struct Releaser { void operator()(absl::string_view) & { *lvalue_invoked = true; } void operator()(absl::string_view) && { *rvalue_invoked = true; } @@ -1075,8 +1225,8 @@ TEST(ConstructFromExternal, ReferenceQualifierOverloads) { EXPECT_TRUE(rvalue_invoked); } -TEST(ExternalMemory, BasicUsage) { - static const char* strings[] = { "", "hello", "there" }; +TEST_P(CordTest, ExternalMemoryBasicUsage) { + static const char* strings[] = {"", "hello", "there"}; for (const char* str : strings) { absl::Cord dst("(prefix)"); AddExternalMemory(str, &dst); @@ -1086,7 +1236,7 @@ TEST(ExternalMemory, BasicUsage) { } } -TEST(ExternalMemory, RemovePrefixSuffix) { +TEST_P(CordTest, ExternalMemoryRemovePrefixSuffix) { // Exhaustively try all sub-strings. absl::Cord cord = MakeComposite(); std::string s = std::string(cord); @@ -1101,7 +1251,7 @@ TEST(ExternalMemory, RemovePrefixSuffix) { } } -TEST(ExternalMemory, Get) { +TEST_P(CordTest, ExternalMemoryGet) { absl::Cord cord("hello"); AddExternalMemory(" world!", &cord); AddExternalMemory(" how are ", &cord); @@ -1120,16 +1270,16 @@ TEST(ExternalMemory, Get) { // Additionally we have some whiteboxed expectations based on our knowledge of // the layout and size of empty and inlined cords, and flat nodes. -TEST(CordMemoryUsage, Empty) { +TEST_P(CordTest, CordMemoryUsageEmpty) { EXPECT_EQ(sizeof(absl::Cord), absl::Cord().EstimatedMemoryUsage()); } -TEST(CordMemoryUsage, Embedded) { +TEST_P(CordTest, CordMemoryUsageEmbedded) { absl::Cord a("hello"); EXPECT_EQ(a.EstimatedMemoryUsage(), sizeof(absl::Cord)); } -TEST(CordMemoryUsage, EmbeddedAppend) { +TEST_P(CordTest, CordMemoryUsageEmbeddedAppend) { absl::Cord a("a"); absl::Cord b("bcd"); EXPECT_EQ(b.EstimatedMemoryUsage(), sizeof(absl::Cord)); @@ -1137,7 +1287,7 @@ TEST(CordMemoryUsage, EmbeddedAppend) { EXPECT_EQ(a.EstimatedMemoryUsage(), sizeof(absl::Cord)); } -TEST(CordMemoryUsage, ExternalMemory) { +TEST_P(CordTest, CordMemoryUsageExternalMemory) { static const int kLength = 1000; absl::Cord cord; AddExternalMemory(std::string(kLength, 'x'), &cord); @@ -1145,14 +1295,14 @@ TEST(CordMemoryUsage, ExternalMemory) { EXPECT_LE(cord.EstimatedMemoryUsage(), kLength * 1.5); } -TEST(CordMemoryUsage, Flat) { +TEST_P(CordTest, CordMemoryUsageFlat) { static const int kLength = 125; absl::Cord a(std::string(kLength, 'a')); EXPECT_GT(a.EstimatedMemoryUsage(), kLength); EXPECT_LE(a.EstimatedMemoryUsage(), kLength * 1.5); } -TEST(CordMemoryUsage, AppendFlat) { +TEST_P(CordTest, CordMemoryUsageAppendFlat) { using absl::strings_internal::CordTestAccess; absl::Cord a(std::string(CordTestAccess::MaxFlatLength(), 'a')); size_t length = a.EstimatedMemoryUsage(); @@ -1162,9 +1312,32 @@ TEST(CordMemoryUsage, AppendFlat) { EXPECT_LE(delta, CordTestAccess::MaxFlatLength() * 1.5); } +TEST_P(CordTest, CordMemoryUsageAppendExternal) { + static const int kLength = 1000; + using absl::strings_internal::CordTestAccess; + absl::Cord a(std::string(CordTestAccess::MaxFlatLength(), 'a')); + size_t length = a.EstimatedMemoryUsage(); + AddExternalMemory(std::string(kLength, 'b'), &a); + size_t delta = a.EstimatedMemoryUsage() - length; + EXPECT_GT(delta, kLength); + EXPECT_LE(delta, kLength * 1.5); +} + +TEST_P(CordTest, CordMemoryUsageSubString) { + static const int kLength = 2000; + using absl::strings_internal::CordTestAccess; + absl::Cord a(std::string(kLength, 'a')); + size_t length = a.EstimatedMemoryUsage(); + AddExternalMemory(std::string(kLength, 'b'), &a); + absl::Cord b = a.Subcord(0, kLength + kLength / 2); + size_t delta = b.EstimatedMemoryUsage() - length; + EXPECT_GT(delta, kLength); + EXPECT_LE(delta, kLength * 1.5); +} + // Regtest for a change that had to be rolled back because it expanded out // of the InlineRep too soon, which was observable through MemoryUsage(). -TEST(CordMemoryUsage, InlineRep) { +TEST_P(CordTest, CordMemoryUsageInlineRep) { constexpr size_t kMaxInline = 15; // Cord::InlineRep::N const std::string small_string(kMaxInline, 'x'); absl::Cord c1(small_string); @@ -1178,7 +1351,7 @@ TEST(CordMemoryUsage, InlineRep) { } // namespace // Regtest for 7510292 (fix a bug introduced by 7465150) -TEST(Cord, Concat_Append) { +TEST_P(CordTest, Concat_Append) { // Create a rep of type CONCAT absl::Cord s1("foobarbarbarbarbar"); s1.Append("abcdefgabcdefgabcdefgabcdefgabcdefgabcdefgabcdefg"); @@ -1193,7 +1366,80 @@ TEST(Cord, Concat_Append) { EXPECT_EQ(s2.size(), size + 1); } -TEST(MakeFragmentedCord, MakeFragmentedCordFromInitializerList) { +TEST_P(CordTest, DiabolicalGrowth) { + // This test exercises a diabolical Append(<one char>) on a cord, making the + // cord shared before each Append call resulting in a terribly fragmented + // resulting cord. + // TODO(b/183983616): Apply some minimum compaction when copying a shared + // source cord into a mutable copy for updates in CordRepRing. + RandomEngine rng(GTEST_FLAG_GET(random_seed)); + const std::string expected = RandomLowercaseString(&rng, 5000); + absl::Cord cord; + for (char c : expected) { + absl::Cord shared(cord); + cord.Append(absl::string_view(&c, 1)); + } + std::string value; + absl::CopyCordToString(cord, &value); + EXPECT_EQ(value, expected); + ABSL_RAW_LOG(INFO, "Diabolical size allocated = %zu", + cord.EstimatedMemoryUsage()); +} + +// The following tests check support for >4GB cords in 64-bit binaries, and +// 2GB-4GB cords in 32-bit binaries. This function returns the large cord size +// that's appropriate for the binary. + +// Construct a huge cord with the specified valid prefix. +static absl::Cord MakeHuge(absl::string_view prefix) { + absl::Cord cord; + if (sizeof(size_t) > 4) { + // In 64-bit binaries, test 64-bit Cord support. + const size_t size = + static_cast<size_t>(std::numeric_limits<uint32_t>::max()) + 314; + cord.Append(absl::MakeCordFromExternal( + absl::string_view(prefix.data(), size), + [](absl::string_view s) { DoNothing(s, nullptr); })); + } else { + // Cords are limited to 32-bit lengths in 32-bit binaries. The following + // tests check for use of "signed int" to represent Cord length/offset. + // However absl::string_view does not allow lengths >= (1u<<31), so we need + // to append in two parts; + const size_t s1 = (1u << 31) - 1; + // For shorter cord, `Append` copies the data rather than allocating a new + // node. The threshold is currently set to 511, so `s2` needs to be bigger + // to not trigger the copy. + const size_t s2 = 600; + cord.Append(absl::MakeCordFromExternal( + absl::string_view(prefix.data(), s1), + [](absl::string_view s) { DoNothing(s, nullptr); })); + cord.Append(absl::MakeCordFromExternal( + absl::string_view("", s2), + [](absl::string_view s) { DoNothing(s, nullptr); })); + } + return cord; +} + +TEST_P(CordTest, HugeCord) { + absl::Cord cord = MakeHuge("huge cord"); + EXPECT_LE(cord.size(), cord.EstimatedMemoryUsage()); + EXPECT_GE(cord.size() + 100, cord.EstimatedMemoryUsage()); +} + +// Tests that Append() works ok when handed a self reference +TEST_P(CordTest, AppendSelf) { + // We run the test until data is ~16K + // This guarantees it covers small, medium and large data. + std::string control_data = "Abc"; + absl::Cord data(control_data); + while (control_data.length() < 0x4000) { + data.Append(data); + control_data.append(control_data); + ASSERT_EQ(control_data, data); + } +} + +TEST_P(CordTest, MakeFragmentedCordFromInitializerList) { absl::Cord fragmented = absl::MakeFragmentedCord({"A ", "fragmented ", "Cord"}); @@ -1213,7 +1459,7 @@ TEST(MakeFragmentedCord, MakeFragmentedCordFromInitializerList) { ASSERT_TRUE(++chunk_it == fragmented.chunk_end()); } -TEST(MakeFragmentedCord, MakeFragmentedCordFromVector) { +TEST_P(CordTest, MakeFragmentedCordFromVector) { std::vector<absl::string_view> chunks = {"A ", "fragmented ", "Cord"}; absl::Cord fragmented = absl::MakeFragmentedCord(chunks); @@ -1233,7 +1479,7 @@ TEST(MakeFragmentedCord, MakeFragmentedCordFromVector) { ASSERT_TRUE(++chunk_it == fragmented.chunk_end()); } -TEST(CordChunkIterator, Traits) { +TEST_P(CordTest, CordChunkIteratorTraits) { static_assert(std::is_copy_constructible<absl::Cord::ChunkIterator>::value, ""); static_assert(std::is_copy_assignable<absl::Cord::ChunkIterator>::value, ""); @@ -1314,7 +1560,7 @@ static void VerifyChunkIterator(const absl::Cord& cord, EXPECT_TRUE(post_iter == cord.chunk_end()); // NOLINT } -TEST(CordChunkIterator, Operations) { +TEST_P(CordTest, CordChunkIteratorOperations) { absl::Cord empty_cord; VerifyChunkIterator(empty_cord, 0); @@ -1339,14 +1585,14 @@ TEST(CordChunkIterator, Operations) { VerifyChunkIterator(reused_nodes_cord, expected_chunks); } - RandomEngine rng(testing::GTEST_FLAG(random_seed)); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); absl::Cord flat_cord(RandomLowercaseString(&rng, 256)); absl::Cord subcords; for (int i = 0; i < 128; ++i) subcords.Prepend(flat_cord.Subcord(i, 128)); VerifyChunkIterator(subcords, 128); } -TEST(CordCharIterator, Traits) { +TEST_P(CordTest, CharIteratorTraits) { static_assert(std::is_copy_constructible<absl::Cord::CharIterator>::value, ""); static_assert(std::is_copy_assignable<absl::Cord::CharIterator>::value, ""); @@ -1455,7 +1701,7 @@ static void VerifyCharIterator(const absl::Cord& cord) { } } -TEST(CordCharIterator, Operations) { +TEST_P(CordTest, CharIteratorOperations) { absl::Cord empty_cord; VerifyCharIterator(empty_cord); @@ -1477,14 +1723,49 @@ TEST(CordCharIterator, Operations) { VerifyCharIterator(reused_nodes_cord); } - RandomEngine rng(testing::GTEST_FLAG(random_seed)); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); absl::Cord flat_cord(RandomLowercaseString(&rng, 256)); absl::Cord subcords; for (int i = 0; i < 4; ++i) subcords.Prepend(flat_cord.Subcord(16 * i, 128)); VerifyCharIterator(subcords); } -TEST(Cord, StreamingOutput) { +TEST_P(CordTest, CharIteratorAdvanceAndRead) { + // Create a Cord holding 6 flats of 2500 bytes each, and then iterate over it + // reading 150, 1500, 2500 and 3000 bytes. This will result in all possible + // partial, full and straddled read combinations including reads below + // kMaxBytesToCopy. b/197776822 surfaced a bug for a specific partial, small + // read 'at end' on Cord which caused a failure on attempting to read past the + // end in CordRepBtreeReader which was not covered by any existing test. + constexpr int kBlocks = 6; + constexpr size_t kBlockSize = 2500; + constexpr size_t kChunkSize1 = 1500; + constexpr size_t kChunkSize2 = 2500; + constexpr size_t kChunkSize3 = 3000; + constexpr size_t kChunkSize4 = 150; + RandomEngine rng; + std::string data = RandomLowercaseString(&rng, kBlocks * kBlockSize); + absl::Cord cord; + for (int i = 0; i < kBlocks; ++i) { + const std::string block = data.substr(i * kBlockSize, kBlockSize); + cord.Append(absl::Cord(block)); + } + + for (size_t chunk_size : + {kChunkSize1, kChunkSize2, kChunkSize3, kChunkSize4}) { + absl::Cord::CharIterator it = cord.char_begin(); + size_t offset = 0; + while (offset < data.length()) { + const size_t n = std::min<size_t>(data.length() - offset, chunk_size); + absl::Cord chunk = cord.AdvanceAndRead(&it, n); + ASSERT_EQ(chunk.size(), n); + ASSERT_EQ(chunk.Compare(data.substr(offset, n)), 0); + offset += n; + } + } +} + +TEST_P(CordTest, StreamingOutput) { absl::Cord c = absl::MakeFragmentedCord({"A ", "small ", "fragmented ", "Cord", "."}); std::stringstream output; @@ -1492,7 +1773,7 @@ TEST(Cord, StreamingOutput) { EXPECT_EQ("A small fragmented Cord.", output.str()); } -TEST(Cord, ForEachChunk) { +TEST_P(CordTest, ForEachChunk) { for (int num_elements : {1, 10, 200}) { SCOPED_TRACE(num_elements); std::vector<std::string> cord_chunks; @@ -1510,7 +1791,7 @@ TEST(Cord, ForEachChunk) { } } -TEST(Cord, SmallBufferAssignFromOwnData) { +TEST_P(CordTest, SmallBufferAssignFromOwnData) { constexpr size_t kMaxInline = 15; std::string contents = "small buff cord"; EXPECT_EQ(contents.size(), kMaxInline); @@ -1524,3 +1805,153 @@ TEST(Cord, SmallBufferAssignFromOwnData) { } } } + +TEST_P(CordTest, Format) { + absl::Cord c; + absl::Format(&c, "There were %04d little %s.", 3, "pigs"); + EXPECT_EQ(c, "There were 0003 little pigs."); + absl::Format(&c, "And %-3llx bad wolf!", 1); + EXPECT_EQ(c, "There were 0003 little pigs.And 1 bad wolf!"); +} + +TEST_P(CordTest, Hardening) { + absl::Cord cord("hello"); + // These statement should abort the program in all builds modes. + EXPECT_DEATH_IF_SUPPORTED(cord.RemovePrefix(6), ""); + EXPECT_DEATH_IF_SUPPORTED(cord.RemoveSuffix(6), ""); + + bool test_hardening = false; + ABSL_HARDENING_ASSERT([&]() { + // This only runs when ABSL_HARDENING_ASSERT is active. + test_hardening = true; + return true; + }()); + if (!test_hardening) return; + + EXPECT_DEATH_IF_SUPPORTED(cord[5], ""); + EXPECT_DEATH_IF_SUPPORTED(*cord.chunk_end(), ""); + EXPECT_DEATH_IF_SUPPORTED(static_cast<void>(cord.chunk_end()->empty()), ""); + EXPECT_DEATH_IF_SUPPORTED(++cord.chunk_end(), ""); +} + +// This test mimics a specific (and rare) application repeatedly splitting a +// cord, inserting (overwriting) a string value, and composing a new cord from +// the three pieces. This is hostile towards a Btree implementation: A split of +// a node at any level is likely to have the right-most edge of the left split, +// and the left-most edge of the right split shared. For example, splitting a +// leaf node with 6 edges will result likely in a 1-6, 2-5, 3-4, etc. split, +// sharing the 'split node'. When recomposing such nodes, we 'injected' an edge +// in that node. As this happens with some probability on each level of the +// tree, this will quickly grow the tree until it reaches maximum height. +TEST_P(CordTest, BtreeHostileSplitInsertJoin) { + absl::BitGen bitgen; + + // Start with about 1GB of data + std::string data(1 << 10, 'x'); + absl::Cord buffer(data); + absl::Cord cord; + for (int i = 0; i < 1000000; ++i) { + cord.Append(buffer); + } + + for (int j = 0; j < 1000; ++j) { + size_t offset = absl::Uniform(bitgen, 0u, cord.size()); + size_t length = absl::Uniform(bitgen, 100u, data.size()); + if (cord.size() == offset) { + cord.Append(absl::string_view(data.data(), length)); + } else { + absl::Cord suffix; + if (offset + length < cord.size()) { + suffix = cord; + suffix.RemovePrefix(offset + length); + } + if (cord.size() > offset) { + cord.RemoveSuffix(cord.size() - offset); + } + cord.Append(absl::string_view(data.data(), length)); + if (!suffix.empty()) { + cord.Append(suffix); + } + } + } +} + +class AfterExitCordTester { + public: + bool Set(absl::Cord* cord, absl::string_view expected) { + cord_ = cord; + expected_ = expected; + return true; + } + + ~AfterExitCordTester() { + EXPECT_EQ(*cord_, expected_); + } + private: + absl::Cord* cord_; + absl::string_view expected_; +}; + +template <typename Str> +void TestConstinitConstructor(Str) { + const auto expected = Str::value; + // Defined before `cord` to be destroyed after it. + static AfterExitCordTester exit_tester; // NOLINT + ABSL_CONST_INIT static absl::Cord cord(Str{}); // NOLINT + static bool init_exit_tester = exit_tester.Set(&cord, expected); + (void)init_exit_tester; + + EXPECT_EQ(cord, expected); + // Copy the object and test the copy, and the original. + { + absl::Cord copy = cord; + EXPECT_EQ(copy, expected); + } + // The original still works + EXPECT_EQ(cord, expected); + + // Try making adding more structure to the tree. + { + absl::Cord copy = cord; + std::string expected_copy(expected); + for (int i = 0; i < 10; ++i) { + copy.Append(cord); + absl::StrAppend(&expected_copy, expected); + EXPECT_EQ(copy, expected_copy); + } + } + + // Make sure we are using the right branch during constant evaluation. + EXPECT_EQ(absl::CordTestPeer::IsTree(cord), cord.size() >= 16); + + for (int i = 0; i < 10; ++i) { + // Make a few more Cords from the same global rep. + // This tests what happens when the refcount for it gets below 1. + EXPECT_EQ(expected, absl::Cord(Str{})); + } +} + +constexpr int SimpleStrlen(const char* p) { + return *p ? 1 + SimpleStrlen(p + 1) : 0; +} + +struct ShortView { + constexpr absl::string_view operator()() const { + return absl::string_view("SSO string", SimpleStrlen("SSO string")); + } +}; + +struct LongView { + constexpr absl::string_view operator()() const { + return absl::string_view("String that does not fit SSO.", + SimpleStrlen("String that does not fit SSO.")); + } +}; + + +TEST_P(CordTest, ConstinitConstructor) { + TestConstinitConstructor( + absl::strings_internal::MakeStringConstant(ShortView{})); + TestConstinitConstructor( + absl::strings_internal::MakeStringConstant(LongView{})); +} diff --git a/third_party/abseil-cpp/absl/strings/cord_test_helpers.h b/third_party/abseil-cpp/absl/strings/cord_test_helpers.h index f1036e3b13..31a1dc8980 100644 --- a/third_party/abseil-cpp/absl/strings/cord_test_helpers.h +++ b/third_party/abseil-cpp/absl/strings/cord_test_helpers.h @@ -17,11 +17,73 @@ #ifndef ABSL_STRINGS_CORD_TEST_HELPERS_H_ #define ABSL_STRINGS_CORD_TEST_HELPERS_H_ +#include <cstdint> +#include <iostream> +#include <string> + +#include "absl/base/config.h" #include "absl/strings/cord.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/string_view.h" namespace absl { ABSL_NAMESPACE_BEGIN +// Cord sizes relevant for testing +enum class TestCordSize { + // An empty value + kEmpty = 0, + + // An inlined string value + kInlined = cord_internal::kMaxInline / 2 + 1, + + // 'Well known' SSO lengths (excluding terminating zero). + // libstdcxx has a maximum SSO of 15, libc++ has a maximum SSO of 22. + kStringSso1 = 15, + kStringSso2 = 22, + + // A string value which is too large to fit in inlined data, but small enough + // such that Cord prefers copying the value if possible, i.e.: not stealing + // std::string inputs, or referencing existing CordReps on Append, etc. + kSmall = cord_internal::kMaxBytesToCopy / 2 + 1, + + // A string value large enough that Cord prefers to reference or steal from + // existing inputs rather than copying contents of the input. + kMedium = cord_internal::kMaxFlatLength / 2 + 1, + + // A string value large enough to cause it to be stored in mutliple flats. + kLarge = cord_internal::kMaxFlatLength * 4 +}; + +// To string helper +inline absl::string_view ToString(TestCordSize size) { + switch (size) { + case TestCordSize::kEmpty: + return "Empty"; + case TestCordSize::kInlined: + return "Inlined"; + case TestCordSize::kSmall: + return "Small"; + case TestCordSize::kStringSso1: + return "StringSso1"; + case TestCordSize::kStringSso2: + return "StringSso2"; + case TestCordSize::kMedium: + return "Medium"; + case TestCordSize::kLarge: + return "Large"; + } + return "???"; +} + +// Returns the length matching the specified size +inline size_t Length(TestCordSize size) { return static_cast<size_t>(size); } + +// Stream output helper +inline std::ostream& operator<<(std::ostream& stream, TestCordSize size) { + return stream << ToString(size); +} + // Creates a multi-segment Cord from an iterable container of strings. The // resulting Cord is guaranteed to have one segment for every string in the // container. This allows code to be unit tested with multi-segment Cord diff --git a/third_party/abseil-cpp/absl/strings/cordz_test.cc b/third_party/abseil-cpp/absl/strings/cordz_test.cc new file mode 100644 index 0000000000..2b7d30b0e0 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/cordz_test.cc @@ -0,0 +1,466 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <cstdint> +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/macros.h" +#include "absl/strings/cord.h" +#include "absl/strings/cord_test_helpers.h" +#include "absl/strings/cordz_test_helpers.h" +#include "absl/strings/internal/cordz_functions.h" +#include "absl/strings/internal/cordz_info.h" +#include "absl/strings/internal/cordz_sample_token.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_tracker.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +#ifdef ABSL_INTERNAL_CORDZ_ENABLED + +using testing::Eq; +using testing::AnyOf; + +namespace absl { +ABSL_NAMESPACE_BEGIN + +using cord_internal::CordzInfo; +using cord_internal::CordzSampleToken; +using cord_internal::CordzStatistics; +using cord_internal::CordzUpdateTracker; +using Method = CordzUpdateTracker::MethodIdentifier; + +// Do not print cord contents, we only care about 'size' perhaps. +// Note that this method must be inside the named namespace. +inline void PrintTo(const Cord& cord, std::ostream* s) { + if (s) *s << "Cord[" << cord.size() << "]"; +} + +namespace { + +auto constexpr kMaxInline = cord_internal::kMaxInline; + +// Returns a string_view value of the specified length +// We do this to avoid 'consuming' large strings in Cord by default. +absl::string_view MakeString(size_t size) { + thread_local std::string str; + str = std::string(size, '.'); + return str; +} + +absl::string_view MakeString(TestCordSize size) { + return MakeString(Length(size)); +} + +// Returns a cord with a sampled method of kAppendString. +absl::Cord MakeAppendStringCord(TestCordSize size) { + CordzSamplingIntervalHelper always(1); + absl::Cord cord; + cord.Append(MakeString(size)); + return cord; +} + +std::string TestParamToString(::testing::TestParamInfo<TestCordSize> size) { + return absl::StrCat("On", ToString(size.param), "Cord"); +} + +class CordzUpdateTest : public testing::TestWithParam<TestCordSize> { + public: + Cord& cord() { return cord_; } + + Method InitialOr(Method method) const { + return (GetParam() > TestCordSize::kInlined) ? Method::kConstructorString + : method; + } + + private: + CordzSamplingIntervalHelper sample_every_{1}; + Cord cord_{MakeString(GetParam())}; +}; + +template <typename T> +std::string ParamToString(::testing::TestParamInfo<T> param) { + return std::string(ToString(param.param)); +} + +INSTANTIATE_TEST_SUITE_P(WithParam, CordzUpdateTest, + testing::Values(TestCordSize::kEmpty, + TestCordSize::kInlined, + TestCordSize::kLarge), + TestParamToString); + +class CordzStringTest : public testing::TestWithParam<TestCordSize> { + private: + CordzSamplingIntervalHelper sample_every_{1}; +}; + +INSTANTIATE_TEST_SUITE_P(WithParam, CordzStringTest, + testing::Values(TestCordSize::kInlined, + TestCordSize::kStringSso1, + TestCordSize::kStringSso2, + TestCordSize::kSmall, + TestCordSize::kLarge), + ParamToString<TestCordSize>); + +TEST(CordzTest, ConstructSmallArray) { + CordzSamplingIntervalHelper sample_every{1}; + Cord cord(MakeString(TestCordSize::kSmall)); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); +} + +TEST(CordzTest, ConstructLargeArray) { + CordzSamplingIntervalHelper sample_every{1}; + Cord cord(MakeString(TestCordSize::kLarge)); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); +} + +TEST_P(CordzStringTest, ConstructString) { + CordzSamplingIntervalHelper sample_every{1}; + Cord cord(std::string(Length(GetParam()), '.')); + if (Length(GetParam()) > kMaxInline) { + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); + } +} + +TEST(CordzTest, CopyConstructFromUnsampled) { + CordzSamplingIntervalHelper sample_every{1}; + Cord src = UnsampledCord(MakeString(TestCordSize::kLarge)); + Cord cord(src); + EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr)); +} + +TEST(CordzTest, CopyConstructFromSampled) { + CordzSamplingIntervalHelper sample_never{99999}; + Cord src = MakeAppendStringCord(TestCordSize::kLarge); + Cord cord(src); + ASSERT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorCord)); + CordzStatistics stats = GetCordzInfoForTesting(cord)->GetCordzStatistics(); + EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString)); + EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1)); +} + +TEST(CordzTest, MoveConstruct) { + CordzSamplingIntervalHelper sample_every{1}; + Cord src(MakeString(TestCordSize::kLarge)); + Cord cord(std::move(src)); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); +} + +TEST_P(CordzUpdateTest, AssignUnsampledCord) { + Cord src = UnsampledCord(MakeString(TestCordSize::kLarge)); + const CordzInfo* info = GetCordzInfoForTesting(cord()); + cord() = src; + EXPECT_THAT(GetCordzInfoForTesting(cord()), Eq(nullptr)); + EXPECT_FALSE(CordzInfoIsListed(info)); +} + +TEST_P(CordzUpdateTest, AssignSampledCord) { + Cord src = MakeAppendStringCord(TestCordSize::kLarge); + cord() = src; + ASSERT_THAT(cord(), HasValidCordzInfoOf(Method::kAssignCord)); + CordzStatistics stats = GetCordzInfoForTesting(cord())->GetCordzStatistics(); + EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString)); + EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1)); + EXPECT_THAT(stats.update_tracker.Value(Method::kConstructorString), Eq(0)); +} + +TEST(CordzUpdateTest, AssignSampledCordToInlined) { + CordzSamplingIntervalHelper sample_never{99999}; + Cord cord; + Cord src = MakeAppendStringCord(TestCordSize::kLarge); + cord = src; + ASSERT_THAT(cord, HasValidCordzInfoOf(Method::kAssignCord)); + CordzStatistics stats = GetCordzInfoForTesting(cord)->GetCordzStatistics(); + EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString)); + EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1)); + EXPECT_THAT(stats.update_tracker.Value(Method::kConstructorString), Eq(0)); +} + +TEST(CordzUpdateTest, AssignSampledCordToUnsampledCord) { + CordzSamplingIntervalHelper sample_never{99999}; + Cord cord = UnsampledCord(MakeString(TestCordSize::kLarge)); + Cord src = MakeAppendStringCord(TestCordSize::kLarge); + cord = src; + ASSERT_THAT(cord, HasValidCordzInfoOf(Method::kAssignCord)); + CordzStatistics stats = GetCordzInfoForTesting(cord)->GetCordzStatistics(); + EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString)); + EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1)); + EXPECT_THAT(stats.update_tracker.Value(Method::kConstructorString), Eq(0)); +} + +TEST(CordzUpdateTest, AssignUnsampledCordToSampledCordWithoutSampling) { + CordzSamplingIntervalHelper sample_never{99999}; + Cord cord = MakeAppendStringCord(TestCordSize::kLarge); + const CordzInfo* info = GetCordzInfoForTesting(cord); + Cord src = UnsampledCord(MakeString(TestCordSize::kLarge)); + cord = src; + EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr)); + EXPECT_FALSE(CordzInfoIsListed(info)); +} + +TEST(CordzUpdateTest, AssignUnsampledCordToSampledCordWithSampling) { + CordzSamplingIntervalHelper sample_every{1}; + Cord cord = MakeAppendStringCord(TestCordSize::kLarge); + const CordzInfo* info = GetCordzInfoForTesting(cord); + Cord src = UnsampledCord(MakeString(TestCordSize::kLarge)); + cord = src; + EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr)); + EXPECT_FALSE(CordzInfoIsListed(info)); +} + +TEST(CordzUpdateTest, AssignSampledCordToSampledCord) { + CordzSamplingIntervalHelper sample_every{1}; + Cord src = MakeAppendStringCord(TestCordSize::kLarge); + Cord cord(MakeString(TestCordSize::kLarge)); + cord = src; + ASSERT_THAT(cord, HasValidCordzInfoOf(Method::kAssignCord)); + CordzStatistics stats = GetCordzInfoForTesting(cord)->GetCordzStatistics(); + EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString)); + EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1)); + EXPECT_THAT(stats.update_tracker.Value(Method::kConstructorString), Eq(0)); +} + +TEST(CordzUpdateTest, AssignUnsampledCordToSampledCord) { + CordzSamplingIntervalHelper sample_every{1}; + Cord src = MakeAppendStringCord(TestCordSize::kLarge); + Cord cord(MakeString(TestCordSize::kLarge)); + cord = src; + ASSERT_THAT(cord, HasValidCordzInfoOf(Method::kAssignCord)); + CordzStatistics stats = GetCordzInfoForTesting(cord)->GetCordzStatistics(); + EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString)); + EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1)); + EXPECT_THAT(stats.update_tracker.Value(Method::kConstructorString), Eq(0)); +} + +TEST(CordzTest, AssignInlinedCordToSampledCord) { + CordzSampleToken token; + CordzSamplingIntervalHelper sample_every{1}; + Cord cord(MakeString(TestCordSize::kLarge)); + const CordzInfo* info = GetCordzInfoForTesting(cord); + Cord src = UnsampledCord(MakeString(TestCordSize::kInlined)); + cord = src; + EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr)); + EXPECT_FALSE(CordzInfoIsListed(info)); +} + +TEST(CordzUpdateTest, MoveAssignCord) { + CordzSamplingIntervalHelper sample_every{1}; + Cord cord; + Cord src(MakeString(TestCordSize::kLarge)); + cord = std::move(src); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); +} + +TEST_P(CordzUpdateTest, AssignLargeArray) { + cord() = MakeString(TestCordSize::kSmall); + EXPECT_THAT(cord(), HasValidCordzInfoOf(Method::kAssignString)); +} + +TEST_P(CordzUpdateTest, AssignSmallArray) { + cord() = MakeString(TestCordSize::kSmall); + EXPECT_THAT(cord(), HasValidCordzInfoOf(Method::kAssignString)); +} + +TEST_P(CordzUpdateTest, AssignInlinedArray) { + cord() = MakeString(TestCordSize::kInlined); + EXPECT_THAT(GetCordzInfoForTesting(cord()), Eq(nullptr)); +} + +TEST_P(CordzStringTest, AssignStringToInlined) { + Cord cord; + cord = std::string(Length(GetParam()), '.'); + if (Length(GetParam()) > kMaxInline) { + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kAssignString)); + } +} + +TEST_P(CordzStringTest, AssignStringToCord) { + Cord cord(MakeString(TestCordSize::kLarge)); + cord = std::string(Length(GetParam()), '.'); + if (Length(GetParam()) > kMaxInline) { + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); + EXPECT_THAT(cord, CordzMethodCountEq(Method::kAssignString, 1)); + } +} + +TEST_P(CordzUpdateTest, AssignInlinedString) { + cord() = std::string(Length(TestCordSize::kInlined), '.'); + EXPECT_THAT(GetCordzInfoForTesting(cord()), Eq(nullptr)); +} + +TEST_P(CordzUpdateTest, AppendCord) { + Cord src = UnsampledCord(MakeString(TestCordSize::kLarge)); + cord().Append(src); + EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kAppendCord))); +} + +TEST_P(CordzUpdateTest, MoveAppendCord) { + cord().Append(UnsampledCord(MakeString(TestCordSize::kLarge))); + EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kAppendCord))); +} + +TEST_P(CordzUpdateTest, AppendSmallArray) { + cord().Append(MakeString(TestCordSize::kSmall)); + EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kAppendString))); +} + +TEST_P(CordzUpdateTest, AppendLargeArray) { + cord().Append(MakeString(TestCordSize::kLarge)); + EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kAppendString))); +} + +TEST_P(CordzStringTest, AppendStringToEmpty) { + Cord cord; + cord.Append(std::string(Length(GetParam()), '.')); + if (Length(GetParam()) > kMaxInline) { + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kAppendString)); + } +} + +TEST_P(CordzStringTest, AppendStringToInlined) { + Cord cord(MakeString(TestCordSize::kInlined)); + cord.Append(std::string(Length(GetParam()), '.')); + if (Length(TestCordSize::kInlined) + Length(GetParam()) > kMaxInline) { + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kAppendString)); + } +} + +TEST_P(CordzStringTest, AppendStringToCord) { + Cord cord(MakeString(TestCordSize::kLarge)); + cord.Append(std::string(Length(GetParam()), '.')); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); + EXPECT_THAT(cord, CordzMethodCountEq(Method::kAppendString, 1)); +} + +TEST(CordzTest, MakeCordFromExternal) { + CordzSamplingIntervalHelper sample_every{1}; + Cord cord = MakeCordFromExternal("Hello world", [](absl::string_view) {}); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kMakeCordFromExternal)); +} + +TEST(CordzTest, MakeCordFromEmptyExternal) { + CordzSamplingIntervalHelper sample_every{1}; + Cord cord = MakeCordFromExternal({}, [](absl::string_view) {}); + EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr)); +} + +TEST_P(CordzUpdateTest, PrependCord) { + Cord src = UnsampledCord(MakeString(TestCordSize::kLarge)); + cord().Prepend(src); + EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kPrependCord))); +} + +TEST_P(CordzUpdateTest, PrependSmallArray) { + cord().Prepend(MakeString(TestCordSize::kSmall)); + EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kPrependString))); +} + +TEST_P(CordzUpdateTest, PrependLargeArray) { + cord().Prepend(MakeString(TestCordSize::kLarge)); + EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kPrependString))); +} + +TEST_P(CordzStringTest, PrependStringToEmpty) { + Cord cord; + cord.Prepend(std::string(Length(GetParam()), '.')); + if (Length(GetParam()) > kMaxInline) { + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kPrependString)); + } +} + +TEST_P(CordzStringTest, PrependStringToInlined) { + Cord cord(MakeString(TestCordSize::kInlined)); + cord.Prepend(std::string(Length(GetParam()), '.')); + if (Length(TestCordSize::kInlined) + Length(GetParam()) > kMaxInline) { + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kPrependString)); + } +} + +TEST_P(CordzStringTest, PrependStringToCord) { + Cord cord(MakeString(TestCordSize::kLarge)); + cord.Prepend(std::string(Length(GetParam()), '.')); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); + EXPECT_THAT(cord, CordzMethodCountEq(Method::kPrependString, 1)); +} + +TEST(CordzTest, RemovePrefix) { + CordzSamplingIntervalHelper sample_every(1); + Cord cord(MakeString(TestCordSize::kLarge)); + + // Half the cord + cord.RemovePrefix(cord.size() / 2); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); + EXPECT_THAT(cord, CordzMethodCountEq(Method::kRemovePrefix, 1)); + + // TODO(mvels): RemovePrefix does not reset to inlined, except if empty? + cord.RemovePrefix(cord.size() - kMaxInline); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); + EXPECT_THAT(cord, CordzMethodCountEq(Method::kRemovePrefix, 2)); + + cord.RemovePrefix(cord.size()); + EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr)); +} + +TEST(CordzTest, RemoveSuffix) { + CordzSamplingIntervalHelper sample_every(1); + Cord cord(MakeString(TestCordSize::kLarge)); + + // Half the cord + cord.RemoveSuffix(cord.size() / 2); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); + EXPECT_THAT(cord, CordzMethodCountEq(Method::kRemoveSuffix, 1)); + + // TODO(mvels): RemoveSuffix does not reset to inlined, except if empty? + cord.RemoveSuffix(cord.size() - kMaxInline); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); + EXPECT_THAT(cord, CordzMethodCountEq(Method::kRemoveSuffix, 2)); + + cord.RemoveSuffix(cord.size()); + EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr)); +} + +TEST(CordzTest, SubCordFromUnsampledCord) { + CordzSamplingIntervalHelper sample_every{1}; + Cord src = UnsampledCord(MakeString(TestCordSize::kLarge)); + Cord cord = src.Subcord(10, src.size() / 2); + EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr)); +} + +TEST(CordzTest, SubCordFromSampledCord) { + CordzSamplingIntervalHelper sample_never{99999}; + Cord src = MakeAppendStringCord(TestCordSize::kLarge); + Cord cord = src.Subcord(10, src.size() / 2); + ASSERT_THAT(cord, HasValidCordzInfoOf(Method::kSubCord)); + CordzStatistics stats = GetCordzInfoForTesting(cord)->GetCordzStatistics(); + EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString)); + EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1)); +} + +TEST(CordzTest, SmallSubCord) { + CordzSamplingIntervalHelper sample_never{99999}; + Cord src = MakeAppendStringCord(TestCordSize::kLarge); + Cord cord = src.Subcord(10, kMaxInline + 1); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kSubCord)); +} + +} // namespace + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_INTERNAL_CORDZ_ENABLED diff --git a/third_party/abseil-cpp/absl/strings/cordz_test_helpers.h b/third_party/abseil-cpp/absl/strings/cordz_test_helpers.h new file mode 100644 index 0000000000..e410eecf7f --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/cordz_test_helpers.h @@ -0,0 +1,151 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CORDZ_TEST_HELPERS_H_ +#define ABSL_STRINGS_CORDZ_TEST_HELPERS_H_ + +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/macros.h" +#include "absl/strings/cord.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cordz_info.h" +#include "absl/strings/internal/cordz_sample_token.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_tracker.h" +#include "absl/strings/str_cat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// Returns the CordzInfo for the cord, or nullptr if the cord is not sampled. +inline const cord_internal::CordzInfo* GetCordzInfoForTesting( + const Cord& cord) { + if (!cord.contents_.is_tree()) return nullptr; + return cord.contents_.cordz_info(); +} + +// Returns true if the provided cordz_info is in the list of sampled cords. +inline bool CordzInfoIsListed(const cord_internal::CordzInfo* cordz_info, + cord_internal::CordzSampleToken token = {}) { + for (const cord_internal::CordzInfo& info : token) { + if (cordz_info == &info) return true; + } + return false; +} + +// Matcher on Cord that verifies all of: +// - the cord is sampled +// - the CordzInfo of the cord is listed / discoverable. +// - the reported CordzStatistics match the cord's actual properties +// - the cord has an (initial) UpdateTracker count of 1 for `method` +MATCHER_P(HasValidCordzInfoOf, method, "CordzInfo matches cord") { + const cord_internal::CordzInfo* cord_info = GetCordzInfoForTesting(arg); + if (cord_info == nullptr) { + *result_listener << "cord is not sampled"; + return false; + } + if (!CordzInfoIsListed(cord_info)) { + *result_listener << "cord is sampled, but not listed"; + return false; + } + cord_internal::CordzStatistics stat = cord_info->GetCordzStatistics(); + if (stat.size != arg.size()) { + *result_listener << "cordz size " << stat.size + << " does not match cord size " << arg.size(); + return false; + } + if (stat.update_tracker.Value(method) != 1) { + *result_listener << "Expected method count 1 for " << method << ", found " + << stat.update_tracker.Value(method); + return false; + } + return true; +} + +// Matcher on Cord that verifies that the cord is sampled and that the CordzInfo +// update tracker has 'method' with a call count of 'n' +MATCHER_P2(CordzMethodCountEq, method, n, + absl::StrCat("CordzInfo method count equals ", n)) { + const cord_internal::CordzInfo* cord_info = GetCordzInfoForTesting(arg); + if (cord_info == nullptr) { + *result_listener << "cord is not sampled"; + return false; + } + cord_internal::CordzStatistics stat = cord_info->GetCordzStatistics(); + if (stat.update_tracker.Value(method) != n) { + *result_listener << "Expected method count " << n << " for " << method + << ", found " << stat.update_tracker.Value(method); + return false; + } + return true; +} + +// Cordz will only update with a new rate once the previously scheduled event +// has fired. When we disable Cordz, a long delay takes place where we won't +// consider profiling new Cords. CordzSampleIntervalHelper will burn through +// that interval and allow for testing that assumes that the average sampling +// interval is a particular value. +class CordzSamplingIntervalHelper { + public: + explicit CordzSamplingIntervalHelper(int32_t interval) + : orig_mean_interval_(absl::cord_internal::get_cordz_mean_interval()) { + absl::cord_internal::set_cordz_mean_interval(interval); + absl::cord_internal::cordz_set_next_sample_for_testing(interval); + } + + ~CordzSamplingIntervalHelper() { + absl::cord_internal::set_cordz_mean_interval(orig_mean_interval_); + absl::cord_internal::cordz_set_next_sample_for_testing(orig_mean_interval_); + } + + private: + int32_t orig_mean_interval_; +}; + +// Wrapper struct managing a small CordRep `rep` +struct TestCordRep { + cord_internal::CordRepFlat* rep; + + TestCordRep() { + rep = cord_internal::CordRepFlat::New(100); + rep->length = 100; + memset(rep->Data(), 1, 100); + } + ~TestCordRep() { cord_internal::CordRep::Unref(rep); } +}; + +// Wrapper struct managing a small CordRep `rep`, and +// an InlineData `data` initialized with that CordRep. +struct TestCordData { + TestCordRep rep; + cord_internal::InlineData data{rep.rep}; +}; + +// Creates a Cord that is not sampled +template <typename... Args> +Cord UnsampledCord(Args... args) { + CordzSamplingIntervalHelper never(9999); + Cord cord(std::forward<Args>(args)...); + ABSL_ASSERT(GetCordzInfoForTesting(cord) == nullptr); + return cord; +} + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORDZ_TEST_HELPERS_H_ diff --git a/third_party/abseil-cpp/absl/strings/escaping.cc b/third_party/abseil-cpp/absl/strings/escaping.cc index 7adc1b6571..18b20b83fd 100644 --- a/third_party/abseil-cpp/absl/strings/escaping.cc +++ b/third_party/abseil-cpp/absl/strings/escaping.cc @@ -137,7 +137,7 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, // Copy the escape sequence for the null character const ptrdiff_t octal_size = p + 1 - octal_start; *d++ = '\\'; - memcpy(d, octal_start, octal_size); + memmove(d, octal_start, octal_size); d += octal_size; break; } @@ -170,7 +170,7 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, // Copy the escape sequence for the null character const ptrdiff_t hex_size = p + 1 - hex_start; *d++ = '\\'; - memcpy(d, hex_start, hex_size); + memmove(d, hex_start, hex_size); d += hex_size; break; } @@ -203,7 +203,7 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, if ((rune == 0) && leave_nulls_escaped) { // Copy the escape sequence for the null character *d++ = '\\'; - memcpy(d, hex_start, 5); // u0000 + memmove(d, hex_start, 5); // u0000 d += 5; break; } @@ -251,7 +251,7 @@ bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, if ((rune == 0) && leave_nulls_escaped) { // Copy the escape sequence for the null character *d++ = '\\'; - memcpy(d, hex_start, 9); // U00000000 + memmove(d, hex_start, 9); // U00000000 d += 9; break; } @@ -450,7 +450,7 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, // The GET_INPUT macro gets the next input character, skipping // over any whitespace, and stopping when we reach the end of the - // std::string or when we read any non-data character. The arguments are + // string or when we read any non-data character. The arguments are // an arbitrary identifier (used as a label for goto) and the number // of data bytes that must remain in the input to avoid aborting the // loop. @@ -473,18 +473,18 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, if (dest) { // This loop consumes 4 input bytes and produces 3 output bytes // per iteration. We can't know at the start that there is enough - // data left in the std::string for a full iteration, so the loop may + // data left in the string for a full iteration, so the loop may // break out in the middle; if so 'state' will be set to the // number of input bytes read. while (szsrc >= 4) { // We'll start by optimistically assuming that the next four - // bytes of the std::string (src[0..3]) are four good data bytes + // bytes of the string (src[0..3]) are four good data bytes // (that is, no nulls, whitespace, padding chars, or illegal // chars). We need to test src[0..2] for nulls individually // before constructing temp to preserve the property that we - // never read past a null in the std::string (no matter how long - // szsrc claims the std::string is). + // never read past a null in the string (no matter how long + // szsrc claims the string is). if (!src[0] || !src[1] || !src[2] || ((temp = ((unsigned(unbase64[src[0]]) << 18) | @@ -509,7 +509,7 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, temp = (temp << 6) | decode; } else { // We really did have four good data bytes, so advance four - // characters in the std::string. + // characters in the string. szsrc -= 4; src += 4; @@ -644,7 +644,7 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, state); } - // The remainder of the std::string should be all whitespace, mixed with + // The remainder of the string should be all whitespace, mixed with // exactly 0 equals signs, or exactly 'expected_equals' equals // signs. (Always accepting 0 equals signs is an Abseil extension // not covered in the RFC, as is accepting dot as the pad character.) @@ -771,7 +771,7 @@ constexpr char kWebSafeBase64Chars[] = template <typename String> bool Base64UnescapeInternal(const char* src, size_t slen, String* dest, const signed char* unbase64) { - // Determine the size of the output std::string. Base64 encodes every 3 bytes into + // Determine the size of the output string. Base64 encodes every 3 bytes into // 4 characters. any leftover chars are added directly for good measure. // This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548 const size_t dest_len = 3 * (slen / 4) + (slen % 4); @@ -779,7 +779,7 @@ bool Base64UnescapeInternal(const char* src, size_t slen, String* dest, strings_internal::STLStringResizeUninitialized(dest, dest_len); // We are getting the destination buffer by getting the beginning of the - // std::string and converting it into a char *. + // string and converting it into a char *. size_t len; const bool ok = Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len); diff --git a/third_party/abseil-cpp/absl/strings/escaping_test.cc b/third_party/abseil-cpp/absl/strings/escaping_test.cc index 1967975b69..45671a0ed5 100644 --- a/third_party/abseil-cpp/absl/strings/escaping_test.cc +++ b/third_party/abseil-cpp/absl/strings/escaping_test.cc @@ -300,7 +300,7 @@ static struct { absl::string_view plaintext; absl::string_view cyphertext; } const base64_tests[] = { - // Empty std::string. + // Empty string. {{"", 0}, {"", 0}}, {{nullptr, 0}, {"", 0}}, // if length is zero, plaintext ptr must be ignored! @@ -586,7 +586,7 @@ void TestEscapeAndUnescape() { EXPECT_EQ(encoded, websafe); EXPECT_EQ(absl::WebSafeBase64Escape(tc.plaintext), websafe); - // Let's try the std::string version of the decoder + // Let's try the string version of the decoder decoded = "this junk should be ignored"; EXPECT_TRUE(absl::WebSafeBase64Unescape(websafe, &decoded)); EXPECT_EQ(decoded, tc.plaintext); @@ -625,7 +625,7 @@ TEST(Base64, DISABLED_HugeData) { std::string escaped; absl::Base64Escape(huge, &escaped); - // Generates the std::string that should match a base64 encoded "xxx..." std::string. + // Generates the string that should match a base64 encoded "xxx..." string. // "xxx" in base64 is "eHh4". std::string expected_encoding; expected_encoding.reserve(kSize / 3 * 4); diff --git a/third_party/abseil-cpp/absl/strings/internal/char_map.h b/third_party/abseil-cpp/absl/strings/internal/char_map.h index a76e60362b..61484de0b7 100644 --- a/third_party/abseil-cpp/absl/strings/internal/char_map.h +++ b/third_party/abseil-cpp/absl/strings/internal/char_map.h @@ -72,7 +72,7 @@ class Charmap { CharMaskForWord(x, 2), CharMaskForWord(x, 3)); } - // Containing all the chars in the C-std::string 's'. + // Containing all the chars in the C-string 's'. // Note that this is expensively recursive because of the C++11 constexpr // formulation. Use only in constexpr initializers. static constexpr Charmap FromString(const char* s) { diff --git a/third_party/abseil-cpp/absl/strings/internal/charconv_bigint.cc b/third_party/abseil-cpp/absl/strings/internal/charconv_bigint.cc index 66f33e7207..ebf8c0791a 100644 --- a/third_party/abseil-cpp/absl/strings/internal/charconv_bigint.cc +++ b/third_party/abseil-cpp/absl/strings/internal/charconv_bigint.cc @@ -208,7 +208,7 @@ int BigUnsigned<max_words>::ReadDigits(const char* begin, const char* end, ++dropped_digits; } if (begin < end && *std::prev(end) == '.') { - // If the std::string ends in '.', either before or after dropping zeroes, then + // If the string ends in '.', either before or after dropping zeroes, then // drop the decimal point and look for more digits to drop. dropped_digits = 0; --end; diff --git a/third_party/abseil-cpp/absl/strings/internal/charconv_bigint.h b/third_party/abseil-cpp/absl/strings/internal/charconv_bigint.h index 999e9ae3a2..8f702976a8 100644 --- a/third_party/abseil-cpp/absl/strings/internal/charconv_bigint.h +++ b/third_party/abseil-cpp/absl/strings/internal/charconv_bigint.h @@ -66,7 +66,7 @@ class BigUnsigned { static_cast<uint32_t>(v >> 32)} {} // Constructs a BigUnsigned from the given string_view containing a decimal - // value. If the input std::string is not a decimal integer, constructs a 0 + // value. If the input string is not a decimal integer, constructs a 0 // instead. explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} { // Check for valid input, returning a 0 otherwise. This is reasonable @@ -210,7 +210,7 @@ class BigUnsigned { return words_[index]; } - // Returns this integer as a decimal std::string. This is not used in the decimal- + // Returns this integer as a decimal string. This is not used in the decimal- // to-binary conversion; it is intended to aid in testing. std::string ToString() const; diff --git a/third_party/abseil-cpp/absl/strings/internal/charconv_bigint_test.cc b/third_party/abseil-cpp/absl/strings/internal/charconv_bigint_test.cc index 363bcb03d9..a8b9945829 100644 --- a/third_party/abseil-cpp/absl/strings/internal/charconv_bigint_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/charconv_bigint_test.cc @@ -69,6 +69,61 @@ TEST(BigUnsigned, ShiftLeft) { // And we should have fully rotated all bits off by now: EXPECT_EQ(a, BigUnsigned<84>(0u)); } + { + // Bit shifting large and small numbers by large and small offsets. + // Intended to exercise bounds-checking corner on ShiftLeft() (directly + // and under asan). + + // 2**(32*84)-1 + const BigUnsigned<84> all_bits_one( + "1474444211396924248063325089479706787923460402125687709454567433186613" + "6228083464060749874845919674257665016359189106695900028098437021384227" + "3285029708032466536084583113729486015826557532750465299832071590813090" + "2011853039837649252477307070509704043541368002938784757296893793903797" + "8180292336310543540677175225040919704702800559606097685920595947397024" + "8303316808753252115729411497720357971050627997031988036134171378490368" + "6008000778741115399296162550786288457245180872759047016734959330367829" + "5235612397427686310674725251378116268607113017720538636924549612987647" + "5767411074510311386444547332882472126067840027882117834454260409440463" + "9345147252664893456053258463203120637089916304618696601333953616715125" + "2115882482473279040772264257431663818610405673876655957323083702713344" + "4201105427930770976052393421467136557055"); + const BigUnsigned<84> zero(0u); + const BigUnsigned<84> one(1u); + // in bounds shifts + for (int i = 1; i < 84*32; ++i) { + // shifting all_bits_one to the left should result in a smaller number, + // since the high bits rotate off and the low bits are replaced with + // zeroes. + BigUnsigned<84> big_shifted = all_bits_one; + big_shifted.ShiftLeft(i); + EXPECT_GT(all_bits_one, big_shifted); + // Shifting 1 to the left should instead result in a larger number. + BigUnsigned<84> small_shifted = one; + small_shifted.ShiftLeft(i); + EXPECT_LT(one, small_shifted); + } + // Shifting by zero or a negative number has no effect + for (int no_op_shift : {0, -1, -84 * 32, std::numeric_limits<int>::min()}) { + BigUnsigned<84> big_shifted = all_bits_one; + big_shifted.ShiftLeft(no_op_shift); + EXPECT_EQ(all_bits_one, big_shifted); + BigUnsigned<84> small_shifted = one; + big_shifted.ShiftLeft(no_op_shift); + EXPECT_EQ(one, small_shifted); + } + // Shifting by an amount greater than the number of bits should result in + // zero. + for (int out_of_bounds_shift : + {84 * 32, 84 * 32 + 1, std::numeric_limits<int>::max()}) { + BigUnsigned<84> big_shifted = all_bits_one; + big_shifted.ShiftLeft(out_of_bounds_shift); + EXPECT_EQ(zero, big_shifted); + BigUnsigned<84> small_shifted = one; + small_shifted.ShiftLeft(out_of_bounds_shift); + EXPECT_EQ(zero, small_shifted); + } + } } TEST(BigUnsigned, MultiplyByUint32) { diff --git a/third_party/abseil-cpp/absl/strings/internal/charconv_parse.cc b/third_party/abseil-cpp/absl/strings/internal/charconv_parse.cc index d9a57a7822..d29acaf462 100644 --- a/third_party/abseil-cpp/absl/strings/internal/charconv_parse.cc +++ b/third_party/abseil-cpp/absl/strings/internal/charconv_parse.cc @@ -52,7 +52,7 @@ static_assert(std::numeric_limits<double>::digits == 53, "IEEE double fact"); // The lowest valued 19-digit decimal mantissa we can read still contains // sufficient information to reconstruct a binary mantissa. -static_assert(1000000000000000000u > (uint64_t(1) << (53 + 3)), "(b) above"); +static_assert(1000000000000000000u > (uint64_t{1} << (53 + 3)), "(b) above"); // ParseFloat<16> will read the first 15 significant digits of the mantissa. // @@ -246,8 +246,8 @@ constexpr int DigitMagnitude<16>() { // ConsumeDigits does not protect against overflow on *out; max_digits must // be chosen with respect to type T to avoid the possibility of overflow. template <int base, typename T> -std::size_t ConsumeDigits(const char* begin, const char* end, int max_digits, - T* out, bool* dropped_nonzero_digit) { +int ConsumeDigits(const char* begin, const char* end, int max_digits, T* out, + bool* dropped_nonzero_digit) { if (base == 10) { assert(max_digits <= std::numeric_limits<T>::digits10); } else if (base == 16) { @@ -282,7 +282,7 @@ std::size_t ConsumeDigits(const char* begin, const char* end, int max_digits, *dropped_nonzero_digit = true; } *out = accumulator; - return begin - original_begin; + return static_cast<int>(begin - original_begin); } // Returns true if `v` is one of the chars allowed inside parentheses following @@ -302,7 +302,7 @@ bool ParseInfinityOrNan(const char* begin, const char* end, switch (*begin) { case 'i': case 'I': { - // An infinity std::string consists of the characters "inf" or "infinity", + // An infinity string consists of the characters "inf" or "infinity", // case insensitive. if (strings_internal::memcasecmp(begin + 1, "nf", 2) != 0) { return false; @@ -326,7 +326,7 @@ bool ParseInfinityOrNan(const char* begin, const char* end, } out->type = strings_internal::FloatType::kNan; out->end = begin + 3; - // NaN is allowed to be followed by a parenthesized std::string, consisting of + // NaN is allowed to be followed by a parenthesized string, consisting of // only the characters [a-zA-Z0-9_]. Match that if it's present. begin += 3; if (begin < end && *begin == '(') { @@ -372,7 +372,7 @@ strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end, int exponent_adjustment = 0; bool mantissa_is_inexact = false; - std::size_t pre_decimal_digits = ConsumeDigits<base>( + int pre_decimal_digits = ConsumeDigits<base>( begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact); begin += pre_decimal_digits; int digits_left; @@ -398,14 +398,14 @@ strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end, while (begin < end && *begin == '0') { ++begin; } - std::size_t zeros_skipped = begin - begin_zeros; + int zeros_skipped = static_cast<int>(begin - begin_zeros); if (zeros_skipped >= DigitLimit<base>()) { // refuse to parse pathological inputs return result; } exponent_adjustment -= static_cast<int>(zeros_skipped); } - std::size_t post_decimal_digits = ConsumeDigits<base>( + int post_decimal_digits = ConsumeDigits<base>( begin, end, digits_left, &mantissa, &mantissa_is_inexact); begin += post_decimal_digits; diff --git a/third_party/abseil-cpp/absl/strings/internal/charconv_parse_test.cc b/third_party/abseil-cpp/absl/strings/internal/charconv_parse_test.cc index 9511c98745..bc2d111876 100644 --- a/third_party/abseil-cpp/absl/strings/internal/charconv_parse_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/charconv_parse_test.cc @@ -63,7 +63,7 @@ void ExpectParsedFloat(std::string s, absl::chars_format format_flags, } const std::string::size_type expected_characters_matched = s.find('$'); ABSL_RAW_CHECK(expected_characters_matched != std::string::npos, - "Input std::string must contain $"); + "Input string must contain $"); s.replace(expected_characters_matched, 1, ""); ParsedFloat parsed = diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_internal.cc b/third_party/abseil-cpp/absl/strings/internal/cord_internal.cc new file mode 100644 index 0000000000..1767e6fcc5 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_internal.cc @@ -0,0 +1,89 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "absl/strings/internal/cord_internal.h" + +#include <atomic> +#include <cassert> +#include <memory> + +#include "absl/container/inlined_vector.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cord_rep_ring.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +ABSL_CONST_INIT std::atomic<bool> cord_btree_enabled(kCordEnableBtreeDefault); +ABSL_CONST_INIT std::atomic<bool> cord_ring_buffer_enabled( + kCordEnableRingBufferDefault); +ABSL_CONST_INIT std::atomic<bool> shallow_subcords_enabled( + kCordShallowSubcordsDefault); +ABSL_CONST_INIT std::atomic<bool> cord_btree_exhaustive_validation(false); + +void CordRep::Destroy(CordRep* rep) { + assert(rep != nullptr); + + absl::InlinedVector<CordRep*, Constants::kInlinedVectorSize> pending; + while (true) { + assert(!rep->refcount.IsImmortal()); + if (rep->tag == CONCAT) { + CordRepConcat* rep_concat = rep->concat(); + CordRep* right = rep_concat->right; + if (!right->refcount.Decrement()) { + pending.push_back(right); + } + CordRep* left = rep_concat->left; + delete rep_concat; + rep = nullptr; + if (!left->refcount.Decrement()) { + rep = left; + continue; + } + } else if (rep->tag == BTREE) { + CordRepBtree::Destroy(rep->btree()); + rep = nullptr; + } else if (rep->tag == RING) { + CordRepRing::Destroy(rep->ring()); + rep = nullptr; + } else if (rep->tag == EXTERNAL) { + CordRepExternal::Delete(rep); + rep = nullptr; + } else if (rep->tag == SUBSTRING) { + CordRepSubstring* rep_substring = rep->substring(); + CordRep* child = rep_substring->child; + delete rep_substring; + rep = nullptr; + if (!child->refcount.Decrement()) { + rep = child; + continue; + } + } else { + CordRepFlat::Delete(rep); + rep = nullptr; + } + + if (!pending.empty()) { + rep = pending.back(); + pending.pop_back(); + } else { + break; + } + } +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_internal.h b/third_party/abseil-cpp/absl/strings/internal/cord_internal.h index 5b5d108308..bfe5564e46 100644 --- a/third_party/abseil-cpp/absl/strings/internal/cord_internal.h +++ b/third_party/abseil-cpp/absl/strings/internal/cord_internal.h @@ -1,4 +1,4 @@ -// Copyright 2020 The Abseil Authors. +// Copyright 2021 The Abseil Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,6 +21,11 @@ #include <cstdint> #include <type_traits> +#include "absl/base/config.h" +#include "absl/base/internal/endian.h" +#include "absl/base/internal/invoke.h" +#include "absl/base/optimization.h" +#include "absl/container/internal/compressed_tuple.h" #include "absl/meta/type_traits.h" #include "absl/strings/string_view.h" @@ -28,49 +33,153 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace cord_internal { -// Wraps std::atomic for reference counting. -class Refcount { +class CordzInfo; + +// Default feature enable states for cord ring buffers +enum CordFeatureDefaults { + kCordEnableBtreeDefault = true, + kCordEnableRingBufferDefault = false, + kCordShallowSubcordsDefault = false +}; + +extern std::atomic<bool> cord_btree_enabled; +extern std::atomic<bool> cord_ring_buffer_enabled; +extern std::atomic<bool> shallow_subcords_enabled; + +// `cord_btree_exhaustive_validation` can be set to force exhaustive validation +// in debug assertions, and code that calls `IsValid()` explicitly. By default, +// assertions should be relatively cheap and AssertValid() can easily lead to +// O(n^2) complexity as recursive / full tree validation is O(n). +extern std::atomic<bool> cord_btree_exhaustive_validation; + +inline void enable_cord_btree(bool enable) { + cord_btree_enabled.store(enable, std::memory_order_relaxed); +} + +inline void enable_cord_ring_buffer(bool enable) { + cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed); +} + +inline void enable_shallow_subcords(bool enable) { + shallow_subcords_enabled.store(enable, std::memory_order_relaxed); +} + +enum Constants { + // The inlined size to use with absl::InlinedVector. + // + // Note: The InlinedVectors in this file (and in cord.h) do not need to use + // the same value for their inlined size. The fact that they do is historical. + // It may be desirable for each to use a different inlined size optimized for + // that InlinedVector's usage. + // + // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for + // the inlined vector size (47 exists for backward compatibility). + kInlinedVectorSize = 47, + + // Prefer copying blocks of at most this size, otherwise reference count. + kMaxBytesToCopy = 511 +}; + +// Compact class for tracking the reference count and state flags for CordRep +// instances. Data is stored in an atomic int32_t for compactness and speed. +class RefcountAndFlags { public: - Refcount() : count_{1} {} - ~Refcount() {} + constexpr RefcountAndFlags() : count_{kRefIncrement} {} + struct Immortal {}; + explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {} + struct WithCrc {}; + explicit constexpr RefcountAndFlags(WithCrc) + : count_(kCrcFlag | kRefIncrement) {} - // Increments the reference count by 1. Imposes no memory ordering. - inline void Increment() { count_.fetch_add(1, std::memory_order_relaxed); } + // Increments the reference count. Imposes no memory ordering. + inline void Increment() { + count_.fetch_add(kRefIncrement, std::memory_order_relaxed); + } // Asserts that the current refcount is greater than 0. If the refcount is - // greater than 1, decrements the reference count by 1. + // greater than 1, decrements the reference count. // // Returns false if there are no references outstanding; true otherwise. // Inserts barriers to ensure that state written before this method returns // false will be visible to a thread that just observed this method returning - // false. + // false. Always returns false when the immortal bit is set. inline bool Decrement() { - int32_t refcount = count_.load(std::memory_order_acquire); - assert(refcount > 0); - return refcount != 1 && count_.fetch_sub(1, std::memory_order_acq_rel) != 1; + int32_t refcount = count_.load(std::memory_order_acquire) & kRefcountMask; + assert(refcount > 0 || refcount & kImmortalFlag); + return refcount != kRefIncrement && + (count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) & + kRefcountMask) != kRefIncrement; } // Same as Decrement but expect that refcount is greater than 1. inline bool DecrementExpectHighRefcount() { - int32_t refcount = count_.fetch_sub(1, std::memory_order_acq_rel); - assert(refcount > 0); - return refcount != 1; + int32_t refcount = + count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) & + kRefcountMask; + assert(refcount > 0 || refcount & kImmortalFlag); + return refcount != kRefIncrement; } // Returns the current reference count using acquire semantics. - inline int32_t Get() const { return count_.load(std::memory_order_acquire); } - - // Returns whether the atomic integer is 1. - // If the reference count is used in the conventional way, a - // reference count of 1 implies that the current thread owns the - // reference and no other thread shares it. - // This call performs the test for a reference count of one, and - // performs the memory barrier needed for the owning thread - // to act on the object, knowing that it has exclusive access to the - // object. - inline bool IsOne() { return count_.load(std::memory_order_acquire) == 1; } + inline int32_t Get() const { + return count_.load(std::memory_order_acquire) >> kNumFlags; + } + + // Returns true if the referenced object carries a CRC value. + bool HasCrc() const { + return (count_.load(std::memory_order_relaxed) & kCrcFlag) != 0; + } + + // Returns true iff the atomic integer is 1 and this node does not store + // a CRC. When both these conditions are met, the current thread owns + // the reference and no other thread shares it, so its contents may be + // safely mutated. + // + // If the referenced item is shared, carries a CRC, or is immortal, + // it should not be modified in-place, and this function returns false. + // + // This call performs the memory barrier needed for the owning thread + // to act on the object, so that if it returns true, it may safely + // assume exclusive access to the object. + inline bool IsMutable() { + return (count_.load(std::memory_order_acquire)) == kRefIncrement; + } + + // Returns whether the atomic integer is 1. Similar to IsMutable(), + // but does not check for a stored CRC. (An unshared node with a CRC is not + // mutable, because changing its data would invalidate the CRC.) + // + // When this returns true, there are no other references, and data sinks + // may safely adopt the children of the CordRep. + inline bool IsOne() { + return (count_.load(std::memory_order_acquire) & kRefcountMask) == + kRefIncrement; + } + + bool IsImmortal() const { + return (count_.load(std::memory_order_relaxed) & kImmortalFlag) != 0; + } private: + // We reserve the bottom bits for flags. + // kImmortalBit indicates that this entity should never be collected; it is + // used for the StringConstant constructor to avoid collecting immutable + // constant cords. + // kReservedFlag is reserved for future use. + enum { + kNumFlags = 2, + + kImmortalFlag = 0x1, + kCrcFlag = 0x2, + kRefIncrement = (1 << kNumFlags), + + // Bitmask to use when checking refcount by equality. This masks out + // all flags except kImmortalFlag, which is part of the refcount for + // purposes of equality. (A refcount of 0 or 1 does not count as 0 or 1 + // if the immortal bit is set.) + kRefcountMask = ~kCrcFlag, + }; + std::atomic<int32_t> count_; }; @@ -80,34 +189,106 @@ class Refcount { // functions in the base class. struct CordRepConcat; -struct CordRepSubstring; struct CordRepExternal; +struct CordRepFlat; +struct CordRepSubstring; +class CordRepRing; +class CordRepBtree; + +// Various representations that we allow +enum CordRepKind { + CONCAT = 0, + SUBSTRING = 1, + BTREE = 2, + RING = 3, + EXTERNAL = 4, + + // We have different tags for different sized flat arrays, + // starting with FLAT, and limited to MAX_FLAT_TAG. The 225 value is based on + // the current 'size to tag' encoding of 8 / 32 bytes. If a new tag is needed + // in the future, then 'FLAT' and 'MAX_FLAT_TAG' should be adjusted as well + // as the Tag <---> Size logic so that FLAT stil represents the minimum flat + // allocation size. (32 bytes as of now). + FLAT = 5, + MAX_FLAT_TAG = 225 +}; + +// There are various locations where we want to check if some rep is a 'plain' +// data edge, i.e. an external or flat rep. By having FLAT == EXTERNAL + 1, we +// can perform this check in a single branch as 'tag >= EXTERNAL' +// Likewise, we have some locations where we check for 'ring or external/flat', +// so likewise align RING to EXTERNAL. +// Note that we can leave this optimization to the compiler. The compiler will +// DTRT when it sees a condition like `tag == EXTERNAL || tag >= FLAT`. +static_assert(RING == BTREE + 1, "BTREE and RING not consecutive"); +static_assert(EXTERNAL == RING + 1, "BTREE and EXTERNAL not consecutive"); +static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive"); struct CordRep { + CordRep() = default; + constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l) + : length(l), refcount(immortal), tag(EXTERNAL), storage{} {} + // The following three fields have to be less than 32 bytes since // that is the smallest supported flat node size. - // We use uint64_t for the length even in 32-bit binaries. - uint64_t length; - Refcount refcount; + size_t length; + RefcountAndFlags refcount; // If tag < FLAT, it represents CordRepKind and indicates the type of node. // Otherwise, the node type is CordRepFlat and the tag is the encoded size. uint8_t tag; - char data[1]; // Starting point for flat array: MUST BE LAST FIELD of CordRep + // `storage` provides two main purposes: + // - the starting point for FlatCordRep.Data() [flexible-array-member] + // - 3 bytes of additional storage for use by derived classes. + // The latter is used by CordrepConcat and CordRepBtree. CordRepConcat stores + // a 'depth' value in storage[0], and the (future) CordRepBtree class stores + // `height`, `begin` and `end` in the 3 entries. Otherwise we would need to + // allocate room for these in the derived class, as not all compilers reuse + // padding space from the base class (clang and gcc do, MSVC does not, etc) + uint8_t storage[3]; + + // Returns true if this instance's tag matches the requested type. + constexpr bool IsRing() const { return tag == RING; } + constexpr bool IsConcat() const { return tag == CONCAT; } + constexpr bool IsSubstring() const { return tag == SUBSTRING; } + constexpr bool IsExternal() const { return tag == EXTERNAL; } + constexpr bool IsFlat() const { return tag >= FLAT; } + constexpr bool IsBtree() const { return tag == BTREE; } + + inline CordRepRing* ring(); + inline const CordRepRing* ring() const; inline CordRepConcat* concat(); inline const CordRepConcat* concat() const; inline CordRepSubstring* substring(); inline const CordRepSubstring* substring() const; inline CordRepExternal* external(); inline const CordRepExternal* external() const; + inline CordRepFlat* flat(); + inline const CordRepFlat* flat() const; + inline CordRepBtree* btree(); + inline const CordRepBtree* btree() const; + + // -------------------------------------------------------------------- + // Memory management + + // Destroys the provided `rep`. + static void Destroy(CordRep* rep); + + // Increments the reference count of `rep`. + // Requires `rep` to be a non-null pointer value. + static inline CordRep* Ref(CordRep* rep); + + // Decrements the reference count of `rep`. Destroys rep if count reaches + // zero. Requires `rep` to be a non-null pointer value. + static inline void Unref(CordRep* rep); }; struct CordRepConcat : public CordRep { CordRep* left; CordRep* right; - uint8_t depth() const { return static_cast<uint8_t>(data[0]); } - void set_depth(uint8_t depth) { data[0] = static_cast<char>(depth); } + uint8_t depth() const { return storage[0]; } + void set_depth(uint8_t depth) { storage[0] = depth; } }; struct CordRepSubstring : public CordRep { @@ -115,37 +296,325 @@ struct CordRepSubstring : public CordRep { CordRep* child; }; -// TODO(strel): replace the following logic (and related functions in cord.cc) -// with container_internal::Layout. - -// Alignment requirement for CordRepExternal so that the type erased releaser -// will be stored at a suitably aligned address. -constexpr size_t ExternalRepAlignment() { -#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__) - return __STDCPP_DEFAULT_NEW_ALIGNMENT__; -#else - return alignof(max_align_t); -#endif -} - -// Type for function pointer that will invoke and destroy the type-erased -// releaser function object. Accepts a pointer to the releaser and the -// `string_view` that were passed in to `NewExternalRep` below. The return value -// is the size of the `Releaser` type. -using ExternalReleaserInvoker = size_t (*)(void*, absl::string_view); +// Type for function pointer that will invoke the releaser function and also +// delete the `CordRepExternalImpl` corresponding to the passed in +// `CordRepExternal`. +using ExternalReleaserInvoker = void (*)(CordRepExternal*); // External CordReps are allocated together with a type erased releaser. The // releaser is stored in the memory directly following the CordRepExternal. -struct alignas(ExternalRepAlignment()) CordRepExternal : public CordRep { +struct CordRepExternal : public CordRep { + CordRepExternal() = default; + explicit constexpr CordRepExternal(absl::string_view str) + : CordRep(RefcountAndFlags::Immortal{}, str.size()), + base(str.data()), + releaser_invoker(nullptr) {} + const char* base; // Pointer to function that knows how to call and destroy the releaser. ExternalReleaserInvoker releaser_invoker; + + // Deletes (releases) the external rep. + // Requires rep != nullptr and rep->IsExternal() + static void Delete(CordRep* rep); +}; + +struct Rank1 {}; +struct Rank0 : Rank1 {}; + +template <typename Releaser, typename = ::absl::base_internal::invoke_result_t< + Releaser, absl::string_view>> +void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view data) { + ::absl::base_internal::invoke(std::forward<Releaser>(releaser), data); +} + +template <typename Releaser, + typename = ::absl::base_internal::invoke_result_t<Releaser>> +void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view) { + ::absl::base_internal::invoke(std::forward<Releaser>(releaser)); +} + +// We use CompressedTuple so that we can benefit from EBCO. +template <typename Releaser> +struct CordRepExternalImpl + : public CordRepExternal, + public ::absl::container_internal::CompressedTuple<Releaser> { + // The extra int arg is so that we can avoid interfering with copy/move + // constructors while still benefitting from perfect forwarding. + template <typename T> + CordRepExternalImpl(T&& releaser, int) + : CordRepExternalImpl::CompressedTuple(std::forward<T>(releaser)) { + this->releaser_invoker = &Release; + } + + ~CordRepExternalImpl() { + InvokeReleaser(Rank0{}, std::move(this->template get<0>()), + absl::string_view(base, length)); + } + + static void Release(CordRepExternal* rep) { + delete static_cast<CordRepExternalImpl*>(rep); + } +}; + +inline void CordRepExternal::Delete(CordRep* rep) { + assert(rep != nullptr && rep->IsExternal()); + auto* rep_external = static_cast<CordRepExternal*>(rep); + assert(rep_external->releaser_invoker != nullptr); + rep_external->releaser_invoker(rep_external); +} + +template <typename Str> +struct ConstInitExternalStorage { + ABSL_CONST_INIT static CordRepExternal value; }; -// TODO(strel): look into removing, it doesn't seem like anything relies on this -static_assert(sizeof(CordRepConcat) == sizeof(CordRepSubstring), ""); +template <typename Str> +CordRepExternal ConstInitExternalStorage<Str>::value(Str::value); + +enum { + kMaxInline = 15, +}; + +constexpr char GetOrNull(absl::string_view data, size_t pos) { + return pos < data.size() ? data[pos] : '\0'; +} + +// We store cordz_info as 64 bit pointer value in big endian format. This +// guarantees that the least significant byte of cordz_info matches the last +// byte of the inline data representation in as_chars_, which holds the inlined +// size or the 'is_tree' bit. +using cordz_info_t = int64_t; + +// Assert that the `cordz_info` pointer value perfectly overlaps the last half +// of `as_chars_` and can hold a pointer value. +static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, ""); +static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), ""); + +// BigEndianByte() creates a big endian representation of 'value', i.e.: a big +// endian value where the last byte in the host's representation holds 'value`, +// with all other bytes being 0. +static constexpr cordz_info_t BigEndianByte(unsigned char value) { +#if defined(ABSL_IS_BIG_ENDIAN) + return value; +#else + return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8); +#endif +} + +class InlineData { + public: + // DefaultInitType forces the use of the default initialization constructor. + enum DefaultInitType { kDefaultInit }; + + // kNullCordzInfo holds the big endian representation of intptr_t(1) + // This is the 'null' / initial value of 'cordz_info'. The null value + // is specifically big endian 1 as with 64-bit pointers, the last + // byte of cordz_info overlaps with the last byte holding the tag. + static constexpr cordz_info_t kNullCordzInfo = BigEndianByte(1); + + constexpr InlineData() : as_chars_{0} {} + explicit InlineData(DefaultInitType) {} + explicit constexpr InlineData(CordRep* rep) : as_tree_(rep) {} + explicit constexpr InlineData(absl::string_view chars) + : as_chars_{ + GetOrNull(chars, 0), GetOrNull(chars, 1), + GetOrNull(chars, 2), GetOrNull(chars, 3), + GetOrNull(chars, 4), GetOrNull(chars, 5), + GetOrNull(chars, 6), GetOrNull(chars, 7), + GetOrNull(chars, 8), GetOrNull(chars, 9), + GetOrNull(chars, 10), GetOrNull(chars, 11), + GetOrNull(chars, 12), GetOrNull(chars, 13), + GetOrNull(chars, 14), static_cast<char>((chars.size() << 1))} {} + + // Returns true if the current instance is empty. + // The 'empty value' is an inlined data value of zero length. + bool is_empty() const { return tag() == 0; } + + // Returns true if the current instance holds a tree value. + bool is_tree() const { return (tag() & 1) != 0; } + + // Returns true if the current instance holds a cordz_info value. + // Requires the current instance to hold a tree value. + bool is_profiled() const { + assert(is_tree()); + return as_tree_.cordz_info != kNullCordzInfo; + } + + // Returns true if either of the provided instances hold a cordz_info value. + // This method is more efficient than the equivalent `data1.is_profiled() || + // data2.is_profiled()`. Requires both arguments to hold a tree. + static bool is_either_profiled(const InlineData& data1, + const InlineData& data2) { + assert(data1.is_tree() && data2.is_tree()); + return (data1.as_tree_.cordz_info | data2.as_tree_.cordz_info) != + kNullCordzInfo; + } + + // Returns the cordz_info sampling instance for this instance, or nullptr + // if the current instance is not sampled and does not have CordzInfo data. + // Requires the current instance to hold a tree value. + CordzInfo* cordz_info() const { + assert(is_tree()); + intptr_t info = + static_cast<intptr_t>(absl::big_endian::ToHost64(as_tree_.cordz_info)); + assert(info & 1); + return reinterpret_cast<CordzInfo*>(info - 1); + } + + // Sets the current cordz_info sampling instance for this instance, or nullptr + // if the current instance is not sampled and does not have CordzInfo data. + // Requires the current instance to hold a tree value. + void set_cordz_info(CordzInfo* cordz_info) { + assert(is_tree()); + intptr_t info = reinterpret_cast<intptr_t>(cordz_info) | 1; + as_tree_.cordz_info = absl::big_endian::FromHost64(info); + } + + // Resets the current cordz_info to null / empty. + void clear_cordz_info() { + assert(is_tree()); + as_tree_.cordz_info = kNullCordzInfo; + } + + // Returns a read only pointer to the character data inside this instance. + // Requires the current instance to hold inline data. + const char* as_chars() const { + assert(!is_tree()); + return as_chars_; + } + + // Returns a mutable pointer to the character data inside this instance. + // Should be used for 'write only' operations setting an inlined value. + // Applications can set the value of inlined data either before or after + // setting the inlined size, i.e., both of the below are valid: + // + // // Set inlined data and inline size + // memcpy(data_.as_chars(), data, size); + // data_.set_inline_size(size); + // + // // Set inlined size and inline data + // data_.set_inline_size(size); + // memcpy(data_.as_chars(), data, size); + // + // It's an error to read from the returned pointer without a preceding write + // if the current instance does not hold inline data, i.e.: is_tree() == true. + char* as_chars() { return as_chars_; } + + // Returns the tree value of this value. + // Requires the current instance to hold a tree value. + CordRep* as_tree() const { + assert(is_tree()); + return as_tree_.rep; + } + + // Initialize this instance to holding the tree value `rep`, + // initializing the cordz_info to null, i.e.: 'not profiled'. + void make_tree(CordRep* rep) { + as_tree_.rep = rep; + as_tree_.cordz_info = kNullCordzInfo; + } + + // Set the tree value of this instance to 'rep`. + // Requires the current instance to already hold a tree value. + // Does not affect the value of cordz_info. + void set_tree(CordRep* rep) { + assert(is_tree()); + as_tree_.rep = rep; + } + + // Returns the size of the inlined character data inside this instance. + // Requires the current instance to hold inline data. + size_t inline_size() const { + assert(!is_tree()); + return tag() >> 1; + } + + // Sets the size of the inlined character data inside this instance. + // Requires `size` to be <= kMaxInline. + // See the documentation on 'as_chars()' for more information and examples. + void set_inline_size(size_t size) { + ABSL_ASSERT(size <= kMaxInline); + tag() = static_cast<char>(size << 1); + } + + private: + // See cordz_info_t for forced alignment and size of `cordz_info` details. + struct AsTree { + explicit constexpr AsTree(absl::cord_internal::CordRep* tree) + : rep(tree), cordz_info(kNullCordzInfo) {} + // This union uses up extra space so that whether rep is 32 or 64 bits, + // cordz_info will still start at the eighth byte, and the last + // byte of cordz_info will still be the last byte of InlineData. + union { + absl::cord_internal::CordRep* rep; + cordz_info_t unused_aligner; + }; + cordz_info_t cordz_info; + }; + + char& tag() { return reinterpret_cast<char*>(this)[kMaxInline]; } + char tag() const { return reinterpret_cast<const char*>(this)[kMaxInline]; } + + // If the data has length <= kMaxInline, we store it in `as_chars_`, and + // store the size in the last char of `as_chars_` shifted left + 1. + // Else we store it in a tree and store a pointer to that tree in + // `as_tree_.rep` and store a tag in `tagged_size`. + union { + char as_chars_[kMaxInline + 1]; + AsTree as_tree_; + }; +}; + +static_assert(sizeof(InlineData) == kMaxInline + 1, ""); + +inline CordRepConcat* CordRep::concat() { + assert(IsConcat()); + return static_cast<CordRepConcat*>(this); +} + +inline const CordRepConcat* CordRep::concat() const { + assert(IsConcat()); + return static_cast<const CordRepConcat*>(this); +} + +inline CordRepSubstring* CordRep::substring() { + assert(IsSubstring()); + return static_cast<CordRepSubstring*>(this); +} + +inline const CordRepSubstring* CordRep::substring() const { + assert(IsSubstring()); + return static_cast<const CordRepSubstring*>(this); +} + +inline CordRepExternal* CordRep::external() { + assert(IsExternal()); + return static_cast<CordRepExternal*>(this); +} + +inline const CordRepExternal* CordRep::external() const { + assert(IsExternal()); + return static_cast<const CordRepExternal*>(this); +} + +inline CordRep* CordRep::Ref(CordRep* rep) { + assert(rep != nullptr); + rep->refcount.Increment(); + return rep; +} + +inline void CordRep::Unref(CordRep* rep) { + assert(rep != nullptr); + // Expect refcount to be 0. Avoiding the cost of an atomic decrement should + // typically outweigh the cost of an extra branch checking for ref == 1. + if (ABSL_PREDICT_FALSE(!rep->refcount.DecrementExpectHighRefcount())) { + Destroy(rep); + } +} } // namespace cord_internal + ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_internal_test.cc b/third_party/abseil-cpp/absl/strings/internal/cord_internal_test.cc new file mode 100644 index 0000000000..0758dfef38 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_internal_test.cc @@ -0,0 +1,116 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_internal.h" + +#include "gmock/gmock.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +TEST(RefcountAndFlags, NormalRefcount) { + for (bool expect_high_refcount : {false, true}) { + SCOPED_TRACE(expect_high_refcount); + RefcountAndFlags refcount; + // count = 1 + + EXPECT_FALSE(refcount.HasCrc()); + EXPECT_TRUE(refcount.IsMutable()); + EXPECT_TRUE(refcount.IsOne()); + + refcount.Increment(); + // count = 2 + + EXPECT_FALSE(refcount.HasCrc()); + EXPECT_FALSE(refcount.IsMutable()); + EXPECT_FALSE(refcount.IsOne()); + + // Decrementing should return true, since a reference is outstanding. + if (expect_high_refcount) { + EXPECT_TRUE(refcount.DecrementExpectHighRefcount()); + } else { + EXPECT_TRUE(refcount.Decrement()); + } + // count = 1 + + EXPECT_FALSE(refcount.HasCrc()); + EXPECT_TRUE(refcount.IsMutable()); + EXPECT_TRUE(refcount.IsOne()); + + // One more decremnt will return false, as no references remain. + if (expect_high_refcount) { + EXPECT_FALSE(refcount.DecrementExpectHighRefcount()); + } else { + EXPECT_FALSE(refcount.Decrement()); + } + } +} + +TEST(RefcountAndFlags, CrcRefcount) { + for (bool expect_high_refcount : {false, true}) { + SCOPED_TRACE(expect_high_refcount); + RefcountAndFlags refcount(RefcountAndFlags::WithCrc{}); + // count = 1 + + // A CRC-carrying node is never mutable, but can be unshared + EXPECT_TRUE(refcount.HasCrc()); + EXPECT_FALSE(refcount.IsMutable()); + EXPECT_TRUE(refcount.IsOne()); + + refcount.Increment(); + // count = 2 + + EXPECT_TRUE(refcount.HasCrc()); + EXPECT_FALSE(refcount.IsMutable()); + EXPECT_FALSE(refcount.IsOne()); + + // Decrementing should return true, since a reference is outstanding. + if (expect_high_refcount) { + EXPECT_TRUE(refcount.DecrementExpectHighRefcount()); + } else { + EXPECT_TRUE(refcount.Decrement()); + } + // count = 1 + + EXPECT_TRUE(refcount.HasCrc()); + EXPECT_FALSE(refcount.IsMutable()); + EXPECT_TRUE(refcount.IsOne()); + + // One more decremnt will return false, as no references remain. + if (expect_high_refcount) { + EXPECT_FALSE(refcount.DecrementExpectHighRefcount()); + } else { + EXPECT_FALSE(refcount.Decrement()); + } + } +} + +TEST(RefcountAndFlags, ImmortalRefcount) { + RefcountAndFlags immortal_refcount(RefcountAndFlags::Immortal{}); + + for (int i = 0; i < 100; ++i) { + // An immortal refcount is never unshared, and decrementing never causes + // a collection. + EXPECT_FALSE(immortal_refcount.HasCrc()); + EXPECT_FALSE(immortal_refcount.IsMutable()); + EXPECT_FALSE(immortal_refcount.IsOne()); + EXPECT_TRUE(immortal_refcount.Decrement()); + EXPECT_TRUE(immortal_refcount.DecrementExpectHighRefcount()); + } +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.cc new file mode 100644 index 0000000000..4404f33a12 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.cc @@ -0,0 +1,1128 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_btree.h" + +#include <cassert> +#include <cstdint> +#include <iostream> +#include <string> + +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_consume.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +constexpr size_t CordRepBtree::kMaxCapacity; // NOLINT: needed for c++ < c++17 + +namespace { + +using NodeStack = CordRepBtree * [CordRepBtree::kMaxDepth]; +using EdgeType = CordRepBtree::EdgeType; +using OpResult = CordRepBtree::OpResult; +using CopyResult = CordRepBtree::CopyResult; + +constexpr auto kFront = CordRepBtree::kFront; +constexpr auto kBack = CordRepBtree::kBack; + +inline bool exhaustive_validation() { + return cord_btree_exhaustive_validation.load(std::memory_order_relaxed); +} + +// Implementation of the various 'Dump' functions. +// Prints the entire tree structure or 'rep'. External callers should +// not specify 'depth' and leave it to its default (0) value. +// Rep may be a CordRepBtree tree, or a SUBSTRING / EXTERNAL / FLAT node. +void DumpAll(const CordRep* rep, bool include_contents, std::ostream& stream, + int depth = 0) { + // Allow for full height trees + substring -> flat / external nodes. + assert(depth <= CordRepBtree::kMaxDepth + 2); + std::string sharing = const_cast<CordRep*>(rep)->refcount.IsOne() + ? std::string("Private") + : absl::StrCat("Shared(", rep->refcount.Get(), ")"); + std::string sptr = absl::StrCat("0x", absl::Hex(rep)); + + // Dumps the data contents of `rep` if `include_contents` is true. + // Always emits a new line character. + auto maybe_dump_data = [&stream, include_contents](const CordRep* r) { + if (include_contents) { + // Allow for up to 60 wide display of content data, which with some + // indentation and prefix / labels keeps us within roughly 80-100 wide. + constexpr size_t kMaxDataLength = 60; + stream << ", data = \"" + << CordRepBtree::EdgeData(r).substr(0, kMaxDataLength) + << (r->length > kMaxDataLength ? "\"..." : "\""); + } + stream << '\n'; + }; + + // For each level, we print the 'shared/private' state and the rep pointer, + // indented by two spaces per recursive depth. + stream << std::string(depth * 2, ' ') << sharing << " (" << sptr << ") "; + + if (rep->IsBtree()) { + const CordRepBtree* node = rep->btree(); + std::string label = + node->height() ? absl::StrCat("Node(", node->height(), ")") : "Leaf"; + stream << label << ", len = " << node->length + << ", begin = " << node->begin() << ", end = " << node->end() + << "\n"; + for (CordRep* edge : node->Edges()) { + DumpAll(edge, include_contents, stream, depth + 1); + } + } else if (rep->tag == SUBSTRING) { + const CordRepSubstring* substring = rep->substring(); + stream << "Substring, len = " << rep->length + << ", start = " << substring->start; + maybe_dump_data(rep); + DumpAll(substring->child, include_contents, stream, depth + 1); + } else if (rep->tag >= FLAT) { + stream << "Flat, len = " << rep->length + << ", cap = " << rep->flat()->Capacity(); + maybe_dump_data(rep); + } else if (rep->tag == EXTERNAL) { + stream << "Extn, len = " << rep->length; + maybe_dump_data(rep); + } +} + +// TODO(b/192061034): add 'bytes to copy' logic to avoid large slop on substring +// small data out of large reps, and general efficiency of 'always copy small +// data'. Consider making this a cord rep internal library function. +CordRepSubstring* CreateSubstring(CordRep* rep, size_t offset, size_t n) { + assert(n != 0); + assert(offset + n <= rep->length); + assert(offset != 0 || n != rep->length); + + if (rep->tag == SUBSTRING) { + CordRepSubstring* substring = rep->substring(); + offset += substring->start; + rep = CordRep::Ref(substring->child); + CordRep::Unref(substring); + } + CordRepSubstring* substring = new CordRepSubstring(); + substring->length = n; + substring->tag = SUBSTRING; + substring->start = offset; + substring->child = rep; + return substring; +} + +// TODO(b/192061034): consider making this a cord rep library function. +inline CordRep* MakeSubstring(CordRep* rep, size_t offset, size_t n) { + if (n == rep->length) return rep; + if (n == 0) return CordRep::Unref(rep), nullptr; + return CreateSubstring(rep, offset, n); +} + +// TODO(b/192061034): consider making this a cord rep library function. +inline CordRep* MakeSubstring(CordRep* rep, size_t offset) { + if (offset == 0) return rep; + return CreateSubstring(rep, offset, rep->length - offset); +} + +// Resizes `edge` to the provided `length`. Adopts a reference on `edge`. +// This method directly returns `edge` if `length` equals `edge->length`. +// If `is_mutable` is set to true, this function may return `edge` with +// `edge->length` set to the new length depending on the type and size of +// `edge`. Otherwise, this function returns a new CordRepSubstring value. +// Requires `length > 0 && length <= edge->length`. +CordRep* ResizeEdge(CordRep* edge, size_t length, bool is_mutable) { + assert(length > 0); + assert(length <= edge->length); + assert(CordRepBtree::IsDataEdge(edge)); + if (length >= edge->length) return edge; + + if (is_mutable && (edge->tag >= FLAT || edge->tag == SUBSTRING)) { + edge->length = length; + return edge; + } + + return CreateSubstring(edge, 0, length); +} + +template <EdgeType edge_type> +inline absl::string_view Consume(absl::string_view s, size_t n) { + return edge_type == kBack ? s.substr(n) : s.substr(0, s.size() - n); +} + +template <EdgeType edge_type> +inline absl::string_view Consume(char* dst, absl::string_view s, size_t n) { + if (edge_type == kBack) { + memcpy(dst, s.data(), n); + return s.substr(n); + } else { + const size_t offset = s.size() - n; + memcpy(dst, s.data() + offset, n); + return s.substr(0, offset); + } +} + +// Known issue / optimization weirdness: the store associated with the +// decrement introduces traffic between cpus (even if the result of that +// traffic does nothing), making this faster than a single call to +// refcount.Decrement() checking the zero refcount condition. +template <typename R, typename Fn> +inline void FastUnref(R* r, Fn&& fn) { + if (r->refcount.IsOne()) { + fn(r); + } else if (!r->refcount.DecrementExpectHighRefcount()) { + fn(r); + } +} + +// Deletes a leaf node data edge. Requires `rep` to be an EXTERNAL or FLAT +// node, or a SUBSTRING of an EXTERNAL or FLAT node. +void DeleteLeafEdge(CordRep* rep) { + for (;;) { + if (rep->tag >= FLAT) { + CordRepFlat::Delete(rep->flat()); + return; + } + if (rep->tag == EXTERNAL) { + CordRepExternal::Delete(rep->external()); + return; + } + assert(rep->tag == SUBSTRING); + CordRepSubstring* substring = rep->substring(); + rep = substring->child; + assert(rep->tag == EXTERNAL || rep->tag >= FLAT); + delete substring; + if (rep->refcount.Decrement()) return; + } +} + +// StackOperations contains the logic to build a left-most or right-most stack +// (leg) down to the leaf level of a btree, and 'unwind' / 'Finalize' methods to +// propagate node changes up the stack. +template <EdgeType edge_type> +struct StackOperations { + // Returns true if the node at 'depth' is mutable, i.e. has a refcount + // of one, carries no CRC, and all of its parent nodes have a refcount of one. + inline bool owned(int depth) const { return depth < share_depth; } + + // Returns the node at 'depth'. + inline CordRepBtree* node(int depth) const { return stack[depth]; } + + // Builds a `depth` levels deep stack starting at `tree` recording which nodes + // are private in the form of the 'share depth' where nodes are shared. + inline CordRepBtree* BuildStack(CordRepBtree* tree, int depth) { + assert(depth <= tree->height()); + int current_depth = 0; + while (current_depth < depth && tree->refcount.IsMutable()) { + stack[current_depth++] = tree; + tree = tree->Edge(edge_type)->btree(); + } + share_depth = current_depth + (tree->refcount.IsMutable() ? 1 : 0); + while (current_depth < depth) { + stack[current_depth++] = tree; + tree = tree->Edge(edge_type)->btree(); + } + return tree; + } + + // Builds a stack with the invariant that all nodes are private owned / not + // shared and carry no CRC data. This is used in iterative updates where a + // previous propagation guaranteed all nodes have this property. + inline void BuildOwnedStack(CordRepBtree* tree, int height) { + assert(height <= CordRepBtree::kMaxHeight); + int depth = 0; + while (depth < height) { + assert(tree->refcount.IsMutable()); + stack[depth++] = tree; + tree = tree->Edge(edge_type)->btree(); + } + assert(tree->refcount.IsMutable()); + share_depth = depth + 1; + } + + // Processes the final 'top level' result action for the tree. + // See the 'Action' enum for the various action implications. + static inline CordRepBtree* Finalize(CordRepBtree* tree, OpResult result) { + switch (result.action) { + case CordRepBtree::kPopped: + tree = edge_type == kBack ? CordRepBtree::New(tree, result.tree) + : CordRepBtree::New(result.tree, tree); + if (ABSL_PREDICT_FALSE(tree->height() > CordRepBtree::kMaxHeight)) { + tree = CordRepBtree::Rebuild(tree); + ABSL_RAW_CHECK(tree->height() <= CordRepBtree::kMaxHeight, + "Max height exceeded"); + } + return tree; + case CordRepBtree::kCopied: + CordRep::Unref(tree); + ABSL_FALLTHROUGH_INTENDED; + case CordRepBtree::kSelf: + return result.tree; + } + ABSL_INTERNAL_UNREACHABLE; + return result.tree; + } + + // Propagate the action result in 'result' up into all nodes of the stack + // starting at depth 'depth'. 'length' contains the extra length of data that + // was added at the lowest level, and is updated into all nodes of the stack. + // See the 'Action' enum for the various action implications. + // If 'propagate' is true, then any copied node values are updated into the + // stack, which is used for iterative processing on the same stack. + template <bool propagate = false> + inline CordRepBtree* Unwind(CordRepBtree* tree, int depth, size_t length, + OpResult result) { + // TODO(mvels): revisit the below code to check if 3 loops with 3 + // (incremental) conditions is faster than 1 loop with a switch. + // Benchmarking and perf recordings indicate the loop with switch is + // fastest, likely because of indirect jumps on the tight case values and + // dense branches. But it's worth considering 3 loops, as the `action` + // transitions are mono directional. E.g.: + // while (action == kPopped) { + // ... + // } + // while (action == kCopied) { + // ... + // } + // ... + // We also found that an "if () do {}" loop here seems faster, possibly + // because it allows the branch predictor more granular heuristics on + // 'single leaf' (`depth` == 0) and 'single depth' (`depth` == 1) cases + // which appear to be the most common use cases. + if (depth != 0) { + do { + CordRepBtree* node = stack[--depth]; + const bool owned = depth < share_depth; + switch (result.action) { + case CordRepBtree::kPopped: + assert(!propagate); + result = node->AddEdge<edge_type>(owned, result.tree, length); + break; + case CordRepBtree::kCopied: + result = node->SetEdge<edge_type>(owned, result.tree, length); + if (propagate) stack[depth] = result.tree; + break; + case CordRepBtree::kSelf: + node->length += length; + while (depth > 0) { + node = stack[--depth]; + node->length += length; + } + return node; + } + } while (depth > 0); + } + return Finalize(tree, result); + } + + // Invokes `Unwind` with `propagate=true` to update the stack node values. + inline CordRepBtree* Propagate(CordRepBtree* tree, int depth, size_t length, + OpResult result) { + return Unwind</*propagate=*/true>(tree, depth, length, result); + } + + // `share_depth` contains the depth at which the nodes in the stack cannot + // be mutated. I.e., if the top most level is shared (i.e.: + // `!refcount.IsMutable()`), then `share_depth` is 0. If the 2nd node + // is shared (and implicitly all nodes below that) then `share_depth` is 1, + // etc. A `share_depth` greater than the depth of the stack indicates that + // none of the nodes in the stack are shared. + int share_depth; + + NodeStack stack; +}; + +} // namespace + +void CordRepBtree::Dump(const CordRep* rep, absl::string_view label, + bool include_contents, std::ostream& stream) { + stream << "===================================\n"; + if (!label.empty()) { + stream << label << '\n'; + stream << "-----------------------------------\n"; + } + if (rep) { + DumpAll(rep, include_contents, stream); + } else { + stream << "NULL\n"; + } +} + +void CordRepBtree::Dump(const CordRep* rep, absl::string_view label, + std::ostream& stream) { + Dump(rep, label, false, stream); +} + +void CordRepBtree::Dump(const CordRep* rep, std::ostream& stream) { + Dump(rep, absl::string_view(), false, stream); +} + +void CordRepBtree::DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end) { + for (CordRep* edge : tree->Edges(begin, end)) { + FastUnref(edge, DeleteLeafEdge); + } + Delete(tree); +} + +void CordRepBtree::DestroyNonLeaf(CordRepBtree* tree, size_t begin, + size_t end) { + for (CordRep* edge : tree->Edges(begin, end)) { + FastUnref(edge->btree(), Destroy); + } + Delete(tree); +} + +bool CordRepBtree::IsValid(const CordRepBtree* tree, bool shallow) { +#define NODE_CHECK_VALID(x) \ + if (!(x)) { \ + ABSL_RAW_LOG(ERROR, "CordRepBtree::CheckValid() FAILED: %s", #x); \ + return false; \ + } +#define NODE_CHECK_EQ(x, y) \ + if ((x) != (y)) { \ + ABSL_RAW_LOG(ERROR, \ + "CordRepBtree::CheckValid() FAILED: %s != %s (%s vs %s)", #x, \ + #y, absl::StrCat(x).c_str(), absl::StrCat(y).c_str()); \ + return false; \ + } + + NODE_CHECK_VALID(tree != nullptr); + NODE_CHECK_VALID(tree->IsBtree()); + NODE_CHECK_VALID(tree->height() <= kMaxHeight); + NODE_CHECK_VALID(tree->begin() < tree->capacity()); + NODE_CHECK_VALID(tree->end() <= tree->capacity()); + NODE_CHECK_VALID(tree->begin() <= tree->end()); + size_t child_length = 0; + for (CordRep* edge : tree->Edges()) { + NODE_CHECK_VALID(edge != nullptr); + if (tree->height() > 0) { + NODE_CHECK_VALID(edge->IsBtree()); + NODE_CHECK_VALID(edge->btree()->height() == tree->height() - 1); + } else { + NODE_CHECK_VALID(IsDataEdge(edge)); + } + child_length += edge->length; + } + NODE_CHECK_EQ(child_length, tree->length); + if ((!shallow || exhaustive_validation()) && tree->height() > 0) { + for (CordRep* edge : tree->Edges()) { + if (!IsValid(edge->btree(), shallow)) return false; + } + } + return true; + +#undef NODE_CHECK_VALID +#undef NODE_CHECK_EQ +} + +#ifndef NDEBUG + +CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, bool shallow) { + if (!IsValid(tree, shallow)) { + Dump(tree, "CordRepBtree validation failed:", false, std::cout); + ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED"); + } + return tree; +} + +const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree, + bool shallow) { + if (!IsValid(tree, shallow)) { + Dump(tree, "CordRepBtree validation failed:", false, std::cout); + ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED"); + } + return tree; +} + +#endif // NDEBUG + +template <EdgeType edge_type> +inline OpResult CordRepBtree::AddEdge(bool owned, CordRep* edge, size_t delta) { + if (size() >= kMaxCapacity) return {New(edge), kPopped}; + OpResult result = ToOpResult(owned); + result.tree->Add<edge_type>(edge); + result.tree->length += delta; + return result; +} + +template <EdgeType edge_type> +OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) { + OpResult result; + const size_t idx = index(edge_type); + if (owned) { + result = {this, kSelf}; + CordRep::Unref(edges_[idx]); + } else { + // Create a copy containing all unchanged edges. Unchanged edges are the + // open interval [begin, back) or [begin + 1, end) depending on `edge_type`. + // We conveniently cover both case using a constexpr `shift` being 0 or 1 + // as `end :== back + 1`. + result = {CopyRaw(), kCopied}; + constexpr int shift = edge_type == kFront ? 1 : 0; + for (CordRep* r : Edges(begin() + shift, back() + shift)) { + CordRep::Ref(r); + } + } + result.tree->edges_[idx] = edge; + result.tree->length += delta; + return result; +} + +template <EdgeType edge_type> +CordRepBtree* CordRepBtree::AddCordRep(CordRepBtree* tree, CordRep* rep) { + const int depth = tree->height(); + const size_t length = rep->length; + StackOperations<edge_type> ops; + CordRepBtree* leaf = ops.BuildStack(tree, depth); + const OpResult result = + leaf->AddEdge<edge_type>(ops.owned(depth), rep, length); + return ops.Unwind(tree, depth, length, result); +} + +template <> +CordRepBtree* CordRepBtree::NewLeaf<kBack>(absl::string_view data, + size_t extra) { + CordRepBtree* leaf = CordRepBtree::New(0); + size_t length = 0; + size_t end = 0; + const size_t cap = leaf->capacity(); + while (!data.empty() && end != cap) { + auto* flat = CordRepFlat::New(data.length() + extra); + flat->length = (std::min)(data.length(), flat->Capacity()); + length += flat->length; + leaf->edges_[end++] = flat; + data = Consume<kBack>(flat->Data(), data, flat->length); + } + leaf->length = length; + leaf->set_end(end); + return leaf; +} + +template <> +CordRepBtree* CordRepBtree::NewLeaf<kFront>(absl::string_view data, + size_t extra) { + CordRepBtree* leaf = CordRepBtree::New(0); + size_t length = 0; + size_t begin = leaf->capacity(); + leaf->set_end(leaf->capacity()); + while (!data.empty() && begin != 0) { + auto* flat = CordRepFlat::New(data.length() + extra); + flat->length = (std::min)(data.length(), flat->Capacity()); + length += flat->length; + leaf->edges_[--begin] = flat; + data = Consume<kFront>(flat->Data(), data, flat->length); + } + leaf->length = length; + leaf->set_begin(begin); + return leaf; +} + +template <> +absl::string_view CordRepBtree::AddData<kBack>(absl::string_view data, + size_t extra) { + assert(!data.empty()); + assert(size() < capacity()); + AlignBegin(); + const size_t cap = capacity(); + do { + CordRepFlat* flat = CordRepFlat::New(data.length() + extra); + const size_t n = (std::min)(data.length(), flat->Capacity()); + flat->length = n; + edges_[fetch_add_end(1)] = flat; + data = Consume<kBack>(flat->Data(), data, n); + } while (!data.empty() && end() != cap); + return data; +} + +template <> +absl::string_view CordRepBtree::AddData<kFront>(absl::string_view data, + size_t extra) { + assert(!data.empty()); + assert(size() < capacity()); + AlignEnd(); + do { + CordRepFlat* flat = CordRepFlat::New(data.length() + extra); + const size_t n = (std::min)(data.length(), flat->Capacity()); + flat->length = n; + edges_[sub_fetch_begin(1)] = flat; + data = Consume<kFront>(flat->Data(), data, n); + } while (!data.empty() && begin() != 0); + return data; +} + +template <EdgeType edge_type> +CordRepBtree* CordRepBtree::AddData(CordRepBtree* tree, absl::string_view data, + size_t extra) { + if (ABSL_PREDICT_FALSE(data.empty())) return tree; + + const size_t original_data_size = data.size(); + int depth = tree->height(); + StackOperations<edge_type> ops; + CordRepBtree* leaf = ops.BuildStack(tree, depth); + + // If there is capacity in the last edge, append as much data + // as possible into this last edge. + if (leaf->size() < leaf->capacity()) { + OpResult result = leaf->ToOpResult(ops.owned(depth)); + data = result.tree->AddData<edge_type>(data, extra); + if (data.empty()) { + result.tree->length += original_data_size; + return ops.Unwind(tree, depth, original_data_size, result); + } + + // We added some data into this leaf, but not all. Propagate the added + // length to the top most node, and rebuild the stack with any newly copied + // or updated nodes. From this point on, the path (leg) from the top most + // node to the right-most node towards the leaf node is privately owned. + size_t delta = original_data_size - data.size(); + assert(delta > 0); + result.tree->length += delta; + tree = ops.Propagate(tree, depth, delta, result); + ops.share_depth = depth + 1; + } + + // We were unable to append all data into the existing right-most leaf node. + // This means all remaining data must be put into (a) new leaf node(s) which + // we append to the tree. To make this efficient, we iteratively build full + // leaf nodes from `data` until the created leaf contains all remaining data. + // We utilize the `Unwind` method to merge the created leaf into the first + // level towards root that has capacity. On each iteration with remaining + // data, we rebuild the stack in the knowledge that right-most nodes are + // privately owned after the first `Unwind` completes. + for (;;) { + OpResult result = {CordRepBtree::NewLeaf<edge_type>(data, extra), kPopped}; + if (result.tree->length == data.size()) { + return ops.Unwind(tree, depth, result.tree->length, result); + } + data = Consume<edge_type>(data, result.tree->length); + tree = ops.Unwind(tree, depth, result.tree->length, result); + depth = tree->height(); + ops.BuildOwnedStack(tree, depth); + } +} + +template <EdgeType edge_type> +CordRepBtree* CordRepBtree::Merge(CordRepBtree* dst, CordRepBtree* src) { + assert(dst->height() >= src->height()); + + // Capture source length as we may consume / destroy `src`. + const size_t length = src->length; + + // We attempt to merge `src` at its corresponding height in `dst`. + const int depth = dst->height() - src->height(); + StackOperations<edge_type> ops; + CordRepBtree* merge_node = ops.BuildStack(dst, depth); + + // If there is enough space in `merge_node` for all edges from `src`, add all + // edges to this node, making a fresh copy as needed if not privately owned. + // If `merge_node` does not have capacity for `src`, we rely on `Unwind` and + // `Finalize` to merge `src` into the first level towards `root` where there + // is capacity for another edge, or create a new top level node. + OpResult result; + if (merge_node->size() + src->size() <= kMaxCapacity) { + result = merge_node->ToOpResult(ops.owned(depth)); + result.tree->Add<edge_type>(src->Edges()); + result.tree->length += src->length; + if (src->refcount.IsOne()) { + Delete(src); + } else { + for (CordRep* edge : src->Edges()) CordRep::Ref(edge); + CordRepBtree::Unref(src); + } + } else { + result = {src, kPopped}; + } + + // Unless we merged at the top level (i.e.: src and dst are equal height), + // unwind the result towards the top level, and finalize the result. + if (depth) { + return ops.Unwind(dst, depth, length, result); + } + return ops.Finalize(dst, result); +} + +CopyResult CordRepBtree::CopySuffix(size_t offset) { + assert(offset < this->length); + + // As long as `offset` starts inside the last edge, we can 'drop' the current + // depth. For the most extreme example: if offset references the last data + // edge in the tree, there is only a single edge / path from the top of the + // tree to that last edge, so we can drop all the nodes except that edge. + // The fast path check for this is `back->length >= length - offset`. + int height = this->height(); + CordRepBtree* node = this; + size_t len = node->length - offset; + CordRep* back = node->Edge(kBack); + while (back->length >= len) { + offset = back->length - len; + if (--height < 0) { + return {MakeSubstring(CordRep::Ref(back), offset), height}; + } + node = back->btree(); + back = node->Edge(kBack); + } + if (offset == 0) return {CordRep::Ref(node), height}; + + // Offset does not point into the last edge, so we span at least two edges. + // Find the index of offset with `IndexBeyond` which provides us the edge + // 'beyond' the offset if offset is not a clean starting point of an edge. + Position pos = node->IndexBeyond(offset); + CordRepBtree* sub = node->CopyToEndFrom(pos.index, len); + const CopyResult result = {sub, height}; + + // `pos.n` contains a non zero value if the offset is not an exact starting + // point of an edge. In this case, `pos.n` contains the 'trailing' amount of + // bytes of the edge preceding that in `pos.index`. We need to iteratively + // adjust the preceding edge with the 'broken' offset until we have a perfect + // start of the edge. + while (pos.n != 0) { + assert(pos.index >= 1); + const size_t begin = pos.index - 1; + sub->set_begin(begin); + CordRep* const edge = node->Edge(begin); + + len = pos.n; + offset = edge->length - len; + + if (--height < 0) { + sub->edges_[begin] = MakeSubstring(CordRep::Ref(edge), offset, len); + return result; + } + + node = edge->btree(); + pos = node->IndexBeyond(offset); + + CordRepBtree* nsub = node->CopyToEndFrom(pos.index, len); + sub->edges_[begin] = nsub; + sub = nsub; + } + sub->set_begin(pos.index); + return result; +} + +CopyResult CordRepBtree::CopyPrefix(size_t n, bool allow_folding) { + assert(n > 0); + assert(n <= this->length); + + // As long as `n` does not exceed the length of the first edge, we can 'drop' + // the current depth. For the most extreme example: if we'd copy a 1 byte + // prefix from a tree, there is only a single edge / path from the top of the + // tree to the single data edge containing this byte, so we can drop all the + // nodes except the data node. + int height = this->height(); + CordRepBtree* node = this; + CordRep* front = node->Edge(kFront); + if (allow_folding) { + while (front->length >= n) { + if (--height < 0) return {MakeSubstring(CordRep::Ref(front), 0, n), -1}; + node = front->btree(); + front = node->Edge(kFront); + } + } + if (node->length == n) return {CordRep::Ref(node), height}; + + // `n` spans at least two nodes, find the end point of the span. + Position pos = node->IndexOf(n); + + // Create a partial copy of the node up to `pos.index`, with a defined length + // of `n`. Any 'partial last edge' is added further below as needed. + CordRepBtree* sub = node->CopyBeginTo(pos.index, n); + const CopyResult result = {sub, height}; + + // `pos.n` contains the 'offset inside the edge for IndexOf(n)'. As long as + // this is not zero, we don't have a 'clean cut', so we need to make a + // (partial) copy of that last edge, and repeat this until pos.n is zero. + while (pos.n != 0) { + size_t end = pos.index; + n = pos.n; + + CordRep* edge = node->Edge(pos.index); + if (--height < 0) { + sub->edges_[end++] = MakeSubstring(CordRep::Ref(edge), 0, n); + sub->set_end(end); + AssertValid(result.edge->btree()); + return result; + } + + node = edge->btree(); + pos = node->IndexOf(n); + CordRepBtree* nsub = node->CopyBeginTo(pos.index, n); + sub->edges_[end++] = nsub; + sub->set_end(end); + sub = nsub; + } + sub->set_end(pos.index); + AssertValid(result.edge->btree()); + return result; +} + +CordRep* CordRepBtree::ExtractFront(CordRepBtree* tree) { + CordRep* front = tree->Edge(tree->begin()); + if (tree->refcount.IsMutable()) { + Unref(tree->Edges(tree->begin() + 1, tree->end())); + CordRepBtree::Delete(tree); + } else { + CordRep::Ref(front); + CordRep::Unref(tree); + } + return front; +} + +CordRepBtree* CordRepBtree::ConsumeBeginTo(CordRepBtree* tree, size_t end, + size_t new_length) { + assert(end <= tree->end()); + if (tree->refcount.IsMutable()) { + Unref(tree->Edges(end, tree->end())); + tree->set_end(end); + tree->length = new_length; + } else { + CordRepBtree* old = tree; + tree = tree->CopyBeginTo(end, new_length); + CordRep::Unref(old); + } + return tree; +} + +CordRep* CordRepBtree::RemoveSuffix(CordRepBtree* tree, size_t n) { + // Check input and deal with trivial cases 'Remove all/none' + assert(tree != nullptr); + assert(n <= tree->length); + const size_t len = tree->length; + if (ABSL_PREDICT_FALSE(n == 0)) { + return tree; + } + if (ABSL_PREDICT_FALSE(n >= len)) { + CordRepBtree::Unref(tree); + return nullptr; + } + + size_t length = len - n; + int height = tree->height(); + bool is_mutable = tree->refcount.IsMutable(); + + // Extract all top nodes which are reduced to size = 1 + Position pos = tree->IndexOfLength(length); + while (pos.index == tree->begin()) { + CordRep* edge = ExtractFront(tree); + is_mutable &= edge->refcount.IsMutable(); + if (height-- == 0) return ResizeEdge(edge, length, is_mutable); + tree = edge->btree(); + pos = tree->IndexOfLength(length); + } + + // Repeat the following sequence traversing down the tree: + // - Crop the top node to the 'last remaining edge' adjusting length. + // - Set the length for down edges to the partial length in that last edge. + // - Repeat this until the last edge is 'included in full' + // - If we hit the data edge level, resize and return the last data edge + CordRepBtree* top = tree = ConsumeBeginTo(tree, pos.index + 1, length); + CordRep* edge = tree->Edge(pos.index); + length = pos.n; + while (length != edge->length) { + // ConsumeBeginTo guarantees `tree` is a clean, privately owned copy. + assert(tree->refcount.IsMutable()); + const bool edge_is_mutable = edge->refcount.IsMutable(); + + if (height-- == 0) { + tree->edges_[pos.index] = ResizeEdge(edge, length, edge_is_mutable); + return AssertValid(top); + } + + if (!edge_is_mutable) { + // We can't 'in place' remove any suffixes down this edge. + // Replace this edge with a prefix copy instead. + tree->edges_[pos.index] = edge->btree()->CopyPrefix(length, false).edge; + CordRep::Unref(edge); + return AssertValid(top); + } + + // Move down one level, rinse repeat. + tree = edge->btree(); + pos = tree->IndexOfLength(length); + tree = ConsumeBeginTo(edge->btree(), pos.index + 1, length); + edge = tree->Edge(pos.index); + length = pos.n; + } + + return AssertValid(top); +} + +CordRep* CordRepBtree::SubTree(size_t offset, size_t n) { + assert(n <= this->length); + assert(offset <= this->length - n); + if (ABSL_PREDICT_FALSE(n == 0)) return nullptr; + + CordRepBtree* node = this; + int height = node->height(); + Position front = node->IndexOf(offset); + CordRep* left = node->edges_[front.index]; + while (front.n + n <= left->length) { + if (--height < 0) return MakeSubstring(CordRep::Ref(left), front.n, n); + node = left->btree(); + front = node->IndexOf(front.n); + left = node->edges_[front.index]; + } + + const Position back = node->IndexBefore(front, n); + CordRep* const right = node->edges_[back.index]; + assert(back.index > front.index); + + // Get partial suffix and prefix entries. + CopyResult prefix; + CopyResult suffix; + if (height > 0) { + // Copy prefix and suffix of the boundary nodes. + prefix = left->btree()->CopySuffix(front.n); + suffix = right->btree()->CopyPrefix(back.n); + + // If there is an edge between the prefix and suffix edges, then the tree + // must remain at its previous (full) height. If we have no edges between + // prefix and suffix edges, then the tree must be as high as either the + // suffix or prefix edges (which are collapsed to their minimum heights). + if (front.index + 1 == back.index) { + height = (std::max)(prefix.height, suffix.height) + 1; + } + + // Raise prefix and suffixes to the new tree height. + for (int h = prefix.height + 1; h < height; ++h) { + prefix.edge = CordRepBtree::New(prefix.edge); + } + for (int h = suffix.height + 1; h < height; ++h) { + suffix.edge = CordRepBtree::New(suffix.edge); + } + } else { + // Leaf node, simply take substrings for prefix and suffix. + prefix = CopyResult{MakeSubstring(CordRep::Ref(left), front.n), -1}; + suffix = CopyResult{MakeSubstring(CordRep::Ref(right), 0, back.n), -1}; + } + + // Compose resulting tree. + CordRepBtree* sub = CordRepBtree::New(height); + size_t end = 0; + sub->edges_[end++] = prefix.edge; + for (CordRep* r : node->Edges(front.index + 1, back.index)) { + sub->edges_[end++] = CordRep::Ref(r); + } + sub->edges_[end++] = suffix.edge; + sub->set_end(end); + sub->length = n; + return AssertValid(sub); +} + +CordRepBtree* CordRepBtree::MergeTrees(CordRepBtree* left, + CordRepBtree* right) { + return left->height() >= right->height() ? Merge<kBack>(left, right) + : Merge<kFront>(right, left); +} + +bool CordRepBtree::IsFlat(absl::string_view* fragment) const { + if (height() == 0 && size() == 1) { + if (fragment) *fragment = Data(begin()); + return true; + } + return false; +} + +bool CordRepBtree::IsFlat(size_t offset, const size_t n, + absl::string_view* fragment) const { + assert(n <= this->length); + assert(offset <= this->length - n); + if (ABSL_PREDICT_FALSE(n == 0)) return false; + int height = this->height(); + const CordRepBtree* node = this; + for (;;) { + const Position front = node->IndexOf(offset); + const CordRep* edge = node->Edge(front.index); + if (edge->length < front.n + n) return false; + if (--height < 0) { + if (fragment) *fragment = EdgeData(edge).substr(front.n, n); + return true; + } + offset = front.n; + node = node->Edge(front.index)->btree(); + } +} + +char CordRepBtree::GetCharacter(size_t offset) const { + assert(offset < length); + const CordRepBtree* node = this; + int height = node->height(); + for (;;) { + Position front = node->IndexOf(offset); + if (--height < 0) return node->Data(front.index)[front.n]; + offset = front.n; + node = node->Edge(front.index)->btree(); + } +} + +Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) { + // The inlined version in `GetAppendBuffer()` deals with all heights <= 3. + assert(height() >= 4); + assert(refcount.IsMutable()); + + // Build a stack of nodes we may potentially need to update if we find a + // non-shared FLAT with capacity at the leaf level. + const int depth = height(); + CordRepBtree* node = this; + CordRepBtree* stack[kMaxDepth]; + for (int i = 0; i < depth; ++i) { + node = node->Edge(kBack)->btree(); + if (!node->refcount.IsMutable()) return {}; + stack[i] = node; + } + + // Must be a privately owned, mutable flat. + CordRep* const edge = node->Edge(kBack); + if (!edge->refcount.IsMutable() || edge->tag < FLAT) return {}; + + // Must have capacity. + const size_t avail = edge->flat()->Capacity() - edge->length; + if (avail == 0) return {}; + + // Build span on remaining capacity. + size_t delta = (std::min)(size, avail); + Span<char> span = {edge->flat()->Data() + edge->length, delta}; + edge->length += delta; + this->length += delta; + for (int i = 0; i < depth; ++i) { + stack[i]->length += delta; + } + return span; +} + +CordRepBtree* CordRepBtree::CreateSlow(CordRep* rep) { + if (rep->IsBtree()) return rep->btree(); + + CordRepBtree* node = nullptr; + auto consume = [&node](CordRep* r, size_t offset, size_t length) { + r = MakeSubstring(r, offset, length); + if (node == nullptr) { + node = New(r); + } else { + node = CordRepBtree::AddCordRep<kBack>(node, r); + } + }; + Consume(rep, consume); + return node; +} + +CordRepBtree* CordRepBtree::AppendSlow(CordRepBtree* tree, CordRep* rep) { + if (ABSL_PREDICT_TRUE(rep->IsBtree())) { + return MergeTrees(tree, rep->btree()); + } + auto consume = [&tree](CordRep* r, size_t offset, size_t length) { + r = MakeSubstring(r, offset, length); + tree = CordRepBtree::AddCordRep<kBack>(tree, r); + }; + Consume(rep, consume); + return tree; +} + +CordRepBtree* CordRepBtree::PrependSlow(CordRepBtree* tree, CordRep* rep) { + if (ABSL_PREDICT_TRUE(rep->IsBtree())) { + return MergeTrees(rep->btree(), tree); + } + auto consume = [&tree](CordRep* r, size_t offset, size_t length) { + r = MakeSubstring(r, offset, length); + tree = CordRepBtree::AddCordRep<kFront>(tree, r); + }; + ReverseConsume(rep, consume); + return tree; +} + +CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, absl::string_view data, + size_t extra) { + return CordRepBtree::AddData<kBack>(tree, data, extra); +} + +CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, absl::string_view data, + size_t extra) { + return CordRepBtree::AddData<kFront>(tree, data, extra); +} + +template CordRepBtree* CordRepBtree::AddCordRep<kFront>(CordRepBtree* tree, + CordRep* rep); +template CordRepBtree* CordRepBtree::AddCordRep<kBack>(CordRepBtree* tree, + CordRep* rep); +template CordRepBtree* CordRepBtree::AddData<kFront>(CordRepBtree* tree, + absl::string_view data, + size_t extra); +template CordRepBtree* CordRepBtree::AddData<kBack>(CordRepBtree* tree, + absl::string_view data, + size_t extra); + +void CordRepBtree::Rebuild(CordRepBtree** stack, CordRepBtree* tree, + bool consume) { + bool owned = consume && tree->refcount.IsOne(); + if (tree->height() == 0) { + for (CordRep* edge : tree->Edges()) { + if (!owned) edge = CordRep::Ref(edge); + size_t height = 0; + size_t length = edge->length; + CordRepBtree* node = stack[0]; + OpResult result = node->AddEdge<kBack>(true, edge, length); + while (result.action == CordRepBtree::kPopped) { + stack[height] = result.tree; + if (stack[++height] == nullptr) { + result.action = CordRepBtree::kSelf; + stack[height] = CordRepBtree::New(node, result.tree); + } else { + node = stack[height]; + result = node->AddEdge<kBack>(true, result.tree, length); + } + } + while (stack[++height] != nullptr) { + stack[height]->length += length; + } + } + } else { + for (CordRep* rep : tree->Edges()) { + Rebuild(stack, rep->btree(), owned); + } + } + if (consume) { + if (owned) { + CordRepBtree::Delete(tree); + } else { + CordRepBtree::Unref(tree); + } + } +} + +CordRepBtree* CordRepBtree::Rebuild(CordRepBtree* tree) { + // Set up initial stack with empty leaf node. + CordRepBtree* node = CordRepBtree::New(); + CordRepBtree* stack[CordRepBtree::kMaxDepth + 1] = {node}; + + // Recursively build the tree, consuming the input tree. + Rebuild(stack, tree, /* consume reference */ true); + + // Return top most node + for (CordRepBtree* parent : stack) { + if (parent == nullptr) return node; + node = parent; + } + + // Unreachable + assert(false); + return nullptr; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.h b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.h new file mode 100644 index 0000000000..bb38f0c3fe --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree.h @@ -0,0 +1,939 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_ + +#include <cassert> +#include <cstdint> +#include <iosfwd> + +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/optimization.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +class CordRepBtreeNavigator; + +// CordRepBtree is as the name implies a btree implementation of a Cordrep tree. +// Data is stored at the leaf level only, non leaf nodes contain down pointers +// only. Allowed types of data edges are FLAT, EXTERNAL and SUBSTRINGs of FLAT +// or EXTERNAL nodes. The implementation allows for data to be added to either +// end of the tree only, it does not provide any 'insert' logic. This has the +// benefit that we can expect good fill ratios: all nodes except the outer +// 'legs' will have 100% fill ratios for trees built using Append/Prepend +// methods. Merged trees will typically have a fill ratio well above 50% as in a +// similar fashion, one side of the merged tree will typically have a 100% fill +// ratio, and the 'open' end will average 50%. All operations are O(log(n)) or +// better, and the tree never needs balancing. +// +// All methods accepting a CordRep* or CordRepBtree* adopt a reference on that +// input unless explicitly stated otherwise. All functions returning a CordRep* +// or CordRepBtree* instance transfer a reference back to the caller. +// Simplified, callers both 'donate' and 'consume' a reference count on each +// call, simplifying the API. An example of building a tree: +// +// CordRepBtree* tree = CordRepBtree::Create(MakeFlat("Hello")); +// tree = CordRepBtree::Append(tree, MakeFlat("world")); +// +// In the above example, all inputs are consumed, making each call affecting +// `tree` reference count neutral. The returned `tree` value can be different +// from the input if the input is shared with other threads, or if the tree +// grows in height, but callers typically never have to concern themselves with +// that and trust that all methods DTRT at all times. +class CordRepBtree : public CordRep { + public: + // EdgeType identifies `front` and `back` enum values. + // Various implementations in CordRepBtree such as `Add` and `Edge` are + // generic and templated on operating on either of the boundary edges. + // For more information on the possible edges contained in a CordRepBtree + // instance see the documentation for `edges_`. + enum class EdgeType { kFront, kBack }; + + // Convenience constants into `EdgeType` + static constexpr EdgeType kFront = EdgeType::kFront; + static constexpr EdgeType kBack = EdgeType::kBack; + + // Maximum number of edges: based on experiments and performance data, we can + // pick suitable values resulting in optimum cacheline aligned values. The + // preferred values are based on 64-bit systems where we aim to align this + // class onto 64 bytes, i.e.: 6 = 64 bytes, 14 = 128 bytes, etc. + // TODO(b/192061034): experiment with alternative sizes. + static constexpr size_t kMaxCapacity = 6; + + // Reasonable maximum height of the btree. We can expect a fill ratio of at + // least 50%: trees are always expanded at the front or back. Concatenating + // trees will then typically fold at the top most node, where the lower nodes + // are at least at capacity on one side of joined inputs. At a lower fill + // rate of 4 edges per node, we have capacity for ~16 million leaf nodes. + // We will fail / abort if an application ever exceeds this height, which + // should be extremely rare (near impossible) and be an indication of an + // application error: we do not assume it reasonable for any application to + // operate correctly with such monster trees. + // Another compelling reason for the number `12` is that any contextual stack + // required for navigation or insertion requires 12 words and 12 bytes, which + // fits inside 2 cache lines with some room to spare, and is reasonable as a + // local stack variable compared to Cord's current near 400 bytes stack use. + // The maximum `height` value of a node is then `kMaxDepth - 1` as node height + // values start with a value of 0 for leaf nodes. + static constexpr int kMaxDepth = 12; + static constexpr int kMaxHeight = kMaxDepth - 1; + + // `Action` defines the action for unwinding changes done at the btree's leaf + // level that need to be propagated up to the parent node(s). Each operation + // on a node has an effect / action defined as follows: + // - kSelf + // The operation (add / update, etc) was performed directly on the node as + // the node is private to the current thread (i.e.: not shared directly or + // indirectly through a refcount > 1). Changes can be propagated directly to + // all parent nodes as all parent nodes are also then private to the current + // thread. + // - kCopied + // The operation (add / update, etc) was performed on a copy of the original + // node, as the node is (potentially) directly or indirectly shared with + // other threads. Changes need to be propagated into the parent nodes where + // the old down pointer must be unreffed and replaced with this new copy. + // Such changes to parent nodes may themselves require a copy if the parent + // node is also shared. A kCopied action can propagate all the way to the + // top node where we then must unref the `tree` input provided by the + // caller, and return the new copy. + // - kPopped + // The operation (typically add) could not be satisfied due to insufficient + // capacity in the targeted node, and a new 'leg' was created that needs to + // be added into the parent node. For example, adding a FLAT inside a leaf + // node that is at capacity will create a new leaf node containing that + // FLAT, that needs to be 'popped' up the btree. Such 'pop' actions can + // cascade up the tree if parent nodes are also at capacity. A 'Popped' + // action propagating all the way to the top of the tree will result in + // the tree becoming one level higher than the current tree through a final + // `CordRepBtree::New(tree, popped)` call, resulting in a new top node + // referencing the old tree and the new (fully popped upwards) 'leg'. + enum Action { kSelf, kCopied, kPopped }; + + // Result of an operation on a node. See the `Action` enum for details. + struct OpResult { + CordRepBtree* tree; + Action action; + }; + + // Return value of the CopyPrefix and CopySuffix methods which can + // return a node or data edge at any height inside the tree. + // A height of 0 defines the lowest (leaf) node, a height of -1 identifies + // `edge` as being a plain data node: EXTERNAL / FLAT or SUBSTRING thereof. + struct CopyResult { + CordRep* edge; + int height; + }; + + // Logical position inside a node: + // - index: index of the edge. + // - n: size or offset value depending on context. + struct Position { + size_t index; + size_t n; + }; + + // Creates a btree from the given input. Adopts a ref of `rep`. + // If the input `rep` is itself a btree, i.e., `IsBtree()`, then this + // function immediately returns `rep->btree()`. If the input is a valid data + // edge (see IsDataEdge()), then a new leaf node is returned containing `rep` + // as the sole data edge. Else, the input is assumed to be a (legacy) concat + // tree, and the input is consumed and transformed into a btree(). + static CordRepBtree* Create(CordRep* rep); + + // Destroys the provided tree. Should only be called by cord internal API's, + // typically after a ref_count.Decrement() on the last reference count. + static void Destroy(CordRepBtree* tree); + + // Use CordRep::Unref() as we overload for absl::Span<CordRep* const>. + using CordRep::Unref; + + // Unrefs all edges in `edges` which are assumed to be 'likely one'. + static void Unref(absl::Span<CordRep* const> edges); + + // Appends / Prepends an existing CordRep instance to this tree. + // The below methods accept three types of input: + // 1) `rep` is a data node (See `IsDataNode` for valid data edges). + // `rep` is appended or prepended to this tree 'as is'. + // 2) `rep` is a BTREE. + // `rep` is merged into `tree` respecting the Append/Prepend order. + // 3) `rep` is some other (legacy) type. + // `rep` is converted in place and added to `tree` + // Requires `tree` and `rep` to be not null. + static CordRepBtree* Append(CordRepBtree* tree, CordRep* rep); + static CordRepBtree* Prepend(CordRepBtree* tree, CordRep* rep); + + // Append/Prepend the data in `data` to this tree. + // The `extra` parameter defines how much extra capacity should be allocated + // for any additional FLAT being allocated. This is an optimization hint from + // the caller. For example, a caller may need to add 2 string_views of data + // "abc" and "defghi" which are not consecutive. The caller can in this case + // invoke `AddData(tree, "abc", 6)`, and any newly added flat is allocated + // where possible with at least 6 bytes of extra capacity beyond `length`. + // This helps avoiding data getting fragmented over multiple flats. + // There is no limit on the size of `data`. If `data` can not be stored inside + // a single flat, then the function will iteratively add flats until all data + // has been consumed and appended or prepended to the tree. + static CordRepBtree* Append(CordRepBtree* tree, string_view data, + size_t extra = 0); + static CordRepBtree* Prepend(CordRepBtree* tree, string_view data, + size_t extra = 0); + + // Returns a new tree, containing `n` bytes of data from this instance + // starting at offset `offset`. Where possible, the returned tree shares + // (re-uses) data edges and nodes with this instance to minimize the + // combined memory footprint of both trees. + // Requires `offset + n <= length`. Returns `nullptr` if `n` is zero. + CordRep* SubTree(size_t offset, size_t n); + + // Removes `n` trailing bytes from `tree`, and returns the resulting tree + // or data edge. Returns `tree` if n is zero, and nullptr if n == length. + // This function is logically identical to: + // result = tree->SubTree(0, tree->length - n); + // Unref(tree); + // return result; + // However, the actual implementation will as much as possible perform 'in + // place' modifications on the tree on all nodes and edges that are mutable. + // For example, in a fully privately owned tree with the last edge being a + // flat of length 12, RemoveSuffix(1) will simply set the length of that data + // edge to 11, and reduce the length of all nodes on the edge path by 1. + static CordRep* RemoveSuffix(CordRepBtree* tree, size_t n); + + // Returns the character at the given offset. + char GetCharacter(size_t offset) const; + + // Returns true if this node holds a single data edge, and if so, sets + // `fragment` to reference the contained data. `fragment` is an optional + // output parameter and allowed to be null. + bool IsFlat(absl::string_view* fragment) const; + + // Returns true if the data of `n` bytes starting at offset `offset` + // is contained in a single data edge, and if so, sets fragment to reference + // the contained data. `fragment` is an optional output parameter and allowed + // to be null. + bool IsFlat(size_t offset, size_t n, absl::string_view* fragment) const; + + // Returns a span (mutable range of bytes) of up to `size` bytes into the + // last FLAT data edge inside this tree under the following conditions: + // - none of the nodes down into the FLAT node are shared. + // - the last data edge in this tree is a non-shared FLAT. + // - the referenced FLAT has additional capacity available. + // If all these conditions are met, a non-empty span is returned, and the + // length of the flat node and involved tree nodes have been increased by + // `span.length()`. The caller is responsible for immediately assigning values + // to all uninitialized data reference by the returned span. + // Requires `this->refcount.IsMutable()`: this function forces the + // caller to do this fast path check on the top level node, as this is the + // most commonly shared node of a cord tree. + Span<char> GetAppendBuffer(size_t size); + + // Returns the `height` of the tree. The height of a tree is limited to + // kMaxHeight. `height` is implemented as an `int` as in some places we + // use negative (-1) values for 'data edges'. + int height() const { return static_cast<int>(storage[0]); } + + // Properties: begin, back, end, front/back boundary indexes. + size_t begin() const { return static_cast<size_t>(storage[1]); } + size_t back() const { return static_cast<size_t>(storage[2]) - 1; } + size_t end() const { return static_cast<size_t>(storage[2]); } + size_t index(EdgeType edge) const { + return edge == kFront ? begin() : back(); + } + + // Properties: size and capacity. + // `capacity` contains the current capacity of this instance, where + // `kMaxCapacity` contains the maximum capacity of a btree node. + // For now, `capacity` and `kMaxCapacity` return the same value, but this may + // change in the future if we see benefit in dynamically sizing 'small' nodes + // to 'large' nodes for large data trees. + size_t size() const { return end() - begin(); } + size_t capacity() const { return kMaxCapacity; } + + // Edge access + inline CordRep* Edge(size_t index) const; + inline CordRep* Edge(EdgeType edge_type) const; + inline absl::Span<CordRep* const> Edges() const; + inline absl::Span<CordRep* const> Edges(size_t begin, size_t end) const; + + // Returns reference to the data edge at `index`. + // Requires this instance to be a leaf node, and `index` to be valid index. + inline absl::string_view Data(size_t index) const; + + static const char* EdgeDataPtr(const CordRep* r); + static absl::string_view EdgeData(const CordRep* r); + + // Returns true if the provided rep is a FLAT, EXTERNAL or a SUBSTRING node + // holding a FLAT or EXTERNAL child rep. + static bool IsDataEdge(const CordRep* rep); + + // Diagnostics: returns true if `tree` is valid and internally consistent. + // If `shallow` is false, then the provided top level node and all child nodes + // below it are recursively checked. If `shallow` is true, only the provided + // node in `tree` and the cumulative length, type and height of the direct + // child nodes of `tree` are checked. The value of `shallow` is ignored if the + // internal `cord_btree_exhaustive_validation` diagnostics variable is true, + // in which case the performed validations works as if `shallow` were false. + // This function is intended for debugging and testing purposes only. + static bool IsValid(const CordRepBtree* tree, bool shallow = false); + + // Diagnostics: asserts that the provided tree is valid. + // `AssertValid()` performs a shallow validation by default. `shallow` can be + // set to false in which case an exhaustive validation is performed. This + // function is implemented in terms of calling `IsValid()` and asserting the + // return value to be true. See `IsValid()` for more information. + // This function is intended for debugging and testing purposes only. + static CordRepBtree* AssertValid(CordRepBtree* tree, bool shallow = true); + static const CordRepBtree* AssertValid(const CordRepBtree* tree, + bool shallow = true); + + // Diagnostics: dump the contents of this tree to `stream`. + // This function is intended for debugging and testing purposes only. + static void Dump(const CordRep* rep, std::ostream& stream); + static void Dump(const CordRep* rep, absl::string_view label, + std::ostream& stream); + static void Dump(const CordRep* rep, absl::string_view label, + bool include_contents, std::ostream& stream); + + // Adds the edge `edge` to this node if possible. `owned` indicates if the + // current node is potentially shared or not with other threads. Returns: + // - {kSelf, <this>} + // The edge was directly added to this node. + // - {kCopied, <node>} + // The edge was added to a copy of this node. + // - {kPopped, New(edge, height())} + // A new leg with the edge was created as this node has no extra capacity. + template <EdgeType edge_type> + inline OpResult AddEdge(bool owned, CordRep* edge, size_t delta); + + // Replaces the front or back edge with the provided new edge. Returns: + // - {kSelf, <this>} + // The edge was directly set in this node. The old edge is unreffed. + // - {kCopied, <node>} + // A copy of this node was created with the new edge value. + // In both cases, the function adopts a reference on `edge`. + template <EdgeType edge_type> + OpResult SetEdge(bool owned, CordRep* edge, size_t delta); + + // Creates a new empty node at the specified height. + static CordRepBtree* New(int height = 0); + + // Creates a new node containing `rep`, with the height being computed + // automatically based on the type of `rep`. + static CordRepBtree* New(CordRep* rep); + + // Creates a new node containing both `front` and `back` at height + // `front.height() + 1`. Requires `back.height() == front.height()`. + static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back); + + // Creates a fully balanced tree from the provided tree by rebuilding a new + // tree from all data edges in the input. This function is automatically + // invoked internally when the tree exceeds the maximum height. + static CordRepBtree* Rebuild(CordRepBtree* tree); + + private: + CordRepBtree() = default; + ~CordRepBtree() = default; + + // Initializes the main properties `tag`, `begin`, `end`, `height`. + inline void InitInstance(int height, size_t begin = 0, size_t end = 0); + + // Direct property access begin / end + void set_begin(size_t begin) { storage[1] = static_cast<uint8_t>(begin); } + void set_end(size_t end) { storage[2] = static_cast<uint8_t>(end); } + + // Decreases the value of `begin` by `n`, and returns the new value. Notice + // how this returns the new value unlike atomic::fetch_add which returns the + // old value. This is because this is used to prepend edges at 'begin - 1'. + size_t sub_fetch_begin(size_t n) { + storage[1] -= static_cast<uint8_t>(n); + return storage[1]; + } + + // Increases the value of `end` by `n`, and returns the previous value. This + // function is typically used to append edges at 'end'. + size_t fetch_add_end(size_t n) { + const uint8_t current = storage[2]; + storage[2] = static_cast<uint8_t>(current + n); + return current; + } + + // Returns the index of the last edge starting on, or before `offset`, with + // `n` containing the relative offset of `offset` inside that edge. + // Requires `offset` < length. + Position IndexOf(size_t offset) const; + + // Returns the index of the last edge starting before `offset`, with `n` + // containing the relative offset of `offset` inside that edge. + // This function is useful to find the edges for some span of bytes ending at + // `offset` (i.e., `n` bytes). For example: + // + // Position pos = IndexBefore(n) + // edges = Edges(begin(), pos.index) // All full edges (may be empty) + // last = Sub(Edge(pos.index), 0, pos.n) // Last partial edge (may be empty) + // + // Requires 0 < `offset` <= length. + Position IndexBefore(size_t offset) const; + + // Returns the index of the edge ending at (or on) length `length`, and the + // number of bytes inside that edge up to `length`. For example, if we have a + // Node with 2 edges, one of 10 and one of 20 long, then IndexOfLength(27) + // will return {1, 17}, and IndexOfLength(10) will return {0, 10}. + Position IndexOfLength(size_t n) const; + + // Identical to the above function except starting from the position `front`. + // This function is equivalent to `IndexBefore(front.n + offset)`, with + // the difference that this function is optimized to start at `front.index`. + Position IndexBefore(Position front, size_t offset) const; + + // Returns the index of the edge directly beyond the edge containing offset + // `offset`, with `n` containing the distance of that edge from `offset`. + // This function is useful for iteratively finding suffix nodes and remaining + // partial bytes in left-most suffix nodes as for example in CopySuffix. + // Requires `offset` < length. + Position IndexBeyond(size_t offset) const; + + // Destruction + static void DestroyLeaf(CordRepBtree* tree, size_t begin, size_t end); + static void DestroyNonLeaf(CordRepBtree* tree, size_t begin, size_t end); + static void DestroyTree(CordRepBtree* tree, size_t begin, size_t end); + static void Delete(CordRepBtree* tree) { delete tree; } + + // Creates a new leaf node containing as much data as possible from `data`. + // The data is added either forwards or reversed depending on `edge_type`. + // Callers must check the length of the returned node to determine if all data + // was copied or not. + // See the `Append/Prepend` function for the meaning and purpose of `extra`. + template <EdgeType edge_type> + static CordRepBtree* NewLeaf(absl::string_view data, size_t extra); + + // Creates a raw copy of this Btree node, copying all properties, but + // without adding any references to existing edges. + CordRepBtree* CopyRaw() const; + + // Creates a full copy of this Btree node, adding a reference on all edges. + CordRepBtree* Copy() const; + + // Creates a partial copy of this Btree node, copying all edges up to `end`, + // adding a reference on each copied edge, and sets the length of the newly + // created copy to `new_length`. + CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const; + + // Returns a tree containing the edges [tree->begin(), end) and length + // of `new_length`. This method consumes a reference on the provided + // tree, and logically performs the following operation: + // result = tree->CopyBeginTo(end, new_length); + // CordRep::Unref(tree); + // return result; + static CordRepBtree* ConsumeBeginTo(CordRepBtree* tree, size_t end, + size_t new_length); + + // Creates a partial copy of this Btree node, copying all edges starting at + // `begin`, adding a reference on each copied edge, and sets the length of + // the newly created copy to `new_length`. + CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const; + + // Extracts and returns the front edge from the provided tree. + // This method consumes a reference on the provided tree, and logically + // performs the following operation: + // edge = CordRep::Ref(tree->Edge(kFront)); + // CordRep::Unref(tree); + // return edge; + static CordRep* ExtractFront(CordRepBtree* tree); + + // Returns a tree containing the result of appending `right` to `left`. + static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right); + + // Fallback functions for `Create()`, `Append()` and `Prepend()` which + // deal with legacy / non conforming input, i.e.: CONCAT trees. + static CordRepBtree* CreateSlow(CordRep* rep); + static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep); + static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep); + + // Recursively rebuilds `tree` into `stack`. If 'consume` is set to true, the + // function will consume a reference on `tree`. `stack` is a null terminated + // array containing the new tree's state, with the current leaf node at + // stack[0], and parent nodes above that, or null for 'top of tree'. + static void Rebuild(CordRepBtree** stack, CordRepBtree* tree, bool consume); + + // Aligns existing edges to start at index 0, to allow for a new edge to be + // added to the back of the current edges. + inline void AlignBegin(); + + // Aligns existing edges to end at `capacity`, to allow for a new edge to be + // added in front of the current edges. + inline void AlignEnd(); + + // Adds the provided edge to this node. + // Requires this node to have capacity for the edge. Realigns / moves + // existing edges as needed to prepend or append the new edge. + template <EdgeType edge_type> + inline void Add(CordRep* rep); + + // Adds the provided edges to this node. + // Requires this node to have capacity for the edges. Realigns / moves + // existing edges as needed to prepend or append the new edges. + template <EdgeType edge_type> + inline void Add(absl::Span<CordRep* const>); + + // Adds data from `data` to this node until either all data has been consumed, + // or there is no more capacity for additional flat nodes inside this node. + // Requires the current node to be a leaf node, data to be non empty, and the + // current node to have capacity for at least one more data edge. + // Returns any remaining data from `data` that was not added, which is + // depending on the edge type (front / back) either the remaining prefix of + // suffix of the input. + // See the `Append/Prepend` function for the meaning and purpose of `extra`. + template <EdgeType edge_type> + absl::string_view AddData(absl::string_view data, size_t extra); + + // Replace the front or back edge with the provided value. + // Adopts a reference on `edge` and unrefs the old edge. + template <EdgeType edge_type> + inline void SetEdge(CordRep* edge); + + // Returns a partial copy of the current tree containing the first `n` bytes + // of data. `CopyResult` contains both the resulting edge and its height. The + // resulting tree may be less high than the current tree, or even be a single + // matching data edge if `allow_folding` is set to true. + // For example, if `n == 1`, then the result will be the single data edge, and + // height will be set to -1 (one below the owning leaf node). If n == 0, this + // function returns null. Requires `n <= length` + CopyResult CopyPrefix(size_t n, bool allow_folding = true); + + // Returns a partial copy of the current tree containing all data starting + // after `offset`. `CopyResult` contains both the resulting edge and its + // height. The resulting tree may be less high than the current tree, or even + // be a single matching data edge. For example, if `n == length - 1`, then the + // result will be a single data edge, and height will be set to -1 (one below + // the owning leaf node). + // Requires `offset < length` + CopyResult CopySuffix(size_t offset); + + // Returns a OpResult value of {this, kSelf} or {Copy(), kCopied} + // depending on the value of `owned`. + inline OpResult ToOpResult(bool owned); + + // Adds `rep` to the specified tree, returning the modified tree. + template <EdgeType edge_type> + static CordRepBtree* AddCordRep(CordRepBtree* tree, CordRep* rep); + + // Adds `data` to the specified tree, returning the modified tree. + // See the `Append/Prepend` function for the meaning and purpose of `extra`. + template <EdgeType edge_type> + static CordRepBtree* AddData(CordRepBtree* tree, absl::string_view data, + size_t extra = 0); + + // Merges `src` into `dst` with `src` being added either before (kFront) or + // after (kBack) `dst`. Requires the height of `dst` to be greater than or + // equal to the height of `src`. + template <EdgeType edge_type> + static CordRepBtree* Merge(CordRepBtree* dst, CordRepBtree* src); + + // Fallback version of GetAppendBuffer for large trees: GetAppendBuffer() + // implements an inlined version for trees of limited height (3 levels), + // GetAppendBufferSlow implements the logic for large trees. + Span<char> GetAppendBufferSlow(size_t size); + + // `edges_` contains all edges starting from this instance. + // These are explicitly `child` edges only, a cord btree (or any cord tree in + // that respect) does not store `parent` pointers anywhere: multiple trees / + // parents can reference the same shared child edge. The type of these edges + // depends on the height of the node. `Leaf nodes` (height == 0) contain `data + // edges` (external or flat nodes, or sub-strings thereof). All other nodes + // (height > 0) contain pointers to BTREE nodes with a height of `height - 1`. + CordRep* edges_[kMaxCapacity]; + + friend class CordRepBtreeTestPeer; + friend class CordRepBtreeNavigator; +}; + +inline CordRepBtree* CordRep::btree() { + assert(IsBtree()); + return static_cast<CordRepBtree*>(this); +} + +inline const CordRepBtree* CordRep::btree() const { + assert(IsBtree()); + return static_cast<const CordRepBtree*>(this); +} + +inline void CordRepBtree::InitInstance(int height, size_t begin, size_t end) { + tag = BTREE; + storage[0] = static_cast<uint8_t>(height); + storage[1] = static_cast<uint8_t>(begin); + storage[2] = static_cast<uint8_t>(end); +} + +inline CordRep* CordRepBtree::Edge(size_t index) const { + assert(index >= begin()); + assert(index < end()); + return edges_[index]; +} + +inline CordRep* CordRepBtree::Edge(EdgeType edge_type) const { + return edges_[edge_type == kFront ? begin() : back()]; +} + +inline absl::Span<CordRep* const> CordRepBtree::Edges() const { + return {edges_ + begin(), size()}; +} + +inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin, + size_t end) const { + assert(begin <= end); + assert(begin >= this->begin()); + assert(end <= this->end()); + return {edges_ + begin, static_cast<size_t>(end - begin)}; +} + +inline const char* CordRepBtree::EdgeDataPtr(const CordRep* r) { + assert(IsDataEdge(r)); + size_t offset = 0; + if (r->tag == SUBSTRING) { + offset = r->substring()->start; + r = r->substring()->child; + } + return (r->tag >= FLAT ? r->flat()->Data() : r->external()->base) + offset; +} + +inline absl::string_view CordRepBtree::EdgeData(const CordRep* r) { + return absl::string_view(EdgeDataPtr(r), r->length); +} + +inline absl::string_view CordRepBtree::Data(size_t index) const { + assert(height() == 0); + return EdgeData(Edge(index)); +} + +inline bool CordRepBtree::IsDataEdge(const CordRep* rep) { + // The fast path is that `rep` is an EXTERNAL or FLAT node, making the below + // if a single, well predicted branch. We then repeat the FLAT or EXTERNAL + // check in the slow path the SUBSTRING check to optimize for the hot path. + if (rep->tag == EXTERNAL || rep->tag >= FLAT) return true; + if (rep->tag == SUBSTRING) rep = rep->substring()->child; + return rep->tag == EXTERNAL || rep->tag >= FLAT; +} + +inline CordRepBtree* CordRepBtree::New(int height) { + CordRepBtree* tree = new CordRepBtree; + tree->length = 0; + tree->InitInstance(height); + return tree; +} + +inline CordRepBtree* CordRepBtree::New(CordRep* rep) { + CordRepBtree* tree = new CordRepBtree; + int height = rep->IsBtree() ? rep->btree()->height() + 1 : 0; + tree->length = rep->length; + tree->InitInstance(height, /*begin=*/0, /*end=*/1); + tree->edges_[0] = rep; + return tree; +} + +inline CordRepBtree* CordRepBtree::New(CordRepBtree* front, + CordRepBtree* back) { + assert(front->height() == back->height()); + CordRepBtree* tree = new CordRepBtree; + tree->length = front->length + back->length; + tree->InitInstance(front->height() + 1, /*begin=*/0, /*end=*/2); + tree->edges_[0] = front; + tree->edges_[1] = back; + return tree; +} + +inline void CordRepBtree::DestroyTree(CordRepBtree* tree, size_t begin, + size_t end) { + if (tree->height() == 0) { + DestroyLeaf(tree, begin, end); + } else { + DestroyNonLeaf(tree, begin, end); + } +} + +inline void CordRepBtree::Destroy(CordRepBtree* tree) { + DestroyTree(tree, tree->begin(), tree->end()); +} + +inline void CordRepBtree::Unref(absl::Span<CordRep* const> edges) { + for (CordRep* edge : edges) { + if (ABSL_PREDICT_FALSE(!edge->refcount.Decrement())) { + CordRep::Destroy(edge); + } + } +} + +inline CordRepBtree* CordRepBtree::CopyRaw() const { + auto* tree = static_cast<CordRepBtree*>(::operator new(sizeof(CordRepBtree))); + memcpy(static_cast<void*>(tree), this, sizeof(CordRepBtree)); + new (&tree->refcount) RefcountAndFlags; + return tree; +} + +inline CordRepBtree* CordRepBtree::Copy() const { + CordRepBtree* tree = CopyRaw(); + for (CordRep* rep : Edges()) CordRep::Ref(rep); + return tree; +} + +inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin, + size_t new_length) const { + assert(begin >= this->begin()); + assert(begin <= this->end()); + CordRepBtree* tree = CopyRaw(); + tree->length = new_length; + tree->set_begin(begin); + for (CordRep* edge : tree->Edges()) CordRep::Ref(edge); + return tree; +} + +inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end, + size_t new_length) const { + assert(end <= capacity()); + assert(end >= this->begin()); + CordRepBtree* tree = CopyRaw(); + tree->length = new_length; + tree->set_end(end); + for (CordRep* edge : tree->Edges()) CordRep::Ref(edge); + return tree; +} + +inline void CordRepBtree::AlignBegin() { + // The below code itself does not need to be fast as typically we have + // mono-directional append/prepend calls, and `begin` / `end` are typically + // adjusted no more than once. But we want to avoid potential register clobber + // effects, making the compiler emit register save/store/spills, and minimize + // the size of code. + const size_t delta = begin(); + if (ABSL_PREDICT_FALSE(delta != 0)) { + const size_t new_end = end() - delta; + set_begin(0); + set_end(new_end); + // TODO(mvels): we can write this using 2 loads / 2 stores depending on + // total size for the kMaxCapacity = 6 case. I.e., we can branch (switch) on + // size, and then do overlapping load/store of up to 4 pointers (inlined as + // XMM, YMM or ZMM load/store) and up to 2 pointers (XMM / YMM), which is a) + // compact and b) not clobbering any registers. + ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity); +#ifdef __clang__ +#pragma unroll 1 +#endif + for (size_t i = 0; i < new_end; ++i) { + edges_[i] = edges_[i + delta]; + } + } +} + +inline void CordRepBtree::AlignEnd() { + // See comments in `AlignBegin` for motivation on the hand-rolled for loops. + const size_t delta = capacity() - end(); + if (delta != 0) { + const size_t new_begin = begin() + delta; + const size_t new_end = end() + delta; + set_begin(new_begin); + set_end(new_end); + ABSL_INTERNAL_ASSUME(new_end <= kMaxCapacity); +#ifdef __clang__ +#pragma unroll 1 +#endif + for (size_t i = new_end - 1; i >= new_begin; --i) { + edges_[i] = edges_[i - delta]; + } + } +} + +template <> +inline void CordRepBtree::Add<CordRepBtree::kBack>(CordRep* rep) { + AlignBegin(); + edges_[fetch_add_end(1)] = rep; +} + +template <> +inline void CordRepBtree::Add<CordRepBtree::kBack>( + absl::Span<CordRep* const> edges) { + AlignBegin(); + size_t new_end = end(); + for (CordRep* edge : edges) edges_[new_end++] = edge; + set_end(new_end); +} + +template <> +inline void CordRepBtree::Add<CordRepBtree::kFront>(CordRep* rep) { + AlignEnd(); + edges_[sub_fetch_begin(1)] = rep; +} + +template <> +inline void CordRepBtree::Add<CordRepBtree::kFront>( + absl::Span<CordRep* const> edges) { + AlignEnd(); + size_t new_begin = begin() - edges.size(); + set_begin(new_begin); + for (CordRep* edge : edges) edges_[new_begin++] = edge; +} + +template <CordRepBtree::EdgeType edge_type> +inline void CordRepBtree::SetEdge(CordRep* edge) { + const int idx = edge_type == kFront ? begin() : back(); + CordRep::Unref(edges_[idx]); + edges_[idx] = edge; +} + +inline CordRepBtree::OpResult CordRepBtree::ToOpResult(bool owned) { + return owned ? OpResult{this, kSelf} : OpResult{Copy(), kCopied}; +} + +inline CordRepBtree::Position CordRepBtree::IndexOf(size_t offset) const { + assert(offset < length); + size_t index = begin(); + while (offset >= edges_[index]->length) offset -= edges_[index++]->length; + return {index, offset}; +} + +inline CordRepBtree::Position CordRepBtree::IndexBefore(size_t offset) const { + assert(offset > 0); + assert(offset <= length); + size_t index = begin(); + while (offset > edges_[index]->length) offset -= edges_[index++]->length; + return {index, offset}; +} + +inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front, + size_t offset) const { + size_t index = front.index; + offset = offset + front.n; + while (offset > edges_[index]->length) offset -= edges_[index++]->length; + return {index, offset}; +} + +inline CordRepBtree::Position CordRepBtree::IndexOfLength(size_t n) const { + assert(n <= length); + size_t index = back(); + size_t strip = length - n; + while (strip >= edges_[index]->length) strip -= edges_[index--]->length; + return {index, edges_[index]->length - strip}; +} + +inline CordRepBtree::Position CordRepBtree::IndexBeyond( + const size_t offset) const { + // We need to find the edge which `starting offset` is beyond (>=)`offset`. + // For this we can't use the `offset -= length` logic of IndexOf. Instead, we + // track the offset of the `current edge` in `off`, which we increase as we + // iterate over the edges until we find the matching edge. + size_t off = 0; + size_t index = begin(); + while (offset > off) off += edges_[index++]->length; + return {index, off - offset}; +} + +inline CordRepBtree* CordRepBtree::Create(CordRep* rep) { + if (IsDataEdge(rep)) return New(rep); + return CreateSlow(rep); +} + +inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) { + assert(refcount.IsMutable()); + CordRepBtree* tree = this; + const int height = this->height(); + CordRepBtree* n1 = tree; + CordRepBtree* n2 = tree; + CordRepBtree* n3 = tree; + switch (height) { + case 3: + tree = tree->Edge(kBack)->btree(); + if (!tree->refcount.IsMutable()) return {}; + n2 = tree; + ABSL_FALLTHROUGH_INTENDED; + case 2: + tree = tree->Edge(kBack)->btree(); + if (!tree->refcount.IsMutable()) return {}; + n1 = tree; + ABSL_FALLTHROUGH_INTENDED; + case 1: + tree = tree->Edge(kBack)->btree(); + if (!tree->refcount.IsMutable()) return {}; + ABSL_FALLTHROUGH_INTENDED; + case 0: + CordRep* edge = tree->Edge(kBack); + if (!edge->refcount.IsMutable()) return {}; + if (edge->tag < FLAT) return {}; + size_t avail = edge->flat()->Capacity() - edge->length; + if (avail == 0) return {}; + size_t delta = (std::min)(size, avail); + Span<char> span = {edge->flat()->Data() + edge->length, delta}; + edge->length += delta; + switch (height) { + case 3: + n3->length += delta; + ABSL_FALLTHROUGH_INTENDED; + case 2: + n2->length += delta; + ABSL_FALLTHROUGH_INTENDED; + case 1: + n1->length += delta; + ABSL_FALLTHROUGH_INTENDED; + case 0: + tree->length += delta; + return span; + } + break; + } + return GetAppendBufferSlow(size); +} + +extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kBack>( + CordRepBtree* tree, CordRep* rep); + +extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kFront>( + CordRepBtree* tree, CordRep* rep); + +inline CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, CordRep* rep) { + if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) { + return CordRepBtree::AddCordRep<kBack>(tree, rep); + } + return AppendSlow(tree, rep); +} + +inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) { + if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) { + return CordRepBtree::AddCordRep<kFront>(tree, rep); + } + return PrependSlow(tree, rep); +} + +#ifdef NDEBUG + +inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, + bool /* shallow */) { + return tree; +} + +inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree, + bool /* shallow */) { + return tree; +} + +#endif + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_navigator.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_navigator.cc new file mode 100644 index 0000000000..d1f9995d00 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_navigator.cc @@ -0,0 +1,185 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_btree_navigator.h" + +#include <cassert> + +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +using ReadResult = CordRepBtreeNavigator::ReadResult; + +namespace { + +// Returns a `CordRepSubstring` from `rep` starting at `offset` of size `n`. +// If `rep` is already a `CordRepSubstring` instance, an adjusted instance is +// created based on the old offset and new offset. +// Adopts a reference on `rep`. Rep must be a valid data edge. Returns +// nullptr if `n == 0`, `rep` if `n == rep->length`. +// Requires `offset < rep->length` and `offset + n <= rep->length`. +// TODO(192061034): move to utility library in internal and optimize for small +// substrings of larger reps. +inline CordRep* Substring(CordRep* rep, size_t offset, size_t n) { + assert(n <= rep->length); + assert(offset < rep->length); + assert(offset <= rep->length - n); + assert(CordRepBtree::IsDataEdge(rep)); + + if (n == 0) return nullptr; + if (n == rep->length) return CordRep::Ref(rep); + + if (rep->tag == SUBSTRING) { + offset += rep->substring()->start; + rep = rep->substring()->child; + } + + CordRepSubstring* substring = new CordRepSubstring(); + substring->length = n; + substring->tag = SUBSTRING; + substring->start = offset; + substring->child = CordRep::Ref(rep); + return substring; +} + +inline CordRep* Substring(CordRep* rep, size_t offset) { + return Substring(rep, offset, rep->length - offset); +} + +} // namespace + +CordRepBtreeNavigator::Position CordRepBtreeNavigator::Skip(size_t n) { + int height = 0; + size_t index = index_[0]; + CordRepBtree* node = node_[0]; + CordRep* edge = node->Edge(index); + + // Overall logic: Find an edge of at least the length we need to skip. + // We consume all edges which are smaller (i.e., must be 100% skipped). + // If we exhausted all edges on the current level, we move one level + // up the tree, and repeat until we either find the edge, or until we hit + // the top of the tree meaning the skip exceeds tree->length. + while (n >= edge->length) { + n -= edge->length; + while (++index == node->end()) { + if (++height > height_) return {nullptr, n}; + node = node_[height]; + index = index_[height]; + } + edge = node->Edge(index); + } + + // If we moved up the tree, descend down to the leaf level, consuming all + // edges that must be skipped. + while (height > 0) { + node = edge->btree(); + index_[height] = index; + node_[--height] = node; + index = node->begin(); + edge = node->Edge(index); + while (n >= edge->length) { + n -= edge->length; + ++index; + assert(index != node->end()); + edge = node->Edge(index); + } + } + index_[0] = index; + return {edge, n}; +} + +ReadResult CordRepBtreeNavigator::Read(size_t edge_offset, size_t n) { + int height = 0; + size_t length = edge_offset + n; + size_t index = index_[0]; + CordRepBtree* node = node_[0]; + CordRep* edge = node->Edge(index); + assert(edge_offset < edge->length); + + if (length < edge->length) { + return {Substring(edge, edge_offset, n), length}; + } + + // Similar to 'Skip', we consume all edges that are inside the 'length' of + // data that needs to be read. If we exhaust the current level, we move one + // level up the tree and repeat until we hit the final edge that must be + // (partially) read. We consume all edges into `subtree`. + CordRepBtree* subtree = CordRepBtree::New(Substring(edge, edge_offset)); + size_t subtree_end = 1; + do { + length -= edge->length; + while (++index == node->end()) { + index_[height] = index; + if (++height > height_) { + subtree->set_end(subtree_end); + if (length == 0) return {subtree, 0}; + CordRep::Unref(subtree); + return {nullptr, length}; + } + if (length != 0) { + subtree->set_end(subtree_end); + subtree = CordRepBtree::New(subtree); + subtree_end = 1; + } + node = node_[height]; + index = index_[height]; + } + edge = node->Edge(index); + if (length >= edge->length) { + subtree->length += edge->length; + subtree->edges_[subtree_end++] = CordRep::Ref(edge); + } + } while (length >= edge->length); + CordRepBtree* tree = subtree; + subtree->length += length; + + // If we moved up the tree, descend down to the leaf level, consuming all + // edges that must be read, adding 'down' nodes to `subtree`. + while (height > 0) { + node = edge->btree(); + index_[height] = index; + node_[--height] = node; + index = node->begin(); + edge = node->Edge(index); + + if (length != 0) { + CordRepBtree* right = CordRepBtree::New(height); + right->length = length; + subtree->edges_[subtree_end++] = right; + subtree->set_end(subtree_end); + subtree = right; + subtree_end = 0; + while (length >= edge->length) { + subtree->edges_[subtree_end++] = CordRep::Ref(edge); + length -= edge->length; + edge = node->Edge(++index); + } + } + } + // Add any (partial) edge still remaining at the leaf level. + if (length != 0) { + subtree->edges_[subtree_end++] = Substring(edge, 0, length); + } + subtree->set_end(subtree_end); + index_[0] = index; + return {tree, length}; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_navigator.h b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_navigator.h new file mode 100644 index 0000000000..971b92eda6 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_navigator.h @@ -0,0 +1,265 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_ + +#include <cassert> +#include <iostream> + +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordRepBtreeNavigator is a bi-directional navigator allowing callers to +// navigate all the (leaf) data edges in a CordRepBtree instance. +// +// A CordRepBtreeNavigator instance is by default empty. Callers initialize a +// navigator instance by calling one of `InitFirst()`, `InitLast()` or +// `InitOffset()`, which establishes a current position. Callers can then +// navigate using the `Next`, `Previous`, `Skip` and `Seek` methods. +// +// The navigator instance does not take or adopt a reference on the provided +// `tree` on any of the initialization calls. Callers are responsible for +// guaranteeing the lifecycle of the provided tree. A navigator instance can +// be reset to the empty state by calling `Reset`. +// +// A navigator only keeps positional state on the 'current data edge', it does +// explicitly not keep any 'offset' state. The class does accept and return +// offsets in the `Read()`, `Skip()` and 'Seek()` methods as these would +// otherwise put a big burden on callers. Callers are expected to maintain +// (returned) offset info if they require such granular state. +class CordRepBtreeNavigator { + public: + // The logical position as returned by the Seek() and Skip() functions. + // Returns the current leaf edge for the desired seek or skip position and + // the offset of that position inside that edge. + struct Position { + CordRep* edge; + size_t offset; + }; + + // The read result as returned by the Read() function. + // `tree` contains the resulting tree which is identical to the result + // of calling CordRepBtree::SubTree(...) on the tree being navigated. + // `n` contains the number of bytes used from the last navigated to + // edge of the tree. + struct ReadResult { + CordRep* tree; + size_t n; + }; + + // Returns true if this instance is not empty. + explicit operator bool() const; + + // Returns the tree for this instance or nullptr if empty. + CordRepBtree* btree() const; + + // Returns the data edge of the current position. + // Requires this instance to not be empty. + CordRep* Current() const; + + // Resets this navigator to `tree`, returning the first data edge in the tree. + CordRep* InitFirst(CordRepBtree* tree); + + // Resets this navigator to `tree`, returning the last data edge in the tree. + CordRep* InitLast(CordRepBtree* tree); + + // Resets this navigator to `tree` returning the data edge at position + // `offset` and the relative offset of `offset` into that data edge. + // Returns `Position.edge = nullptr` if the provided offset is greater + // than or equal to the length of the tree, in which case the state of + // the navigator instance remains unchanged. + Position InitOffset(CordRepBtree* tree, size_t offset); + + // Navigates to the next data edge. + // Returns the next data edge or nullptr if there is no next data edge, in + // which case the current position remains unchanged. + CordRep* Next(); + + // Navigates to the previous data edge. + // Returns the previous data edge or nullptr if there is no previous data + // edge, in which case the current position remains unchanged. + CordRep* Previous(); + + // Navigates to the data edge at position `offset`. Returns the navigated to + // data edge in `Position.edge` and the relative offset of `offset` into that + // data edge in `Position.offset`. Returns `Position.edge = nullptr` if the + // provide offset is greater than or equal to the tree's length. + Position Seek(size_t offset); + + // Reads `n` bytes of data starting at offset `edge_offset` of the current + // data edge, and returns the result in `ReadResult.tree`. `ReadResult.n` + // contains the 'bytes used` from the last / current data edge in the tree. + // This allows users that mix regular navigation (using string views) and + // 'read into cord' navigation to keep track of the current state, and which + // bytes have been consumed from a navigator. + // This function returns `ReadResult.tree = nullptr` if the requested length + // exceeds the length of the tree starting at the current data edge. + ReadResult Read(size_t edge_offset, size_t n); + + // Skips `n` bytes forward from the current data edge, returning the navigated + // to data edge in `Position.edge` and `Position.offset` containing the offset + // inside that data edge. Note that the state of the navigator is left + // unchanged if `n` is smaller than the length of the current data edge. + Position Skip(size_t n); + + // Resets this instance to the default / empty state. + void Reset(); + + private: + // Slow path for Next() if Next() reached the end of a leaf node. Backtracks + // up the stack until it finds a node that has a 'next' position available, + // and then does a 'front dive' towards the next leaf node. + CordRep* NextUp(); + + // Slow path for Previous() if Previous() reached the beginning of a leaf + // node. Backtracks up the stack until it finds a node that has a 'previous' + // position available, and then does a 'back dive' towards the previous leaf + // node. + CordRep* PreviousUp(); + + // Generic implementation of InitFirst() and InitLast(). + template <CordRepBtree::EdgeType edge_type> + CordRep* Init(CordRepBtree* tree); + + // `height_` contains the height of the current tree, or -1 if empty. + int height_ = -1; + + // `index_` and `node_` contain the navigation state as the 'path' to the + // current data edge which is at `node_[0]->Edge(index_[0])`. The contents + // of these are undefined until the instance is initialized (`height_ >= 0`). + uint8_t index_[CordRepBtree::kMaxHeight]; + CordRepBtree* node_[CordRepBtree::kMaxHeight]; +}; + +// Returns true if this instance is not empty. +inline CordRepBtreeNavigator::operator bool() const { return height_ >= 0; } + +inline CordRepBtree* CordRepBtreeNavigator::btree() const { + return height_ >= 0 ? node_[height_] : nullptr; +} + +inline CordRep* CordRepBtreeNavigator::Current() const { + assert(height_ >= 0); + return node_[0]->Edge(index_[0]); +} + +inline void CordRepBtreeNavigator::Reset() { height_ = -1; } + +inline CordRep* CordRepBtreeNavigator::InitFirst(CordRepBtree* tree) { + return Init<CordRepBtree::kFront>(tree); +} + +inline CordRep* CordRepBtreeNavigator::InitLast(CordRepBtree* tree) { + return Init<CordRepBtree::kBack>(tree); +} + +template <CordRepBtree::EdgeType edge_type> +inline CordRep* CordRepBtreeNavigator::Init(CordRepBtree* tree) { + assert(tree != nullptr); + assert(tree->size() > 0); + int height = height_ = tree->height(); + size_t index = tree->index(edge_type); + node_[height] = tree; + index_[height] = static_cast<uint8_t>(index); + while (--height >= 0) { + tree = tree->Edge(index)->btree(); + node_[height] = tree; + index = tree->index(edge_type); + index_[height] = static_cast<uint8_t>(index); + } + return node_[0]->Edge(index); +} + +inline CordRepBtreeNavigator::Position CordRepBtreeNavigator::Seek( + size_t offset) { + assert(btree() != nullptr); + int height = height_; + CordRepBtree* edge = node_[height]; + if (ABSL_PREDICT_FALSE(offset >= edge->length)) return {nullptr, 0}; + CordRepBtree::Position index = edge->IndexOf(offset); + index_[height] = static_cast<uint8_t>(index.index); + while (--height >= 0) { + edge = edge->Edge(index.index)->btree(); + node_[height] = edge; + index = edge->IndexOf(index.n); + index_[height] = static_cast<uint8_t>(index.index); + } + return {edge->Edge(index.index), index.n}; +} + +inline CordRepBtreeNavigator::Position CordRepBtreeNavigator::InitOffset( + CordRepBtree* tree, size_t offset) { + assert(tree != nullptr); + if (ABSL_PREDICT_FALSE(offset >= tree->length)) return {nullptr, 0}; + height_ = tree->height(); + node_[height_] = tree; + return Seek(offset); +} + +inline CordRep* CordRepBtreeNavigator::Next() { + CordRepBtree* edge = node_[0]; + return index_[0] == edge->back() ? NextUp() : edge->Edge(++index_[0]); +} + +inline CordRep* CordRepBtreeNavigator::Previous() { + CordRepBtree* edge = node_[0]; + return index_[0] == edge->begin() ? PreviousUp() : edge->Edge(--index_[0]); +} + +inline CordRep* CordRepBtreeNavigator::NextUp() { + assert(index_[0] == node_[0]->back()); + CordRepBtree* edge; + size_t index; + int height = 0; + do { + if (++height > height_) return nullptr; + edge = node_[height]; + index = index_[height] + 1; + } while (index == edge->end()); + index_[height] = static_cast<uint8_t>(index); + do { + node_[--height] = edge = edge->Edge(index)->btree(); + index_[height] = static_cast<uint8_t>(index = edge->begin()); + } while (height > 0); + return edge->Edge(index); +} + +inline CordRep* CordRepBtreeNavigator::PreviousUp() { + assert(index_[0] == node_[0]->begin()); + CordRepBtree* edge; + size_t index; + int height = 0; + do { + if (++height > height_) return nullptr; + edge = node_[height]; + index = index_[height]; + } while (index == edge->begin()); + index_[height] = static_cast<uint8_t>(--index); + do { + node_[--height] = edge = edge->Edge(index)->btree(); + index_[height] = static_cast<uint8_t>(index = edge->back()); + } while (height > 0); + return edge->Edge(index); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_navigator_test.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_navigator_test.cc new file mode 100644 index 0000000000..ce09b1992a --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_navigator_test.cc @@ -0,0 +1,325 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_btree_navigator.h" + +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_test_util.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::Eq; +using ::testing::Ne; + +using ::absl::cordrep_testing::CordRepBtreeFromFlats; +using ::absl::cordrep_testing::CordToString; +using ::absl::cordrep_testing::CreateFlatsFromString; +using ::absl::cordrep_testing::CreateRandomString; +using ::absl::cordrep_testing::MakeFlat; +using ::absl::cordrep_testing::MakeSubstring; + +using ReadResult = CordRepBtreeNavigator::ReadResult; +using Position = CordRepBtreeNavigator::Position; + +// CordRepBtreeNavigatorTest is a test fixture which automatically creates a +// tree to test navigation logic on. The parameter `count' defines the number of +// data edges in the test tree. +class CordRepBtreeNavigatorTest : public testing::TestWithParam<int> { + public: + using Flats = std::vector<CordRep*>; + static constexpr size_t kCharsPerFlat = 3; + + CordRepBtreeNavigatorTest() { + data_ = CreateRandomString(count() * kCharsPerFlat); + flats_ = CreateFlatsFromString(data_, kCharsPerFlat); + + // Turn flat 0 or 1 into a substring to cover partial reads on substrings. + if (count() > 1) { + CordRep::Unref(flats_[1]); + flats_[1] = MakeSubstring(kCharsPerFlat, kCharsPerFlat, MakeFlat(data_)); + } else { + CordRep::Unref(flats_[0]); + flats_[0] = MakeSubstring(0, kCharsPerFlat, MakeFlat(data_)); + } + + tree_ = CordRepBtreeFromFlats(flats_); + } + + ~CordRepBtreeNavigatorTest() override { CordRep::Unref(tree_); } + + int count() const { return GetParam(); } + CordRepBtree* tree() { return tree_; } + const std::string& data() const { return data_; } + const std::vector<CordRep*>& flats() const { return flats_; } + + static std::string ToString(testing::TestParamInfo<int> param) { + return absl::StrCat(param.param, "_Flats"); + } + + private: + std::string data_; + Flats flats_; + CordRepBtree* tree_; +}; + +INSTANTIATE_TEST_SUITE_P( + WithParam, CordRepBtreeNavigatorTest, + testing::Values(1, CordRepBtree::kMaxCapacity - 1, + CordRepBtree::kMaxCapacity, + CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity - 1, + CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity, + CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity + 1, + CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity * 2 + + 17), + CordRepBtreeNavigatorTest::ToString); + +TEST(CordRepBtreeNavigatorTest, Uninitialized) { + CordRepBtreeNavigator nav; + EXPECT_FALSE(nav); + EXPECT_THAT(nav.btree(), Eq(nullptr)); +#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) + EXPECT_DEATH(nav.Current(), ".*"); +#endif +} + +TEST_P(CordRepBtreeNavigatorTest, InitFirst) { + CordRepBtreeNavigator nav; + CordRep* edge = nav.InitFirst(tree()); + EXPECT_TRUE(nav); + EXPECT_THAT(nav.btree(), Eq(tree())); + EXPECT_THAT(nav.Current(), Eq(flats().front())); + EXPECT_THAT(edge, Eq(flats().front())); +} + +TEST_P(CordRepBtreeNavigatorTest, InitLast) { + CordRepBtreeNavigator nav; + CordRep* edge = nav.InitLast(tree()); + EXPECT_TRUE(nav); + EXPECT_THAT(nav.btree(), Eq(tree())); + EXPECT_THAT(nav.Current(), Eq(flats().back())); + EXPECT_THAT(edge, Eq(flats().back())); +} + +TEST_P(CordRepBtreeNavigatorTest, NextPrev) { + CordRepBtreeNavigator nav; + nav.InitFirst(tree()); + const Flats& flats = this->flats(); + + EXPECT_THAT(nav.Previous(), Eq(nullptr)); + EXPECT_THAT(nav.Current(), Eq(flats.front())); + for (int i = 1; i < flats.size(); ++i) { + ASSERT_THAT(nav.Next(), Eq(flats[i])); + EXPECT_THAT(nav.Current(), Eq(flats[i])); + } + EXPECT_THAT(nav.Next(), Eq(nullptr)); + EXPECT_THAT(nav.Current(), Eq(flats.back())); + for (int i = static_cast<int>(flats.size()) - 2; i >= 0; --i) { + ASSERT_THAT(nav.Previous(), Eq(flats[i])); + EXPECT_THAT(nav.Current(), Eq(flats[i])); + } + EXPECT_THAT(nav.Previous(), Eq(nullptr)); + EXPECT_THAT(nav.Current(), Eq(flats.front())); +} + +TEST_P(CordRepBtreeNavigatorTest, PrevNext) { + CordRepBtreeNavigator nav; + nav.InitLast(tree()); + const Flats& flats = this->flats(); + + EXPECT_THAT(nav.Next(), Eq(nullptr)); + EXPECT_THAT(nav.Current(), Eq(flats.back())); + for (int i = static_cast<int>(flats.size()) - 2; i >= 0; --i) { + ASSERT_THAT(nav.Previous(), Eq(flats[i])); + EXPECT_THAT(nav.Current(), Eq(flats[i])); + } + EXPECT_THAT(nav.Previous(), Eq(nullptr)); + EXPECT_THAT(nav.Current(), Eq(flats.front())); + for (int i = 1; i < flats.size(); ++i) { + ASSERT_THAT(nav.Next(), Eq(flats[i])); + EXPECT_THAT(nav.Current(), Eq(flats[i])); + } + EXPECT_THAT(nav.Next(), Eq(nullptr)); + EXPECT_THAT(nav.Current(), Eq(flats.back())); +} + +TEST(CordRepBtreeNavigatorTest, Reset) { + CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc")); + CordRepBtreeNavigator nav; + nav.InitFirst(tree); + nav.Reset(); + EXPECT_FALSE(nav); + EXPECT_THAT(nav.btree(), Eq(nullptr)); +#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) + EXPECT_DEATH(nav.Current(), ".*"); +#endif + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeNavigatorTest, Skip) { + int count = this->count(); + const Flats& flats = this->flats(); + CordRepBtreeNavigator nav; + nav.InitFirst(tree()); + + for (int char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) { + Position pos = nav.Skip(char_offset); + EXPECT_THAT(pos.edge, Eq(nav.Current())); + EXPECT_THAT(pos.edge, Eq(flats[0])); + EXPECT_THAT(pos.offset, Eq(char_offset)); + } + + for (int index1 = 0; index1 < count; ++index1) { + for (int index2 = index1; index2 < count; ++index2) { + for (int char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) { + CordRepBtreeNavigator nav; + nav.InitFirst(tree()); + + size_t length1 = index1 * kCharsPerFlat; + Position pos1 = nav.Skip(length1 + char_offset); + ASSERT_THAT(pos1.edge, Eq(flats[index1])); + ASSERT_THAT(pos1.edge, Eq(nav.Current())); + ASSERT_THAT(pos1.offset, Eq(char_offset)); + + size_t length2 = index2 * kCharsPerFlat; + Position pos2 = nav.Skip(length2 - length1 + char_offset); + ASSERT_THAT(pos2.edge, Eq(flats[index2])); + ASSERT_THAT(pos2.edge, Eq(nav.Current())); + ASSERT_THAT(pos2.offset, Eq(char_offset)); + } + } + } +} + +TEST_P(CordRepBtreeNavigatorTest, Seek) { + int count = this->count(); + const Flats& flats = this->flats(); + CordRepBtreeNavigator nav; + nav.InitFirst(tree()); + + for (int char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) { + Position pos = nav.Seek(char_offset); + EXPECT_THAT(pos.edge, Eq(nav.Current())); + EXPECT_THAT(pos.edge, Eq(flats[0])); + EXPECT_THAT(pos.offset, Eq(char_offset)); + } + + for (int index = 0; index < count; ++index) { + for (int char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) { + size_t offset = index * kCharsPerFlat + char_offset; + Position pos1 = nav.Seek(offset); + ASSERT_THAT(pos1.edge, Eq(flats[index])); + ASSERT_THAT(pos1.edge, Eq(nav.Current())); + ASSERT_THAT(pos1.offset, Eq(char_offset)); + } + } +} + +TEST(CordRepBtreeNavigatorTest, InitOffset) { + // Whitebox: InitOffset() is implemented in terms of Seek() which is + // exhaustively tested. Only test it initializes / forwards properly.. + CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc")); + tree = CordRepBtree::Append(tree, MakeFlat("def")); + CordRepBtreeNavigator nav; + Position pos = nav.InitOffset(tree, 5); + EXPECT_TRUE(nav); + EXPECT_THAT(nav.btree(), Eq(tree)); + EXPECT_THAT(pos.edge, Eq(tree->Edges()[1])); + EXPECT_THAT(pos.edge, Eq(nav.Current())); + EXPECT_THAT(pos.offset, Eq(2)); + CordRep::Unref(tree); +} + +TEST(CordRepBtreeNavigatorTest, InitOffsetAndSeekBeyondLength) { + CordRepBtree* tree1 = CordRepBtree::Create(MakeFlat("abc")); + CordRepBtree* tree2 = CordRepBtree::Create(MakeFlat("def")); + + CordRepBtreeNavigator nav; + nav.InitFirst(tree1); + EXPECT_THAT(nav.Seek(3).edge, Eq(nullptr)); + EXPECT_THAT(nav.Seek(100).edge, Eq(nullptr)); + EXPECT_THAT(nav.btree(), Eq(tree1)); + EXPECT_THAT(nav.Current(), Eq(tree1->Edges().front())); + + EXPECT_THAT(nav.InitOffset(tree2, 3).edge, Eq(nullptr)); + EXPECT_THAT(nav.InitOffset(tree2, 100).edge, Eq(nullptr)); + EXPECT_THAT(nav.btree(), Eq(tree1)); + EXPECT_THAT(nav.Current(), Eq(tree1->Edges().front())); + + CordRep::Unref(tree1); + CordRep::Unref(tree2); +} + +TEST_P(CordRepBtreeNavigatorTest, Read) { + const Flats& flats = this->flats(); + const std::string& data = this->data(); + + for (size_t offset = 0; offset < data.size(); ++offset) { + for (size_t length = 1; length <= data.size() - offset; ++length) { + CordRepBtreeNavigator nav; + nav.InitFirst(tree()); + + // Skip towards edge holding offset + size_t edge_offset = nav.Skip(offset).offset; + + // Read node + ReadResult result = nav.Read(edge_offset, length); + ASSERT_THAT(result.tree, Ne(nullptr)); + EXPECT_THAT(result.tree->length, Eq(length)); + if (result.tree->tag == BTREE) { + ASSERT_TRUE(CordRepBtree::IsValid(result.tree->btree())); + } + + // Verify contents + std::string value = CordToString(result.tree); + EXPECT_THAT(value, Eq(data.substr(offset, length))); + + // Verify 'partial last edge' reads. + size_t partial = (offset + length) % kCharsPerFlat; + ASSERT_THAT(result.n, Eq(partial)); + + // Verify ending position if not EOF + if (offset + length < data.size()) { + size_t index = (offset + length) / kCharsPerFlat; + EXPECT_THAT(nav.Current(), Eq(flats[index])); + } + + CordRep::Unref(result.tree); + } + } +} + +TEST_P(CordRepBtreeNavigatorTest, ReadBeyondLengthOfTree) { + CordRepBtreeNavigator nav; + nav.InitFirst(tree()); + ReadResult result = nav.Read(2, tree()->length); + ASSERT_THAT(result.tree, Eq(nullptr)); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_reader.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_reader.cc new file mode 100644 index 0000000000..5dc76966d2 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_reader.cc @@ -0,0 +1,68 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_btree_reader.h" + +#include <cassert> + +#include "absl/base/config.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_btree_navigator.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +absl::string_view CordRepBtreeReader::Read(size_t n, size_t chunk_size, + CordRep*& tree) { + assert(chunk_size <= navigator_.Current()->length); + + // If chunk_size is non-zero, we need to start inside last returned edge. + // Else we start reading at the next data edge of the tree. + CordRep* edge = chunk_size ? navigator_.Current() : navigator_.Next(); + const size_t offset = chunk_size ? edge->length - chunk_size : 0; + + // Read the sub tree and verify we got what we wanted. + ReadResult result = navigator_.Read(offset, n); + tree = result.tree; + + // If the data returned in `tree` was covered entirely by `chunk_size`, i.e., + // read from the 'previous' edge, we did not consume any additional data, and + // can directly return the substring into the current data edge as the next + // chunk. We can easily establish from the above code that `navigator_.Next()` + // has not been called as that requires `chunk_size` to be zero. + if (n < chunk_size) return CordRepBtree::EdgeData(edge).substr(result.n); + + // The amount of data taken from the last edge is `chunk_size` and `result.n` + // contains the offset into the current edge trailing the read data (which can + // be 0). As the call to `navigator_.Read()` could have consumed all remaining + // data, calling `navigator_.Current()` is not safe before checking if we + // already consumed all remaining data. + const size_t consumed_by_read = n - chunk_size - result.n; + if (consumed_by_read >= remaining_) { + remaining_ = 0; + return {}; + } + + // We did not read all data, return remaining data from current edge. + edge = navigator_.Current(); + remaining_ -= consumed_by_read + edge->length; + return CordRepBtree::EdgeData(edge).substr(result.n); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_reader.h b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_reader.h new file mode 100644 index 0000000000..7aa79dbf10 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_reader.h @@ -0,0 +1,211 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_ + +#include <cassert> + +#include "absl/base/config.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_btree_navigator.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordRepBtreeReader implements logic to iterate over cord btrees. +// References to the underlying data are returned as absl::string_view values. +// The most typical use case is a forward only iteration over tree data. +// The class also provides `Skip()`, `Seek()` and `Read()` methods similar to +// CordRepBtreeNavigator that allow more advanced navigation. +// +// Example: iterate over all data inside a cord btree: +// +// CordRepBtreeReader reader; +// for (string_view sv = reader.Init(tree); !sv.Empty(); sv = sv.Next()) { +// DoSomethingWithDataIn(sv); +// } +// +// All navigation methods always return the next 'chunk' of data. The class +// assumes that all data is directly 'consumed' by the caller. For example: +// invoking `Skip()` will skip the desired number of bytes, and directly +// read and return the next chunk of data directly after the skipped bytes. +// +// Example: iterate over all data inside a btree skipping the first 100 bytes: +// +// CordRepBtreeReader reader; +// absl::string_view sv = reader.Init(tree); +// if (sv.length() > 100) { +// sv.RemovePrefix(100); +// } else { +// sv = reader.Skip(100 - sv.length()); +// } +// while (!sv.empty()) { +// DoSomethingWithDataIn(sv); +// absl::string_view sv = reader.Next(); +// } +// +// It is important to notice that `remaining` is based on the end position of +// the last data edge returned to the caller, not the cumulative data returned +// to the caller which can be less in cases of skipping or seeking over data. +// +// For example, consider a cord btree with five data edges: "abc", "def", "ghi", +// "jkl" and "mno": +// +// absl::string_view sv; +// CordRepBtreeReader reader; +// +// sv = reader.Init(tree); // sv = "abc", remaining = 12 +// sv = reader.Skip(4); // sv = "hi", remaining = 6 +// sv = reader.Skip(2); // sv = "l", remaining = 3 +// sv = reader.Next(); // sv = "mno", remaining = 0 +// sv = reader.Seek(1); // sv = "bc", remaining = 12 +// +class CordRepBtreeReader { + public: + using ReadResult = CordRepBtreeNavigator::ReadResult; + using Position = CordRepBtreeNavigator::Position; + + // Returns true if this instance is not empty. + explicit operator bool() const { return navigator_.btree() != nullptr; } + + // Returns the tree referenced by this instance or nullptr if empty. + CordRepBtree* btree() const { return navigator_.btree(); } + + // Returns the current data edge inside the referenced btree. + // Requires that the current instance is not empty. + CordRep* node() const { return navigator_.Current(); } + + // Returns the length of the referenced tree. + // Requires that the current instance is not empty. + size_t length() const; + + // Returns the number of remaining bytes available for iteration, which is the + // number of bytes directly following the end of the last chunk returned. + // This value will be zero if we iterated over the last edge in the bound + // tree, in which case any call to Next() or Skip() will return an empty + // string_view reflecting the EOF state. + // Note that a call to `Seek()` resets `remaining` to a value based on the + // end position of the chunk returned by that call. + size_t remaining() const { return remaining_; } + + // Resets this instance to an empty value. + void Reset() { navigator_.Reset(); } + + // Initializes this instance with `tree`. `tree` must not be null. + // Returns a reference to the first data edge of the provided tree. + absl::string_view Init(CordRepBtree* tree); + + // Navigates to and returns the next data edge of the referenced tree. + // Returns an empty string_view if an attempt is made to read beyond the end + // of the tree, i.e.: if `remaining()` is zero indicating an EOF condition. + // Requires that the current instance is not empty. + absl::string_view Next(); + + // Skips the provided amount of bytes and returns a reference to the data + // directly following the skipped bytes. + absl::string_view Skip(size_t skip); + + // Reads `n` bytes into `tree`. + // If `chunk_size` is zero, starts reading at the next data edge. If + // `chunk_size` is non zero, the read starts at the last `chunk_size` bytes of + // the last returned data edge. Effectively, this means that the read starts + // at offset `consumed() - chunk_size`. + // Requires that `chunk_size` is less than or equal to the length of the + // last returned data edge. The purpose of `chunk_size` is to simplify code + // partially consuming a returned chunk and wanting to include the remaining + // bytes in the Read call. For example, the below code will read 1000 bytes of + // data into a cord tree if the first chunk starts with "big:": + // + // CordRepBtreeReader reader; + // absl::string_view sv = reader.Init(tree); + // if (absl::StartsWith(sv, "big:")) { + // CordRepBtree tree; + // sv = reader.Read(1000, sv.size() - 4 /* "big:" */, &tree); + // } + // + // This method will return an empty string view if all remaining data was + // read. If `n` exceeded the amount of remaining data this function will + // return an empty string view and `tree` will be set to nullptr. + // In both cases, `consumed` will be set to `length`. + absl::string_view Read(size_t n, size_t chunk_size, CordRep*& tree); + + // Navigates to the chunk at offset `offset`. + // Returns a reference into the navigated to chunk, adjusted for the relative + // position of `offset` into that chunk. For example, calling `Seek(13)` on a + // cord tree containing 2 chunks of 10 and 20 bytes respectively will return + // a string view into the second chunk starting at offset 3 with a size of 17. + // Returns an empty string view if `offset` is equal to or greater than the + // length of the referenced tree. + absl::string_view Seek(size_t offset); + + private: + size_t remaining_ = 0; + CordRepBtreeNavigator navigator_; +}; + +inline size_t CordRepBtreeReader::length() const { + assert(btree() != nullptr); + return btree()->length; +} + +inline absl::string_view CordRepBtreeReader::Init(CordRepBtree* tree) { + assert(tree != nullptr); + const CordRep* edge = navigator_.InitFirst(tree); + remaining_ = tree->length - edge->length; + return CordRepBtree::EdgeData(edge); +} + +inline absl::string_view CordRepBtreeReader::Next() { + if (remaining_ == 0) return {}; + const CordRep* edge = navigator_.Next(); + assert(edge != nullptr); + remaining_ -= edge->length; + return CordRepBtree::EdgeData(edge); +} + +inline absl::string_view CordRepBtreeReader::Skip(size_t skip) { + // As we are always positioned on the last 'consumed' edge, we + // need to skip the current edge as well as `skip`. + const size_t edge_length = navigator_.Current()->length; + CordRepBtreeNavigator::Position pos = navigator_.Skip(skip + edge_length); + if (ABSL_PREDICT_FALSE(pos.edge == nullptr)) { + remaining_ = 0; + return {}; + } + // The combined length of all edges skipped before `pos.edge` is `skip - + // pos.offset`, all of which are 'consumed', as well as the current edge. + remaining_ -= skip - pos.offset + pos.edge->length; + return CordRepBtree::EdgeData(pos.edge).substr(pos.offset); +} + +inline absl::string_view CordRepBtreeReader::Seek(size_t offset) { + const CordRepBtreeNavigator::Position pos = navigator_.Seek(offset); + if (ABSL_PREDICT_FALSE(pos.edge == nullptr)) { + remaining_ = 0; + return {}; + } + absl::string_view chunk = CordRepBtree::EdgeData(pos.edge).substr(pos.offset); + remaining_ = length() - offset - chunk.length(); + return chunk; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_reader_test.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_reader_test.cc new file mode 100644 index 0000000000..9b27a81fdb --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_reader_test.cc @@ -0,0 +1,293 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_btree_reader.h" + +#include <iostream> +#include <random> +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/cord.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_test_util.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::Eq; +using ::testing::IsEmpty; +using ::testing::Ne; +using ::testing::Not; + +using ::absl::cordrep_testing::CordRepBtreeFromFlats; +using ::absl::cordrep_testing::MakeFlat; +using ::absl::cordrep_testing::CordToString; +using ::absl::cordrep_testing::CreateFlatsFromString; +using ::absl::cordrep_testing::CreateRandomString; + +using ReadResult = CordRepBtreeReader::ReadResult; + +TEST(CordRepBtreeReaderTest, Next) { + constexpr size_t kChars = 3; + const size_t cap = CordRepBtree::kMaxCapacity; + int counts[] = {1, 2, cap, cap * cap, cap * cap + 1, cap * cap * 2 + 17}; + + for (int count : counts) { + std::string data = CreateRandomString(count * kChars); + std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + CordRepBtreeReader reader; + size_t remaining = data.length(); + absl::string_view chunk = reader.Init(node); + EXPECT_THAT(chunk, Eq(data.substr(0, chunk.length()))); + + remaining -= chunk.length(); + EXPECT_THAT(reader.remaining(), Eq(remaining)); + + while (remaining > 0) { + const size_t offset = data.length() - remaining; + chunk = reader.Next(); + EXPECT_THAT(chunk, Eq(data.substr(offset, chunk.length()))); + + remaining -= chunk.length(); + EXPECT_THAT(reader.remaining(), Eq(remaining)); + } + + EXPECT_THAT(reader.remaining(), Eq(0)); + + // Verify trying to read beyond EOF returns empty string_view + EXPECT_THAT(reader.Next(), testing::IsEmpty()); + + CordRep::Unref(node); + } +} + +TEST(CordRepBtreeReaderTest, Skip) { + constexpr size_t kChars = 3; + const size_t cap = CordRepBtree::kMaxCapacity; + int counts[] = {1, 2, cap, cap * cap, cap * cap + 1, cap * cap * 2 + 17}; + + for (int count : counts) { + std::string data = CreateRandomString(count * kChars); + std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + for (size_t skip1 = 0; skip1 < data.length() - kChars; ++skip1) { + for (size_t skip2 = 0; skip2 < data.length() - kChars; ++skip2) { + CordRepBtreeReader reader; + size_t remaining = data.length(); + absl::string_view chunk = reader.Init(node); + remaining -= chunk.length(); + + chunk = reader.Skip(skip1); + size_t offset = data.length() - remaining; + ASSERT_THAT(chunk, Eq(data.substr(offset + skip1, chunk.length()))); + remaining -= chunk.length() + skip1; + ASSERT_THAT(reader.remaining(), Eq(remaining)); + + if (remaining == 0) continue; + + size_t skip = std::min(remaining - 1, skip2); + chunk = reader.Skip(skip); + offset = data.length() - remaining; + ASSERT_THAT(chunk, Eq(data.substr(offset + skip, chunk.length()))); + } + } + + CordRep::Unref(node); + } +} + +TEST(CordRepBtreeReaderTest, SkipBeyondLength) { + CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc")); + tree = CordRepBtree::Append(tree, MakeFlat("def")); + CordRepBtreeReader reader; + reader.Init(tree); + EXPECT_THAT(reader.Skip(100), IsEmpty()); + EXPECT_THAT(reader.remaining(), Eq(0)); + CordRep::Unref(tree); +} + +TEST(CordRepBtreeReaderTest, Seek) { + constexpr size_t kChars = 3; + const size_t cap = CordRepBtree::kMaxCapacity; + int counts[] = {1, 2, cap, cap * cap, cap * cap + 1, cap * cap * 2 + 17}; + + for (int count : counts) { + std::string data = CreateRandomString(count * kChars); + std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + for (size_t seek = 0; seek < data.length() - 1; ++seek) { + CordRepBtreeReader reader; + reader.Init(node); + absl::string_view chunk = reader.Seek(seek); + ASSERT_THAT(chunk, Not(IsEmpty())); + ASSERT_THAT(chunk, Eq(data.substr(seek, chunk.length()))); + ASSERT_THAT(reader.remaining(), + Eq(data.length() - seek - chunk.length())); + } + + CordRep::Unref(node); + } +} + +TEST(CordRepBtreeReaderTest, SeekBeyondLength) { + CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc")); + tree = CordRepBtree::Append(tree, MakeFlat("def")); + CordRepBtreeReader reader; + reader.Init(tree); + EXPECT_THAT(reader.Seek(6), IsEmpty()); + EXPECT_THAT(reader.remaining(), Eq(0)); + EXPECT_THAT(reader.Seek(100), IsEmpty()); + EXPECT_THAT(reader.remaining(), Eq(0)); + CordRep::Unref(tree); +} + +TEST(CordRepBtreeReaderTest, Read) { + std::string data = "abcdefghijklmno"; + std::vector<CordRep*> flats = CreateFlatsFromString(data, 5); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + CordRep* tree; + CordRepBtreeReader reader; + absl::string_view chunk; + + // Read zero bytes + chunk = reader.Init(node); + chunk = reader.Read(0, chunk.length(), tree); + EXPECT_THAT(tree, Eq(nullptr)); + EXPECT_THAT(chunk, Eq("abcde")); + EXPECT_THAT(reader.remaining(), Eq(10)); + EXPECT_THAT(reader.Next(), Eq("fghij")); + + // Read in full + chunk = reader.Init(node); + chunk = reader.Read(15, chunk.length(), tree); + EXPECT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("abcdefghijklmno")); + EXPECT_THAT(chunk, Eq("")); + EXPECT_THAT(reader.remaining(), Eq(0)); + CordRep::Unref(tree); + + // Read < chunk bytes + chunk = reader.Init(node); + chunk = reader.Read(3, chunk.length(), tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("abc")); + EXPECT_THAT(chunk, Eq("de")); + EXPECT_THAT(reader.remaining(), Eq(10)); + EXPECT_THAT(reader.Next(), Eq("fghij")); + CordRep::Unref(tree); + + // Read < chunk bytes at offset + chunk = reader.Init(node); + chunk = reader.Read(2, chunk.length() - 2, tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("cd")); + EXPECT_THAT(chunk, Eq("e")); + EXPECT_THAT(reader.remaining(), Eq(10)); + EXPECT_THAT(reader.Next(), Eq("fghij")); + CordRep::Unref(tree); + + // Read from consumed chunk + chunk = reader.Init(node); + chunk = reader.Read(3, 0, tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("fgh")); + EXPECT_THAT(chunk, Eq("ij")); + EXPECT_THAT(reader.remaining(), Eq(5)); + EXPECT_THAT(reader.Next(), Eq("klmno")); + CordRep::Unref(tree); + + // Read across chunks + chunk = reader.Init(node); + chunk = reader.Read(12, chunk.length() - 2, tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("cdefghijklmn")); + EXPECT_THAT(chunk, Eq("o")); + EXPECT_THAT(reader.remaining(), Eq(0)); + CordRep::Unref(tree); + + // Read across chunks landing on exact edge boundary + chunk = reader.Init(node); + chunk = reader.Read(10 - 2, chunk.length() - 2, tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("cdefghij")); + EXPECT_THAT(chunk, Eq("klmno")); + EXPECT_THAT(reader.remaining(), Eq(0)); + CordRep::Unref(tree); + + CordRep::Unref(node); +} + +TEST(CordRepBtreeReaderTest, ReadExhaustive) { + constexpr size_t kChars = 3; + const size_t cap = CordRepBtree::kMaxCapacity; + int counts[] = {1, 2, cap, cap * cap + 1, cap * cap * cap * 2 + 17}; + + for (int count : counts) { + std::string data = CreateRandomString(count * kChars); + std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + for (size_t read_size : {kChars - 1, kChars, kChars + 7, cap * cap}) { + CordRepBtreeReader reader; + absl::string_view chunk = reader.Init(node); + + // `consumed` tracks the end of last consumed chunk which is the start of + // the next chunk: we always read with `chunk_size = chunk.length()`. + size_t consumed = 0; + size_t remaining = data.length(); + while (remaining > 0) { + CordRep* tree; + size_t n = (std::min)(remaining, read_size); + chunk = reader.Read(n, chunk.length(), tree); + EXPECT_THAT(tree, Ne(nullptr)); + if (tree) { + EXPECT_THAT(CordToString(tree), Eq(data.substr(consumed, n))); + CordRep::Unref(tree); + } + + consumed += n; + remaining -= n; + EXPECT_THAT(reader.remaining(), Eq(remaining - chunk.length())); + + if (remaining > 0) { + ASSERT_FALSE(chunk.empty()); + ASSERT_THAT(chunk, Eq(data.substr(consumed, chunk.length()))); + } else { + ASSERT_TRUE(chunk.empty()) << chunk; + } + } + } + + CordRep::Unref(node); + } +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_test.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_test.cc new file mode 100644 index 0000000000..be9473d41d --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_btree_test.cc @@ -0,0 +1,1489 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_btree.h" + +#include <cmath> +#include <deque> +#include <iostream> +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/cleanup/cleanup.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_test_util.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +class CordRepBtreeTestPeer { + public: + static void SetEdge(CordRepBtree* node, size_t idx, CordRep* edge) { + node->edges_[idx] = edge; + } + static void AddEdge(CordRepBtree* node, CordRep* edge) { + node->edges_[node->fetch_add_end(1)] = edge; + } +}; + +namespace { + +using ::absl::cordrep_testing::AutoUnref; +using ::absl::cordrep_testing::CordCollectRepsIf; +using ::absl::cordrep_testing::CordToString; +using ::absl::cordrep_testing::CordVisitReps; +using ::absl::cordrep_testing::CreateFlatsFromString; +using ::absl::cordrep_testing::CreateRandomString; +using ::absl::cordrep_testing::MakeConcat; +using ::absl::cordrep_testing::MakeExternal; +using ::absl::cordrep_testing::MakeFlat; +using ::absl::cordrep_testing::MakeSubstring; +using ::testing::_; +using ::testing::AllOf; +using ::testing::AnyOf; +using ::testing::Conditional; +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; +using ::testing::Eq; +using ::testing::HasSubstr; +using ::testing::Le; +using ::testing::Ne; +using ::testing::Not; +using ::testing::SizeIs; +using ::testing::TypedEq; + +MATCHER_P(EqFlatHolding, data, "Equals flat holding data") { + if (arg->tag < FLAT) { + *result_listener << "Expected FLAT, got tag " << static_cast<int>(arg->tag); + return false; + } + std::string actual = CordToString(arg); + if (actual != data) { + *result_listener << "Expected flat holding \"" << data + << "\", got flat holding \"" << actual << "\""; + return false; + } + return true; +} + +MATCHER_P(IsNode, height, absl::StrCat("Is a valid node of height ", height)) { + if (arg == nullptr) { + *result_listener << "Expected NODE, got nullptr"; + return false; + } + if (arg->tag != BTREE) { + *result_listener << "Expected NODE, got " << static_cast<int>(arg->tag); + return false; + } + if (!CordRepBtree::IsValid(arg->btree())) { + CordRepBtree::Dump(arg->btree(), "Expected valid NODE, got:", false, + *result_listener->stream()); + return false; + } + if (arg->btree()->height() != height) { + *result_listener << "Expected NODE of height " << height << ", got " + << arg->btree()->height(); + return false; + } + return true; +} + +MATCHER_P2(IsSubstring, start, length, + absl::StrCat("Is a substring(start = ", start, ", length = ", length, + ")")) { + if (arg == nullptr) { + *result_listener << "Expected substring, got nullptr"; + return false; + } + if (arg->tag != SUBSTRING) { + *result_listener << "Expected SUBSTRING, got " + << static_cast<int>(arg->tag); + return false; + } + const CordRepSubstring* const substr = arg->substring(); + if (substr->start != start || substr->length != length) { + *result_listener << "Expected substring(" << start << ", " << length + << "), got substring(" << substr->start << ", " + << substr->length << ")"; + return false; + } + return true; +} + +// DataConsumer is a simple helper class used by tests to 'consume' string +// fragments from the provided input in forward or backward direction. +class DataConsumer { + public: + // Starts consumption of `data`. Caller must make sure `data` outlives this + // instance. Consumes data starting at the front if `forward` is true, else + // consumes data from the back. + DataConsumer(absl::string_view data, bool forward) + : data_(data), forward_(forward) {} + + // Return the next `n` bytes from referenced data. + absl::string_view Next(size_t n) { + assert(n <= data_.size() - consumed_); + consumed_ += n; + return data_.substr(forward_ ? consumed_ - n : data_.size() - consumed_, n); + } + + // Returns all data consumed so far. + absl::string_view Consumed() const { + return forward_ ? data_.substr(0, consumed_) + : data_.substr(data_.size() - consumed_); + } + + private: + absl::string_view data_; + size_t consumed_ = 0; + bool forward_; +}; + +// BtreeAdd returns either CordRepBtree::Append or CordRepBtree::Prepend. +CordRepBtree* BtreeAdd(CordRepBtree* node, bool append, + absl::string_view data) { + return append ? CordRepBtree::Append(node, data) + : CordRepBtree::Prepend(node, data); +} + +// Recursively collects all leaf edges from `tree` and appends them to `edges`. +void GetLeafEdges(const CordRepBtree* tree, std::vector<CordRep*>& edges) { + if (tree->height() == 0) { + for (CordRep* edge : tree->Edges()) { + edges.push_back(edge); + } + } else { + for (CordRep* edge : tree->Edges()) { + GetLeafEdges(edge->btree(), edges); + } + } +} + +// Recursively collects and returns all leaf edges from `tree`. +std::vector<CordRep*> GetLeafEdges(const CordRepBtree* tree) { + std::vector<CordRep*> edges; + GetLeafEdges(tree, edges); + return edges; +} + +// Creates a flat containing the hexadecimal value of `i` zero padded +// to at least 4 digits prefixed with "0x", e.g.: "0x04AC". +CordRepFlat* MakeHexFlat(size_t i) { + return MakeFlat(absl::StrCat("0x", absl::Hex(i, absl::kZeroPad4))); +} + +CordRepBtree* MakeLeaf(size_t size = CordRepBtree::kMaxCapacity) { + assert(size <= CordRepBtree::kMaxCapacity); + CordRepBtree* leaf = CordRepBtree::Create(MakeHexFlat(0)); + for (size_t i = 1; i < size; ++i) { + leaf = CordRepBtree::Append(leaf, MakeHexFlat(i)); + } + return leaf; +} + +CordRepBtree* MakeTree(size_t size, bool append = true) { + CordRepBtree* tree = CordRepBtree::Create(MakeHexFlat(0)); + for (size_t i = 1; i < size; ++i) { + tree = append ? CordRepBtree::Append(tree, MakeHexFlat(i)) + : CordRepBtree::Prepend(tree, MakeHexFlat(i)); + } + return tree; +} + +CordRepBtree* CreateTree(absl::Span<CordRep* const> reps) { + auto it = reps.begin(); + CordRepBtree* tree = CordRepBtree::Create(*it); + while (++it != reps.end()) tree = CordRepBtree::Append(tree, *it); + return tree; +} + +CordRepBtree* CreateTree(absl::string_view data, size_t chunk_size) { + return CreateTree(CreateFlatsFromString(data, chunk_size)); +} + +CordRepBtree* CreateTreeReverse(absl::string_view data, size_t chunk_size) { + std::vector<CordRep*> flats = CreateFlatsFromString(data, chunk_size); + auto rit = flats.rbegin(); + CordRepBtree* tree = CordRepBtree::Create(*rit); + while (++rit != flats.rend()) tree = CordRepBtree::Prepend(tree, *rit); + return tree; +} + +class CordRepBtreeTest : public testing::TestWithParam<bool> { + public: + bool shared() const { return GetParam(); } + + static std::string ToString(testing::TestParamInfo<bool> param) { + return param.param ? "Shared" : "Private"; + } +}; + +INSTANTIATE_TEST_SUITE_P(WithParam, CordRepBtreeTest, testing::Bool(), + CordRepBtreeTest::ToString); + +class CordRepBtreeHeightTest : public testing::TestWithParam<int> { + public: + int height() const { return GetParam(); } + + static std::string ToString(testing::TestParamInfo<int> param) { + return absl::StrCat(param.param); + } +}; + +INSTANTIATE_TEST_SUITE_P(WithHeights, CordRepBtreeHeightTest, + testing::Range(0, CordRepBtree::kMaxHeight), + CordRepBtreeHeightTest::ToString); + +using TwoBools = testing::tuple<bool, bool>; + +class CordRepBtreeDualTest : public testing::TestWithParam<TwoBools> { + public: + bool first_shared() const { return std::get<0>(GetParam()); } + bool second_shared() const { return std::get<1>(GetParam()); } + + static std::string ToString(testing::TestParamInfo<TwoBools> param) { + if (std::get<0>(param.param)) { + return std::get<1>(param.param) ? "BothShared" : "FirstShared"; + } + return std::get<1>(param.param) ? "SecondShared" : "Private"; + } +}; + +INSTANTIATE_TEST_SUITE_P(WithParam, CordRepBtreeDualTest, + testing::Combine(testing::Bool(), testing::Bool()), + CordRepBtreeDualTest::ToString); + +TEST(CordRepBtreeTest, SizeIsMultipleOf64) { + // Only enforce for fully 64-bit platforms. + if (sizeof(size_t) == 8 && sizeof(void*) == 8) { + EXPECT_THAT(sizeof(CordRepBtree) % 64, Eq(0)) << "Should be multiple of 64"; + } +} + +TEST(CordRepBtreeTest, NewDestroyEmptyTree) { + auto* tree = CordRepBtree::New(); + EXPECT_THAT(tree->size(), Eq(0)); + EXPECT_THAT(tree->height(), Eq(0)); + EXPECT_THAT(tree->Edges(), ElementsAre()); + CordRepBtree::Destroy(tree); +} + +TEST(CordRepBtreeTest, NewDestroyEmptyTreeAtHeight) { + auto* tree = CordRepBtree::New(3); + EXPECT_THAT(tree->size(), Eq(0)); + EXPECT_THAT(tree->height(), Eq(3)); + EXPECT_THAT(tree->Edges(), ElementsAre()); + CordRepBtree::Destroy(tree); +} + +TEST(CordRepBtreeTest, Btree) { + CordRep* rep = CordRepBtree::New(); + EXPECT_THAT(rep->btree(), Eq(rep)); + EXPECT_THAT(static_cast<const CordRep*>(rep)->btree(), Eq(rep)); + CordRep::Unref(rep); +#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) + rep = MakeFlat("Hello world"); + EXPECT_DEATH(rep->btree(), ".*"); + EXPECT_DEATH(static_cast<const CordRep*>(rep)->btree(), ".*"); + CordRep::Unref(rep); +#endif +} + +TEST(CordRepBtreeTest, EdgeData) { + CordRepFlat* flat = MakeFlat("Hello world"); + CordRepExternal* external = MakeExternal("Hello external"); + CordRep* substr1 = MakeSubstring(1, 6, CordRep::Ref(flat)); + CordRep* substr2 = MakeSubstring(1, 6, CordRep::Ref(external)); + CordRep* concat = MakeConcat(CordRep::Ref(flat), CordRep::Ref(external)); + CordRep* bad_substr = MakeSubstring(1, 2, CordRep::Ref(substr1)); + + EXPECT_TRUE(CordRepBtree::IsDataEdge(flat)); + EXPECT_THAT(CordRepBtree::EdgeDataPtr(flat), + TypedEq<const void*>(flat->Data())); + EXPECT_THAT(CordRepBtree::EdgeData(flat), Eq("Hello world")); + + EXPECT_TRUE(CordRepBtree::IsDataEdge(external)); + EXPECT_THAT(CordRepBtree::EdgeDataPtr(external), + TypedEq<const void*>(external->base)); + EXPECT_THAT(CordRepBtree::EdgeData(external), Eq("Hello external")); + + EXPECT_TRUE(CordRepBtree::IsDataEdge(substr1)); + EXPECT_THAT(CordRepBtree::EdgeDataPtr(substr1), + TypedEq<const void*>(flat->Data() + 1)); + EXPECT_THAT(CordRepBtree::EdgeData(substr1), Eq("ello w")); + + EXPECT_TRUE(CordRepBtree::IsDataEdge(substr2)); + EXPECT_THAT(CordRepBtree::EdgeDataPtr(substr2), + TypedEq<const void*>(external->base + 1)); + EXPECT_THAT(CordRepBtree::EdgeData(substr2), Eq("ello e")); + + EXPECT_FALSE(CordRepBtree::IsDataEdge(concat)); + EXPECT_FALSE(CordRepBtree::IsDataEdge(bad_substr)); +#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) + EXPECT_DEATH(CordRepBtree::EdgeData(concat), ".*"); + EXPECT_DEATH(CordRepBtree::EdgeDataPtr(concat), ".*"); + EXPECT_DEATH(CordRepBtree::EdgeData(bad_substr), ".*"); + EXPECT_DEATH(CordRepBtree::EdgeDataPtr(bad_substr), ".*"); +#endif + + CordRep::Unref(bad_substr); + CordRep::Unref(concat); + CordRep::Unref(substr2); + CordRep::Unref(substr1); + CordRep::Unref(external); + CordRep::Unref(flat); +} + +TEST(CordRepBtreeTest, CreateUnrefLeaf) { + auto* flat = MakeFlat("a"); + auto* leaf = CordRepBtree::Create(flat); + EXPECT_THAT(leaf->size(), Eq(1)); + EXPECT_THAT(leaf->height(), Eq(0)); + EXPECT_THAT(leaf->Edges(), ElementsAre(flat)); + CordRepBtree::Unref(leaf); +} + +TEST(CordRepBtreeTest, NewUnrefNode) { + auto* leaf = CordRepBtree::Create(MakeFlat("a")); + CordRepBtree* tree = CordRepBtree::New(leaf); + EXPECT_THAT(tree->size(), Eq(1)); + EXPECT_THAT(tree->height(), Eq(1)); + EXPECT_THAT(tree->Edges(), ElementsAre(leaf)); + CordRepBtree::Unref(tree); +} + +TEST_P(CordRepBtreeTest, AppendToLeafToCapacity) { + AutoUnref refs; + std::vector<CordRep*> flats; + flats.push_back(MakeHexFlat(0)); + auto* leaf = CordRepBtree::Create(flats.back()); + + for (size_t i = 1; i < CordRepBtree::kMaxCapacity; ++i) { + refs.RefIf(shared(), leaf); + flats.push_back(MakeHexFlat(i)); + auto* result = CordRepBtree::Append(leaf, flats.back()); + EXPECT_THAT(result->height(), Eq(0)); + EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf))); + EXPECT_THAT(result->Edges(), ElementsAreArray(flats)); + leaf = result; + } + CordRep::Unref(leaf); +} + +TEST_P(CordRepBtreeTest, PrependToLeafToCapacity) { + AutoUnref refs; + std::deque<CordRep*> flats; + flats.push_front(MakeHexFlat(0)); + auto* leaf = CordRepBtree::Create(flats.front()); + + for (size_t i = 1; i < CordRepBtree::kMaxCapacity; ++i) { + refs.RefIf(shared(), leaf); + flats.push_front(MakeHexFlat(i)); + auto* result = CordRepBtree::Prepend(leaf, flats.front()); + EXPECT_THAT(result->height(), Eq(0)); + EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf))); + EXPECT_THAT(result->Edges(), ElementsAreArray(flats)); + leaf = result; + } + CordRep::Unref(leaf); +} + +// This test specifically aims at code aligning data at either the front or the +// back of the contained `edges[]` array, alternating Append and Prepend will +// move `begin()` and `end()` values as needed for each added value. +TEST_P(CordRepBtreeTest, AppendPrependToLeafToCapacity) { + AutoUnref refs; + std::deque<CordRep*> flats; + flats.push_front(MakeHexFlat(0)); + auto* leaf = CordRepBtree::Create(flats.front()); + + for (size_t i = 1; i < CordRepBtree::kMaxCapacity; ++i) { + refs.RefIf(shared(), leaf); + CordRepBtree* result; + if (i % 2 != 0) { + flats.push_front(MakeHexFlat(i)); + result = CordRepBtree::Prepend(leaf, flats.front()); + } else { + flats.push_back(MakeHexFlat(i)); + result = CordRepBtree::Append(leaf, flats.back()); + } + EXPECT_THAT(result->height(), Eq(0)); + EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf))); + EXPECT_THAT(result->Edges(), ElementsAreArray(flats)); + leaf = result; + } + CordRep::Unref(leaf); +} + +TEST_P(CordRepBtreeTest, AppendToLeafBeyondCapacity) { + AutoUnref refs; + auto* leaf = MakeLeaf(); + refs.RefIf(shared(), leaf); + CordRep* flat = MakeFlat("abc"); + auto* result = CordRepBtree::Append(leaf, flat); + ASSERT_THAT(result, IsNode(1)); + EXPECT_THAT(result, Ne(leaf)); + absl::Span<CordRep* const> edges = result->Edges(); + ASSERT_THAT(edges, ElementsAre(leaf, IsNode(0))); + EXPECT_THAT(edges[1]->btree()->Edges(), ElementsAre(flat)); + CordRep::Unref(result); +} + +TEST_P(CordRepBtreeTest, PrependToLeafBeyondCapacity) { + AutoUnref refs; + auto* leaf = MakeLeaf(); + refs.RefIf(shared(), leaf); + CordRep* flat = MakeFlat("abc"); + auto* result = CordRepBtree::Prepend(leaf, flat); + ASSERT_THAT(result, IsNode(1)); + EXPECT_THAT(result, Ne(leaf)); + absl::Span<CordRep* const> edges = result->Edges(); + ASSERT_THAT(edges, ElementsAre(IsNode(0), leaf)); + EXPECT_THAT(edges[0]->btree()->Edges(), ElementsAre(flat)); + CordRep::Unref(result); +} + +TEST_P(CordRepBtreeTest, AppendToTreeOneDeep) { + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + AutoUnref refs; + std::vector<CordRep*> flats; + flats.push_back(MakeHexFlat(0)); + CordRepBtree* tree = CordRepBtree::Create(flats.back()); + for (size_t i = 1; i <= max_cap; ++i) { + flats.push_back(MakeHexFlat(i)); + tree = CordRepBtree::Append(tree, flats.back()); + } + ASSERT_THAT(tree, IsNode(1)); + + for (size_t i = max_cap + 1; i < max_cap * max_cap; ++i) { + // Ref top level tree based on param. + // Ref leaf node once every 4 iterations, which should not have an + // observable effect other than that the leaf itself is copied. + refs.RefIf(shared(), tree); + refs.RefIf(i % 4 == 0, tree->Edges().back()); + + flats.push_back(MakeHexFlat(i)); + CordRepBtree* result = CordRepBtree::Append(tree, flats.back()); + ASSERT_THAT(result, IsNode(1)); + ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + std::vector<CordRep*> edges = GetLeafEdges(result); + ASSERT_THAT(edges, ElementsAreArray(flats)); + tree = result; + } + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeTest, AppendToTreeTwoDeep) { + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + AutoUnref refs; + std::vector<CordRep*> flats; + flats.push_back(MakeHexFlat(0)); + CordRepBtree* tree = CordRepBtree::Create(flats.back()); + for (size_t i = 1; i <= max_cap * max_cap; ++i) { + flats.push_back(MakeHexFlat(i)); + tree = CordRepBtree::Append(tree, flats.back()); + } + ASSERT_THAT(tree, IsNode(2)); + for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; ++i) { + // Ref top level tree based on param. + // Ref child node once every 16 iterations, and leaf node every 4 + // iterrations which which should not have an observable effect other than + // the node and/or the leaf below it being copied. + refs.RefIf(shared(), tree); + refs.RefIf(i % 16 == 0, tree->Edges().back()); + refs.RefIf(i % 4 == 0, tree->Edges().back()->btree()->Edges().back()); + + flats.push_back(MakeHexFlat(i)); + CordRepBtree* result = CordRepBtree::Append(tree, flats.back()); + ASSERT_THAT(result, IsNode(2)); + ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + std::vector<CordRep*> edges = GetLeafEdges(result); + ASSERT_THAT(edges, ElementsAreArray(flats)); + tree = result; + } + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeTest, PrependToTreeOneDeep) { + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + AutoUnref refs; + std::deque<CordRep*> flats; + flats.push_back(MakeHexFlat(0)); + CordRepBtree* tree = CordRepBtree::Create(flats.back()); + for (size_t i = 1; i <= max_cap; ++i) { + flats.push_front(MakeHexFlat(i)); + tree = CordRepBtree::Prepend(tree, flats.front()); + } + ASSERT_THAT(tree, IsNode(1)); + + for (size_t i = max_cap + 1; i < max_cap * max_cap; ++i) { + // Ref top level tree based on param. + // Ref leaf node once every 4 iterations which should not have an observable + // effect other than than the leaf itself is copied. + refs.RefIf(shared(), tree); + refs.RefIf(i % 4 == 0, tree->Edges().back()); + + flats.push_front(MakeHexFlat(i)); + CordRepBtree* result = CordRepBtree::Prepend(tree, flats.front()); + ASSERT_THAT(result, IsNode(1)); + ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + std::vector<CordRep*> edges = GetLeafEdges(result); + ASSERT_THAT(edges, ElementsAreArray(flats)); + tree = result; + } + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeTest, PrependToTreeTwoDeep) { + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + AutoUnref refs; + std::deque<CordRep*> flats; + flats.push_back(MakeHexFlat(0)); + CordRepBtree* tree = CordRepBtree::Create(flats.back()); + for (size_t i = 1; i <= max_cap * max_cap; ++i) { + flats.push_front(MakeHexFlat(i)); + tree = CordRepBtree::Prepend(tree, flats.front()); + } + ASSERT_THAT(tree, IsNode(2)); + for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; ++i) { + // Ref top level tree based on param. + // Ref child node once every 16 iterations, and leaf node every 4 + // iterrations which which should not have an observable effect other than + // the node and/or the leaf below it being copied. + refs.RefIf(shared(), tree); + refs.RefIf(i % 16 == 0, tree->Edges().back()); + refs.RefIf(i % 4 == 0, tree->Edges().back()->btree()->Edges().back()); + + flats.push_front(MakeHexFlat(i)); + CordRepBtree* result = CordRepBtree::Prepend(tree, flats.front()); + ASSERT_THAT(result, IsNode(2)); + ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + std::vector<CordRep*> edges = GetLeafEdges(result); + ASSERT_THAT(edges, ElementsAreArray(flats)); + tree = result; + } + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeDualTest, MergeLeafsNotExceedingCapacity) { + for (bool use_append : {false, true}) { + SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend"); + + AutoUnref refs; + std::vector<CordRep*> flats; + + // Build `left` side leaf appending all contained flats to `flats` + CordRepBtree* left = MakeLeaf(3); + GetLeafEdges(left, flats); + refs.RefIf(first_shared(), left); + + // Build `right` side leaf appending all contained flats to `flats` + CordRepBtree* right = MakeLeaf(2); + GetLeafEdges(right, flats); + refs.RefIf(second_shared(), right); + + CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right) + : CordRepBtree::Prepend(right, left); + EXPECT_THAT(tree, IsNode(0)); + + // `tree` contains all flats originally belonging to `left` and `right`. + EXPECT_THAT(tree->Edges(), ElementsAreArray(flats)); + CordRepBtree::Unref(tree); + } +} + +TEST_P(CordRepBtreeDualTest, MergeLeafsExceedingCapacity) { + for (bool use_append : {false, true}) { + SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend"); + + AutoUnref refs; + + // Build `left` side tree appending all contained flats to `flats` + CordRepBtree* left = MakeLeaf(CordRepBtree::kMaxCapacity - 2); + refs.RefIf(first_shared(), left); + + // Build `right` side tree appending all contained flats to `flats` + CordRepBtree* right = MakeLeaf(CordRepBtree::kMaxCapacity - 1); + refs.RefIf(second_shared(), right); + + CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right) + : CordRepBtree::Prepend(right, left); + EXPECT_THAT(tree, IsNode(1)); + EXPECT_THAT(tree->Edges(), ElementsAre(left, right)); + CordRepBtree::Unref(tree); + } +} + +TEST_P(CordRepBtreeDualTest, MergeEqualHeightTrees) { + for (bool use_append : {false, true}) { + SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend"); + + AutoUnref refs; + std::vector<CordRep*> flats; + + // Build `left` side tree appending all contained flats to `flats` + CordRepBtree* left = MakeTree(CordRepBtree::kMaxCapacity * 3); + GetLeafEdges(left, flats); + refs.RefIf(first_shared(), left); + + // Build `right` side tree appending all contained flats to `flats` + CordRepBtree* right = MakeTree(CordRepBtree::kMaxCapacity * 2); + GetLeafEdges(right, flats); + refs.RefIf(second_shared(), right); + + CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right) + : CordRepBtree::Prepend(right, left); + EXPECT_THAT(tree, IsNode(1)); + EXPECT_THAT(tree->Edges(), SizeIs(5)); + + // `tree` contains all flats originally belonging to `left` and `right`. + EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats)); + CordRepBtree::Unref(tree); + } +} + +TEST_P(CordRepBtreeDualTest, MergeLeafWithTreeNotExceedingLeafCapacity) { + for (bool use_append : {false, true}) { + SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend"); + + AutoUnref refs; + std::vector<CordRep*> flats; + + // Build `left` side tree appending all added flats to `flats` + CordRepBtree* left = MakeTree(CordRepBtree::kMaxCapacity * 2 + 2); + GetLeafEdges(left, flats); + refs.RefIf(first_shared(), left); + + // Build `right` side tree appending all added flats to `flats` + CordRepBtree* right = MakeTree(3); + GetLeafEdges(right, flats); + refs.RefIf(second_shared(), right); + + CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right) + : CordRepBtree::Prepend(right, left); + EXPECT_THAT(tree, IsNode(1)); + EXPECT_THAT(tree->Edges(), SizeIs(3)); + + // `tree` contains all flats originally belonging to `left` and `right`. + EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats)); + CordRepBtree::Unref(tree); + } +} + +TEST_P(CordRepBtreeDualTest, MergeLeafWithTreeExceedingLeafCapacity) { + for (bool use_append : {false, true}) { + SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend"); + + AutoUnref refs; + std::vector<CordRep*> flats; + + // Build `left` side tree appending all added flats to `flats` + CordRepBtree* left = MakeTree(CordRepBtree::kMaxCapacity * 3 - 2); + GetLeafEdges(left, flats); + refs.RefIf(first_shared(), left); + + // Build `right` side tree appending all added flats to `flats` + CordRepBtree* right = MakeTree(3); + GetLeafEdges(right, flats); + refs.RefIf(second_shared(), right); + + CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right) + : CordRepBtree::Prepend(right, left); + EXPECT_THAT(tree, IsNode(1)); + EXPECT_THAT(tree->Edges(), SizeIs(4)); + + // `tree` contains all flats originally belonging to `left` and `right`. + EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats)); + CordRepBtree::Unref(tree); + } +} + +void RefEdgesAt(size_t depth, AutoUnref& refs, CordRepBtree* tree) { + absl::Span<CordRep* const> edges = tree->Edges(); + if (depth == 0) { + refs.Ref(edges.front()); + refs.Ref(edges.back()); + } else { + assert(tree->height() > 0); + RefEdgesAt(depth - 1, refs, edges.front()->btree()); + RefEdgesAt(depth - 1, refs, edges.back()->btree()); + } +} + +TEST(CordRepBtreeTest, MergeFuzzTest) { + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + std::minstd_rand rnd; + std::uniform_int_distribution<int> coin_flip(0, 1); + std::uniform_int_distribution<int> dice_throw(1, 6); + + auto random_leaf_count = [&]() { + std::uniform_int_distribution<int> dist_height(0, 3); + std::uniform_int_distribution<int> dist_leaf(0, max_cap - 1); + const size_t height = dist_height(rnd); + return (height ? pow(max_cap, height) : 0) + dist_leaf(rnd); + }; + + for (int i = 0; i < 10000; ++i) { + AutoUnref refs; + std::vector<CordRep*> flats; + + CordRepBtree* left = MakeTree(random_leaf_count(), coin_flip(rnd)); + GetLeafEdges(left, flats); + if (dice_throw(rnd) == 1) { + std::uniform_int_distribution<int> dist(0, left->height()); + RefEdgesAt(dist(rnd), refs, left); + } + + CordRepBtree* right = MakeTree(random_leaf_count(), coin_flip(rnd)); + GetLeafEdges(right, flats); + if (dice_throw(rnd) == 1) { + std::uniform_int_distribution<int> dist(0, right->height()); + RefEdgesAt(dist(rnd), refs, right); + } + + CordRepBtree* tree = CordRepBtree::Append(left, right); + EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats)); + CordRepBtree::Unref(tree); + } +} + +TEST_P(CordRepBtreeTest, RemoveSuffix) { + // Create tree of 1, 2 and 3 levels high + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + for (size_t cap : {max_cap - 1, max_cap * 2, max_cap * max_cap * 2}) { + const std::string data = CreateRandomString(cap * 512); + + { + // Verify RemoveSuffix(<all>) + AutoUnref refs; + CordRepBtree* node = refs.RefIf(shared(), CreateTree(data, 512)); + EXPECT_THAT(CordRepBtree::RemoveSuffix(node, data.length()), Eq(nullptr)); + + // Verify RemoveSuffix(<none>) + node = refs.RefIf(shared(), CreateTree(data, 512)); + EXPECT_THAT(CordRepBtree::RemoveSuffix(node, 0), Eq(node)); + CordRep::Unref(node); + } + + for (int n = 1; n < data.length(); ++n) { + AutoUnref refs; + auto flats = CreateFlatsFromString(data, 512); + CordRepBtree* node = refs.RefIf(shared(), CreateTree(flats)); + CordRep* rep = refs.Add(CordRepBtree::RemoveSuffix(node, n)); + EXPECT_THAT(CordToString(rep), Eq(data.substr(0, data.length() - n))); + + // Collect all flats + auto is_flat = [](CordRep* rep) { return rep->tag >= FLAT; }; + std::vector<CordRep*> edges = CordCollectRepsIf(is_flat, rep); + ASSERT_THAT(edges.size(), Le(flats.size())); + + // Isolate last edge + CordRep* last_edge = edges.back(); + edges.pop_back(); + const size_t last_length = rep->length - edges.size() * 512; + + // All flats except the last edge must be kept or copied 'as is' + int index = 0; + for (CordRep* edge : edges) { + ASSERT_THAT(edge, Eq(flats[index++])); + ASSERT_THAT(edge->length, Eq(512)); + } + + // CordRepBtree may optimize small substrings to avoid waste, so only + // check for flat sharing / updates where the code should always do this. + if (last_length >= 500) { + EXPECT_THAT(last_edge, Eq(flats[index++])); + if (shared()) { + EXPECT_THAT(last_edge->length, Eq(512)); + } else { + EXPECT_TRUE(last_edge->refcount.IsOne()); + EXPECT_THAT(last_edge->length, Eq(last_length)); + } + } + } + } +} + +TEST(CordRepBtreeTest, SubTree) { + // Create tree of at least 2 levels high + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + const size_t n = max_cap * max_cap * 2; + const std::string data = CreateRandomString(n * 3); + std::vector<CordRep*> flats; + for (absl::string_view s = data; !s.empty(); s.remove_prefix(3)) { + flats.push_back(MakeFlat(s.substr(0, 3))); + } + CordRepBtree* node = CordRepBtree::Create(CordRep::Ref(flats[0])); + for (size_t i = 1; i < flats.size(); ++i) { + node = CordRepBtree::Append(node, CordRep::Ref(flats[i])); + } + + for (int offset = 0; offset < data.length(); ++offset) { + for (int length = 1; length <= data.length() - offset; ++length) { + CordRep* rep = node->SubTree(offset, length); + EXPECT_THAT(CordToString(rep), Eq(data.substr(offset, length))); + CordRep::Unref(rep); + } + } + CordRepBtree::Unref(node); + for (CordRep* rep : flats) { + CordRep::Unref(rep); + } +} + +TEST(CordRepBtreeTest, SubTreeOnExistingSubstring) { + // This test verifies that a SubTree call on a pre-existing (large) substring + // adjusts the existing substring if not shared, and else rewrites the + // existing substring. + AutoUnref refs; + std::string data = CreateRandomString(1000); + CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("abc")); + CordRep* flat = MakeFlat(data); + leaf = CordRepBtree::Append(leaf, flat); + + // Setup tree containing substring. + CordRep* result = leaf->SubTree(0, 3 + 990); + ASSERT_THAT(result->tag, Eq(BTREE)); + CordRep::Unref(leaf); + leaf = result->btree(); + ASSERT_THAT(leaf->Edges(), ElementsAre(_, IsSubstring(0, 990))); + EXPECT_THAT(leaf->Edges()[1]->substring()->child, Eq(flat)); + + // Verify substring of substring. + result = leaf->SubTree(3 + 5, 970); + ASSERT_THAT(result, IsSubstring(5, 970)); + EXPECT_THAT(result->substring()->child, Eq(flat)); + CordRep::Unref(result); + + CordRep::Unref(leaf); +} + +TEST_P(CordRepBtreeTest, AddDataToLeaf) { + const size_t n = CordRepBtree::kMaxCapacity; + const std::string data = CreateRandomString(n * 3); + + for (bool append : {true, false}) { + AutoUnref refs; + DataConsumer consumer(data, append); + SCOPED_TRACE(append ? "Append" : "Prepend"); + + CordRepBtree* leaf = CordRepBtree::Create(MakeFlat(consumer.Next(3))); + for (size_t i = 1; i < n; ++i) { + refs.RefIf(shared(), leaf); + CordRepBtree* result = BtreeAdd(leaf, append, consumer.Next(3)); + EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf))); + EXPECT_THAT(CordToString(result), Eq(consumer.Consumed())); + leaf = result; + } + CordRep::Unref(leaf); + } +} + +TEST_P(CordRepBtreeTest, AppendDataToTree) { + AutoUnref refs; + size_t n = CordRepBtree::kMaxCapacity + CordRepBtree::kMaxCapacity / 2; + std::string data = CreateRandomString(n * 3); + CordRepBtree* tree = refs.RefIf(shared(), CreateTree(data, 3)); + CordRepBtree* leaf0 = tree->Edges()[0]->btree(); + CordRepBtree* leaf1 = tree->Edges()[1]->btree(); + CordRepBtree* result = CordRepBtree::Append(tree, "123456789"); + EXPECT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + EXPECT_THAT(result->Edges(), + ElementsAre(leaf0, Conditional(shared(), Ne(leaf1), Eq(leaf1)))); + EXPECT_THAT(CordToString(result), Eq(data + "123456789")); + CordRep::Unref(result); +} + +TEST_P(CordRepBtreeTest, PrependDataToTree) { + AutoUnref refs; + size_t n = CordRepBtree::kMaxCapacity + CordRepBtree::kMaxCapacity / 2; + std::string data = CreateRandomString(n * 3); + CordRepBtree* tree = refs.RefIf(shared(), CreateTreeReverse(data, 3)); + CordRepBtree* leaf0 = tree->Edges()[0]->btree(); + CordRepBtree* leaf1 = tree->Edges()[1]->btree(); + CordRepBtree* result = CordRepBtree::Prepend(tree, "123456789"); + EXPECT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + EXPECT_THAT(result->Edges(), + ElementsAre(Conditional(shared(), Ne(leaf0), Eq(leaf0)), leaf1)); + EXPECT_THAT(CordToString(result), Eq("123456789" + data)); + CordRep::Unref(result); +} + +TEST_P(CordRepBtreeTest, AddDataToTreeThreeLevelsDeep) { + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + const size_t n = max_cap * max_cap * max_cap; + const std::string data = CreateRandomString(n * 3); + + for (bool append : {true, false}) { + AutoUnref refs; + DataConsumer consumer(data, append); + SCOPED_TRACE(append ? "Append" : "Prepend"); + + // Fill leaf + CordRepBtree* tree = CordRepBtree::Create(MakeFlat(consumer.Next(3))); + for (size_t i = 1; i < max_cap; ++i) { + tree = BtreeAdd(tree, append, consumer.Next(3)); + } + ASSERT_THAT(CordToString(tree), Eq(consumer.Consumed())); + + // Fill to maximum at one deep + refs.RefIf(shared(), tree); + CordRepBtree* result = BtreeAdd(tree, append, consumer.Next(3)); + ASSERT_THAT(result, IsNode(1)); + ASSERT_THAT(result, Ne(tree)); + ASSERT_THAT(CordToString(result), Eq(consumer.Consumed())); + tree = result; + for (size_t i = max_cap + 1; i < max_cap * max_cap; ++i) { + refs.RefIf(shared(), tree); + result = BtreeAdd(tree, append, consumer.Next(3)); + ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + ASSERT_THAT(CordToString(result), Eq(consumer.Consumed())); + tree = result; + } + + // Fill to maximum at two deep + refs.RefIf(shared(), tree); + result = BtreeAdd(tree, append, consumer.Next(3)); + ASSERT_THAT(result, IsNode(2)); + ASSERT_THAT(result, Ne(tree)); + ASSERT_THAT(CordToString(result), Eq(consumer.Consumed())); + tree = result; + for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; + ++i) { + refs.RefIf(shared(), tree); + result = BtreeAdd(tree, append, consumer.Next(3)); + ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + ASSERT_THAT(CordToString(result), Eq(consumer.Consumed())); + tree = result; + } + + CordRep::Unref(tree); + } +} + +TEST_P(CordRepBtreeTest, AddLargeDataToLeaf) { + const size_t max_cap = CordRepBtree::kMaxCapacity; + const size_t n = max_cap * max_cap * max_cap * 3 + 2; + const std::string data = CreateRandomString(n * kMaxFlatLength); + + for (bool append : {true, false}) { + AutoUnref refs; + SCOPED_TRACE(append ? "Append" : "Prepend"); + + CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("abc")); + refs.RefIf(shared(), leaf); + CordRepBtree* result = BtreeAdd(leaf, append, data); + EXPECT_THAT(CordToString(result), Eq(append ? "abc" + data : data + "abc")); + CordRep::Unref(result); + } +} + +TEST_P(CordRepBtreeDualTest, CreateFromConcat) { + AutoUnref refs; + CordRep* flats[] = {MakeFlat("abcdefgh"), MakeFlat("ijklm"), + MakeFlat("nopqrstuv"), MakeFlat("wxyz")}; + auto* left = MakeConcat(flats[0], flats[1]); + auto* right = MakeConcat(flats[2], refs.RefIf(first_shared(), flats[3])); + auto* concat = refs.RefIf(second_shared(), MakeConcat(left, right)); + CordRepBtree* result = CordRepBtree::Create(concat); + ASSERT_TRUE(CordRepBtree::IsValid(result)); + EXPECT_THAT(result->length, Eq(26)); + EXPECT_THAT(CordToString(result), Eq("abcdefghijklmnopqrstuvwxyz")); + CordRep::Unref(result); +} + +TEST_P(CordRepBtreeDualTest, AppendConcat) { + AutoUnref refs; + CordRep* flats[] = {MakeFlat("defgh"), MakeFlat("ijklm"), + MakeFlat("nopqrstuv"), MakeFlat("wxyz")}; + auto* left = MakeConcat(flats[0], flats[1]); + auto* right = MakeConcat(flats[2], refs.RefIf(first_shared(), flats[3])); + auto* concat = refs.RefIf(second_shared(), MakeConcat(left, right)); + CordRepBtree* result = CordRepBtree::Create(MakeFlat("abc")); + result = CordRepBtree::Append(result, concat); + ASSERT_TRUE(CordRepBtree::IsValid(result)); + EXPECT_THAT(result->length, Eq(26)); + EXPECT_THAT(CordToString(result), Eq("abcdefghijklmnopqrstuvwxyz")); + CordRep::Unref(result); +} + +TEST_P(CordRepBtreeDualTest, PrependConcat) { + AutoUnref refs; + CordRep* flats[] = {MakeFlat("abcdefgh"), MakeFlat("ijklm"), + MakeFlat("nopqrstuv"), MakeFlat("wx")}; + auto* left = MakeConcat(flats[0], flats[1]); + auto* right = MakeConcat(flats[2], refs.RefIf(first_shared(), flats[3])); + auto* concat = refs.RefIf(second_shared(), MakeConcat(left, right)); + CordRepBtree* result = CordRepBtree::Create(MakeFlat("yz")); + result = CordRepBtree::Prepend(result, concat); + ASSERT_TRUE(CordRepBtree::IsValid(result)); + EXPECT_THAT(result->length, Eq(26)); + EXPECT_THAT(CordToString(result), Eq("abcdefghijklmnopqrstuvwxyz")); + CordRep::Unref(result); +} + +TEST_P(CordRepBtreeTest, CreateFromTreeReturnsTree) { + AutoUnref refs; + CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("Hello world")); + refs.RefIf(shared(), leaf); + CordRepBtree* result = CordRepBtree::Create(leaf); + EXPECT_THAT(result, Eq(leaf)); + CordRep::Unref(result); +} + +TEST(CordRepBtreeTest, GetCharacter) { + size_t n = CordRepBtree::kMaxCapacity * CordRepBtree::kMaxCapacity + 2; + std::string data = CreateRandomString(n * 3); + CordRepBtree* tree = CreateTree(data, 3); + // Add a substring node for good measure. + tree = tree->Append(tree, MakeSubstring(4, 5, MakeFlat("abcdefghijklm"))); + data += "efghi"; + for (size_t i = 0; i < data.length(); ++i) { + ASSERT_THAT(tree->GetCharacter(i), Eq(data[i])); + } + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeTest, IsFlatSingleFlat) { + CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("Hello world")); + + absl::string_view fragment; + EXPECT_TRUE(leaf->IsFlat(nullptr)); + EXPECT_TRUE(leaf->IsFlat(&fragment)); + EXPECT_THAT(fragment, Eq("Hello world")); + fragment = ""; + EXPECT_TRUE(leaf->IsFlat(0, 11, nullptr)); + EXPECT_TRUE(leaf->IsFlat(0, 11, &fragment)); + EXPECT_THAT(fragment, Eq("Hello world")); + + // Arbitrary ranges must check true as well. + EXPECT_TRUE(leaf->IsFlat(1, 4, &fragment)); + EXPECT_THAT(fragment, Eq("ello")); + EXPECT_TRUE(leaf->IsFlat(6, 5, &fragment)); + EXPECT_THAT(fragment, Eq("world")); + + CordRep::Unref(leaf); +} + +TEST(CordRepBtreeTest, IsFlatMultiFlat) { + size_t n = CordRepBtree::kMaxCapacity * CordRepBtree::kMaxCapacity + 2; + std::string data = CreateRandomString(n * 3); + CordRepBtree* tree = CreateTree(data, 3); + // Add substring nodes for good measure. + tree = tree->Append(tree, MakeSubstring(4, 3, MakeFlat("abcdefghijklm"))); + tree = tree->Append(tree, MakeSubstring(8, 3, MakeFlat("abcdefghijklm"))); + data += "efgijk"; + + EXPECT_FALSE(tree->IsFlat(nullptr)); + absl::string_view fragment = "Can't touch this"; + EXPECT_FALSE(tree->IsFlat(&fragment)); + EXPECT_THAT(fragment, Eq("Can't touch this")); + + for (size_t offset = 0; offset < data.size(); offset += 3) { + EXPECT_TRUE(tree->IsFlat(offset, 3, nullptr)); + EXPECT_TRUE(tree->IsFlat(offset, 3, &fragment)); + EXPECT_THAT(fragment, Eq(data.substr(offset, 3))); + + fragment = "Can't touch this"; + if (offset > 0) { + EXPECT_FALSE(tree->IsFlat(offset - 1, 4, nullptr)); + EXPECT_FALSE(tree->IsFlat(offset - 1, 4, &fragment)); + EXPECT_THAT(fragment, Eq("Can't touch this")); + } + if (offset < data.size() - 4) { + EXPECT_FALSE(tree->IsFlat(offset, 4, nullptr)); + EXPECT_FALSE(tree->IsFlat(offset, 4, &fragment)); + EXPECT_THAT(fragment, Eq("Can't touch this")); + } + } + + CordRep::Unref(tree); +} + +#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) + +TEST_P(CordRepBtreeHeightTest, GetAppendBufferNotPrivate) { + CordRepBtree* tree = CordRepBtree::Create(MakeExternal("Foo")); + CordRepBtree::Ref(tree); + EXPECT_DEATH(tree->GetAppendBuffer(1), ".*"); + CordRepBtree::Unref(tree); + CordRepBtree::Unref(tree); +} + +#endif // defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) + +TEST_P(CordRepBtreeHeightTest, GetAppendBufferNotFlat) { + CordRepBtree* tree = CordRepBtree::Create(MakeExternal("Foo")); + for (int i = 1; i <= height(); ++i) { + tree = CordRepBtree::New(tree); + } + EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0)); + CordRepBtree::Unref(tree); +} + +TEST_P(CordRepBtreeHeightTest, GetAppendBufferFlatNotPrivate) { + CordRepFlat* flat = MakeFlat("abc"); + CordRepBtree* tree = CordRepBtree::Create(CordRep::Ref(flat)); + for (int i = 1; i <= height(); ++i) { + tree = CordRepBtree::New(tree); + } + EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0)); + CordRepBtree::Unref(tree); + CordRep::Unref(flat); +} + +TEST_P(CordRepBtreeHeightTest, GetAppendBufferTreeNotPrivate) { + if (height() == 0) return; + AutoUnref refs; + CordRepFlat* flat = MakeFlat("abc"); + CordRepBtree* tree = CordRepBtree::Create(CordRep::Ref(flat)); + for (int i = 1; i <= height(); ++i) { + if (i == (height() + 1) / 2) refs.Ref(tree); + tree = CordRepBtree::New(tree); + } + EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0)); + CordRepBtree::Unref(tree); + CordRep::Unref(flat); +} + +TEST_P(CordRepBtreeHeightTest, GetAppendBufferFlatNoCapacity) { + CordRepFlat* flat = MakeFlat("abc"); + flat->length = flat->Capacity(); + CordRepBtree* tree = CordRepBtree::Create(flat); + for (int i = 1; i <= height(); ++i) { + tree = CordRepBtree::New(tree); + } + EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0)); + CordRepBtree::Unref(tree); +} + +TEST_P(CordRepBtreeHeightTest, GetAppendBufferFlatWithCapacity) { + CordRepFlat* flat = MakeFlat("abc"); + CordRepBtree* tree = CordRepBtree::Create(flat); + for (int i = 1; i <= height(); ++i) { + tree = CordRepBtree::New(tree); + } + absl::Span<char> span = tree->GetAppendBuffer(2); + EXPECT_THAT(span, SizeIs(2)); + EXPECT_THAT(span.data(), TypedEq<void*>(flat->Data() + 3)); + EXPECT_THAT(tree->length, Eq(5)); + + size_t avail = flat->Capacity() - 5; + span = tree->GetAppendBuffer(avail + 100); + EXPECT_THAT(span, SizeIs(avail)); + EXPECT_THAT(span.data(), TypedEq<void*>(flat->Data() + 5)); + EXPECT_THAT(tree->length, Eq(5 + avail)); + + CordRepBtree::Unref(tree); +} + +TEST(CordRepBtreeTest, Dump) { + // Handles nullptr + std::stringstream ss; + CordRepBtree::Dump(nullptr, ss); + CordRepBtree::Dump(nullptr, "Once upon a label", ss); + CordRepBtree::Dump(nullptr, "Once upon a label", false, ss); + CordRepBtree::Dump(nullptr, "Once upon a label", true, ss); + + // Cover legal edges + CordRepFlat* flat = MakeFlat("Hello world"); + CordRepExternal* external = MakeExternal("Hello external"); + CordRep* substr_flat = MakeSubstring(1, 6, CordRep::Ref(flat)); + CordRep* substr_external = MakeSubstring(2, 7, CordRep::Ref(external)); + + // Build tree + CordRepBtree* tree = CordRepBtree::Create(flat); + tree = CordRepBtree::Append(tree, external); + tree = CordRepBtree::Append(tree, substr_flat); + tree = CordRepBtree::Append(tree, substr_external); + + // Repeat until we have a tree + while (tree->height() == 0) { + tree = CordRepBtree::Append(tree, CordRep::Ref(flat)); + tree = CordRepBtree::Append(tree, CordRep::Ref(external)); + tree = CordRepBtree::Append(tree, CordRep::Ref(substr_flat)); + tree = CordRepBtree::Append(tree, CordRep::Ref(substr_external)); + } + + for (int api = 0; api <= 3; ++api) { + absl::string_view api_scope; + std::stringstream ss; + switch (api) { + case 0: + api_scope = "Bare"; + CordRepBtree::Dump(tree, ss); + break; + case 1: + api_scope = "Label only"; + CordRepBtree::Dump(tree, "Once upon a label", ss); + break; + case 2: + api_scope = "Label no content"; + CordRepBtree::Dump(tree, "Once upon a label", false, ss); + break; + default: + api_scope = "Label and content"; + CordRepBtree::Dump(tree, "Once upon a label", true, ss); + break; + } + SCOPED_TRACE(api_scope); + std::string str = ss.str(); + + // Contains Node(depth) / Leaf and private / shared indicators + EXPECT_THAT(str, AllOf(HasSubstr("Node(1)"), HasSubstr("Leaf"), + HasSubstr("Private"), HasSubstr("Shared"))); + + // Contains length and start offset of all data edges + EXPECT_THAT(str, AllOf(HasSubstr("len = 11"), HasSubstr("len = 14"), + HasSubstr("len = 6"), HasSubstr("len = 7"), + HasSubstr("start = 1"), HasSubstr("start = 2"))); + + // Contains address of all data edges + EXPECT_THAT( + str, AllOf(HasSubstr(absl::StrCat("0x", absl::Hex(flat))), + HasSubstr(absl::StrCat("0x", absl::Hex(external))), + HasSubstr(absl::StrCat("0x", absl::Hex(substr_flat))), + HasSubstr(absl::StrCat("0x", absl::Hex(substr_external))))); + + if (api != 0) { + // Contains label + EXPECT_THAT(str, HasSubstr("Once upon a label")); + } + + if (api != 3) { + // Does not contain contents + EXPECT_THAT(str, Not(AnyOf((HasSubstr("data = \"Hello world\""), + HasSubstr("data = \"Hello external\""), + HasSubstr("data = \"ello w\""), + HasSubstr("data = \"llo ext\""))))); + } else { + // Contains contents + EXPECT_THAT(str, AllOf((HasSubstr("data = \"Hello world\""), + HasSubstr("data = \"Hello external\""), + HasSubstr("data = \"ello w\""), + HasSubstr("data = \"llo ext\"")))); + } + } + + CordRep::Unref(tree); +} + +TEST(CordRepBtreeTest, IsValid) { + EXPECT_FALSE(CordRepBtree::IsValid(nullptr)); + + CordRepBtree* empty = CordRepBtree::New(0); + EXPECT_TRUE(CordRepBtree::IsValid(empty)); + CordRep::Unref(empty); + + for (bool as_tree : {false, true}) { + CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("abc")); + CordRepBtree* tree = as_tree ? CordRepBtree::New(leaf) : nullptr; + CordRepBtree* check = as_tree ? tree : leaf; + + ASSERT_TRUE(CordRepBtree::IsValid(check)); + leaf->length--; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->length++; + + ASSERT_TRUE(CordRepBtree::IsValid(check)); + leaf->tag--; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->tag++; + + // Height + ASSERT_TRUE(CordRepBtree::IsValid(check)); + leaf->storage[0] = static_cast<uint8_t>(CordRepBtree::kMaxHeight + 1); + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->storage[0] = 1; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->storage[0] = 0; + + // Begin + ASSERT_TRUE(CordRepBtree::IsValid(check)); + const uint8_t begin = leaf->storage[1]; + leaf->storage[1] = static_cast<uint8_t>(CordRepBtree::kMaxCapacity); + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->storage[1] = 2; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->storage[1] = begin; + + // End + ASSERT_TRUE(CordRepBtree::IsValid(check)); + const uint8_t end = leaf->storage[2]; + leaf->storage[2] = static_cast<uint8_t>(CordRepBtree::kMaxCapacity + 1); + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->storage[2] = end; + + // DataEdge tag and value + ASSERT_TRUE(CordRepBtree::IsValid(check)); + CordRep* const edge = leaf->Edges()[0]; + const uint8_t tag = edge->tag; + CordRepBtreeTestPeer::SetEdge(leaf, begin, nullptr); + EXPECT_FALSE(CordRepBtree::IsValid(check)); + CordRepBtreeTestPeer::SetEdge(leaf, begin, edge); + edge->tag = BTREE; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + edge->tag = tag; + + if (as_tree) { + ASSERT_TRUE(CordRepBtree::IsValid(check)); + leaf->length--; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->length++; + + // Height + ASSERT_TRUE(CordRepBtree::IsValid(check)); + tree->storage[0] = static_cast<uint8_t>(2); + EXPECT_FALSE(CordRepBtree::IsValid(check)); + tree->storage[0] = 1; + + // Btree edge + ASSERT_TRUE(CordRepBtree::IsValid(check)); + CordRep* const edge = tree->Edges()[0]; + const uint8_t tag = edge->tag; + edge->tag = FLAT; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + edge->tag = tag; + } + + ASSERT_TRUE(CordRepBtree::IsValid(check)); + CordRep::Unref(check); + } +} + +TEST(CordRepBtreeTest, AssertValid) { + CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc")); + const CordRepBtree* ctree = tree; + EXPECT_THAT(CordRepBtree::AssertValid(tree), Eq(tree)); + EXPECT_THAT(CordRepBtree::AssertValid(ctree), Eq(ctree)); + +#if defined(GTEST_HAS_DEATH_TEST) + CordRepBtree* nulltree = nullptr; + const CordRepBtree* cnulltree = nullptr; + EXPECT_DEBUG_DEATH( + EXPECT_THAT(CordRepBtree::AssertValid(nulltree), Eq(nulltree)), ".*"); + EXPECT_DEBUG_DEATH( + EXPECT_THAT(CordRepBtree::AssertValid(cnulltree), Eq(cnulltree)), ".*"); + + tree->length--; + EXPECT_DEBUG_DEATH(EXPECT_THAT(CordRepBtree::AssertValid(tree), Eq(tree)), + ".*"); + EXPECT_DEBUG_DEATH(EXPECT_THAT(CordRepBtree::AssertValid(ctree), Eq(ctree)), + ".*"); + tree->length++; +#endif + CordRep::Unref(tree); +} + +TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) { + // Restore exhaustive validation on any exit. + const bool exhaustive_validation = cord_btree_exhaustive_validation.load(); + auto cleanup = absl::MakeCleanup([exhaustive_validation] { + cord_btree_exhaustive_validation.store(exhaustive_validation); + }); + + // Create a tree of at least 2 levels, and mess with the original flat, which + // should go undetected in shallow mode as the flat is too far away, but + // should be detected in forced non-shallow mode. + CordRep* flat = MakeFlat("abc"); + CordRepBtree* tree = CordRepBtree::Create(flat); + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + const size_t n = max_cap * max_cap * 2; + for (size_t i = 0; i < n; ++i) { + tree = CordRepBtree::Append(tree, MakeFlat("Hello world")); + } + flat->length = 100; + + cord_btree_exhaustive_validation.store(false); + EXPECT_FALSE(CordRepBtree::IsValid(tree)); + EXPECT_TRUE(CordRepBtree::IsValid(tree, true)); + EXPECT_FALSE(CordRepBtree::IsValid(tree, false)); + CordRepBtree::AssertValid(tree); + CordRepBtree::AssertValid(tree, true); +#if defined(GTEST_HAS_DEATH_TEST) + EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree, false), ".*"); +#endif + + cord_btree_exhaustive_validation.store(true); + EXPECT_FALSE(CordRepBtree::IsValid(tree)); + EXPECT_FALSE(CordRepBtree::IsValid(tree, true)); + EXPECT_FALSE(CordRepBtree::IsValid(tree, false)); +#if defined(GTEST_HAS_DEATH_TEST) + EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree), ".*"); + EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree, true), ".*"); +#endif + + flat->length = 3; + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeTest, Rebuild) { + for (size_t size : {3, 8, 100, 10000, 1000000}) { + SCOPED_TRACE(absl::StrCat("Rebuild @", size)); + + std::vector<CordRepFlat*> flats; + for (int i = 0; i < size; ++i) { + flats.push_back(CordRepFlat::New(2)); + flats.back()->Data()[0] = 'x'; + flats.back()->length = 1; + } + + // Build the tree into 'right', and each so many 'split_limit' edges, + // combine 'left' + 'right' into a new 'left', and start a new 'right'. + // This guarantees we get a reasonable amount of chaos in the tree. + size_t split_count = 0; + size_t split_limit = 3; + auto it = flats.begin(); + CordRepBtree* left = nullptr; + CordRepBtree* right = CordRepBtree::New(*it); + while (++it != flats.end()) { + if (++split_count >= split_limit) { + split_limit += split_limit / 16; + left = left ? CordRepBtree::Append(left, right) : right; + right = CordRepBtree::New(*it); + } else { + right = CordRepBtree::Append(right, *it); + } + } + + // Finalize tree + left = left ? CordRepBtree::Append(left, right) : right; + + // Rebuild + AutoUnref ref; + left = ref.Add(CordRepBtree::Rebuild(ref.RefIf(shared(), left))); + ASSERT_TRUE(CordRepBtree::IsValid(left)); + + // Verify we have the exact same edges in the exact same order. + bool ok = true; + it = flats.begin(); + CordVisitReps(left, [&](CordRep* edge) { + if (edge->tag < FLAT) return; + ok = ok && (it != flats.end() && *it++ == edge); + }); + EXPECT_TRUE(ok && it == flats.end()) << "Rebuild edges mismatch"; + } +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_consume.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_consume.cc new file mode 100644 index 0000000000..81514543db --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_consume.cc @@ -0,0 +1,129 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_consume.h" + +#include <array> +#include <utility> + +#include "absl/container/inlined_vector.h" +#include "absl/functional/function_ref.h" +#include "absl/strings/internal/cord_internal.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +namespace { + +// Unrefs the provided `substring`, and returns `substring->child` +// Adds or assumes a reference on `substring->child` +CordRep* ClipSubstring(CordRepSubstring* substring) { + CordRep* child = substring->child; + if (substring->refcount.IsOne()) { + delete substring; + } else { + CordRep::Ref(child); + CordRep::Unref(substring); + } + return child; +} + +// Unrefs the provided `concat`, and returns `{concat->left, concat->right}` +// Adds or assumes a reference on `concat->left` and `concat->right`. +// Returns an array of 2 elements containing the left and right nodes. +std::array<CordRep*, 2> ClipConcat(CordRepConcat* concat) { + std::array<CordRep*, 2> result{concat->left, concat->right}; + if (concat->refcount.IsOne()) { + delete concat; + } else { + CordRep::Ref(result[0]); + CordRep::Ref(result[1]); + CordRep::Unref(concat); + } + return result; +} + +void Consume(bool forward, CordRep* rep, ConsumeFn consume_fn) { + size_t offset = 0; + size_t length = rep->length; + struct Entry { + CordRep* rep; + size_t offset; + size_t length; + }; + absl::InlinedVector<Entry, 40> stack; + + for (;;) { + if (rep->tag == CONCAT) { + std::array<CordRep*, 2> res = ClipConcat(rep->concat()); + CordRep* left = res[0]; + CordRep* right = res[1]; + + if (left->length <= offset) { + // Don't need left node + offset -= left->length; + CordRep::Unref(left); + rep = right; + continue; + } + + size_t length_left = left->length - offset; + if (length_left >= length) { + // Don't need right node + CordRep::Unref(right); + rep = left; + continue; + } + + // Need both nodes + size_t length_right = length - length_left; + if (forward) { + stack.push_back({right, 0, length_right}); + rep = left; + length = length_left; + } else { + stack.push_back({left, offset, length_left}); + rep = right; + offset = 0; + length = length_right; + } + } else if (rep->tag == SUBSTRING) { + offset += rep->substring()->start; + rep = ClipSubstring(rep->substring()); + } else { + consume_fn(rep, offset, length); + if (stack.empty()) return; + + rep = stack.back().rep; + offset = stack.back().offset; + length = stack.back().length; + stack.pop_back(); + } + } +} + +} // namespace + +void Consume(CordRep* rep, ConsumeFn consume_fn) { + return Consume(true, rep, std::move(consume_fn)); +} + +void ReverseConsume(CordRep* rep, ConsumeFn consume_fn) { + return Consume(false, rep, std::move(consume_fn)); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_consume.h b/third_party/abseil-cpp/absl/strings/internal/cord_rep_consume.h new file mode 100644 index 0000000000..d46fca2b21 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_consume.h @@ -0,0 +1,50 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_ + +#include <functional> + +#include "absl/functional/function_ref.h" +#include "absl/strings/internal/cord_internal.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Functor for the Consume() and ReverseConsume() functions: +// void ConsumeFunc(CordRep* rep, size_t offset, size_t length); +// See the Consume() and ReverseConsume() function comments for documentation. +using ConsumeFn = FunctionRef<void(CordRep*, size_t, size_t)>; + +// Consume() and ReverseConsume() consume CONCAT based trees and invoke the +// provided functor with the contained nodes in the proper forward or reverse +// order, which is used to convert CONCAT trees into other tree or cord data. +// All CONCAT and SUBSTRING nodes are processed internally. The 'offset` +// parameter of the functor is non-zero for any nodes below SUBSTRING nodes. +// It's up to the caller to form these back into SUBSTRING nodes or otherwise +// store offset / prefix information. These functions are intended to be used +// only for migration / transitional code where due to factors such as ODR +// violations, we can not 100% guarantee that all code respects 'new format' +// settings and flags, so we need to be able to parse old data on the fly until +// all old code is deprecated / no longer the default format. +void Consume(CordRep* rep, ConsumeFn consume_fn); +void ReverseConsume(CordRep* rep, ConsumeFn consume_fn); + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_consume_test.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_consume_test.cc new file mode 100644 index 0000000000..e507824b4f --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_consume_test.cc @@ -0,0 +1,173 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_consume.h" + +#include <functional> +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using testing::InSequence; +using testing::MockFunction; + +// Returns the depth of a node +int Depth(const CordRep* rep) { + return (rep->tag == CONCAT) ? rep->concat()->depth() : 0; +} + +// Creates a concatenation of the specified nodes. +CordRepConcat* CreateConcat(CordRep* left, CordRep* right) { + auto* concat = new CordRepConcat(); + concat->tag = CONCAT; + concat->left = left; + concat->right = right; + concat->length = left->length + right->length; + concat->set_depth(1 + (std::max)(Depth(left), Depth(right))); + return concat; +} + +// Creates a flat with the length set to `length` +CordRepFlat* CreateFlatWithLength(size_t length) { + auto* flat = CordRepFlat::New(length); + flat->length = length; + return flat; +} + +// Creates a substring node on the specified child. +CordRepSubstring* CreateSubstring(CordRep* child, size_t start, size_t length) { + auto* rep = new CordRepSubstring(); + rep->length = length; + rep->tag = SUBSTRING; + rep->start = start; + rep->child = child; + return rep; +} + +// Flats we use in the tests +CordRep* flat[6]; + +// Creates a test tree +CordRep* CreateTestTree() { + flat[0] = CreateFlatWithLength(1); + flat[1] = CreateFlatWithLength(7); + CordRepConcat* left = CreateConcat(flat[0], CreateSubstring(flat[1], 2, 4)); + + flat[2] = CreateFlatWithLength(9); + flat[3] = CreateFlatWithLength(13); + CordRepConcat* right1 = CreateConcat(flat[2], flat[3]); + + flat[4] = CreateFlatWithLength(15); + flat[5] = CreateFlatWithLength(19); + CordRepConcat* right2 = CreateConcat(flat[4], flat[5]); + + CordRepConcat* right = CreateConcat(right1, CreateSubstring(right2, 5, 17)); + return CreateConcat(left, right); +} + +TEST(CordRepConsumeTest, Consume) { + InSequence in_sequence; + CordRep* tree = CreateTestTree(); + MockFunction<void(CordRep*, size_t, size_t)> consume; + EXPECT_CALL(consume, Call(flat[0], 0, 1)); + EXPECT_CALL(consume, Call(flat[1], 2, 4)); + EXPECT_CALL(consume, Call(flat[2], 0, 9)); + EXPECT_CALL(consume, Call(flat[3], 0, 13)); + EXPECT_CALL(consume, Call(flat[4], 5, 10)); + EXPECT_CALL(consume, Call(flat[5], 0, 7)); + Consume(tree, consume.AsStdFunction()); + for (CordRep* rep : flat) { + EXPECT_TRUE(rep->refcount.IsOne()); + CordRep::Unref(rep); + } +} + +TEST(CordRepConsumeTest, ConsumeShared) { + InSequence in_sequence; + CordRep* tree = CreateTestTree(); + MockFunction<void(CordRep*, size_t, size_t)> consume; + EXPECT_CALL(consume, Call(flat[0], 0, 1)); + EXPECT_CALL(consume, Call(flat[1], 2, 4)); + EXPECT_CALL(consume, Call(flat[2], 0, 9)); + EXPECT_CALL(consume, Call(flat[3], 0, 13)); + EXPECT_CALL(consume, Call(flat[4], 5, 10)); + EXPECT_CALL(consume, Call(flat[5], 0, 7)); + Consume(CordRep::Ref(tree), consume.AsStdFunction()); + for (CordRep* rep : flat) { + EXPECT_FALSE(rep->refcount.IsOne()); + CordRep::Unref(rep); + } + CordRep::Unref(tree); +} + +TEST(CordRepConsumeTest, Reverse) { + InSequence in_sequence; + CordRep* tree = CreateTestTree(); + MockFunction<void(CordRep*, size_t, size_t)> consume; + EXPECT_CALL(consume, Call(flat[5], 0, 7)); + EXPECT_CALL(consume, Call(flat[4], 5, 10)); + EXPECT_CALL(consume, Call(flat[3], 0, 13)); + EXPECT_CALL(consume, Call(flat[2], 0, 9)); + EXPECT_CALL(consume, Call(flat[1], 2, 4)); + EXPECT_CALL(consume, Call(flat[0], 0, 1)); + ReverseConsume(tree, consume.AsStdFunction()); + for (CordRep* rep : flat) { + EXPECT_TRUE(rep->refcount.IsOne()); + CordRep::Unref(rep); + } +} + +TEST(CordRepConsumeTest, ReverseShared) { + InSequence in_sequence; + CordRep* tree = CreateTestTree(); + MockFunction<void(CordRep*, size_t, size_t)> consume; + EXPECT_CALL(consume, Call(flat[5], 0, 7)); + EXPECT_CALL(consume, Call(flat[4], 5, 10)); + EXPECT_CALL(consume, Call(flat[3], 0, 13)); + EXPECT_CALL(consume, Call(flat[2], 0, 9)); + EXPECT_CALL(consume, Call(flat[1], 2, 4)); + EXPECT_CALL(consume, Call(flat[0], 0, 1)); + ReverseConsume(CordRep::Ref(tree), consume.AsStdFunction()); + for (CordRep* rep : flat) { + EXPECT_FALSE(rep->refcount.IsOne()); + CordRep::Unref(rep); + } + CordRep::Unref(tree); +} + +TEST(CordRepConsumeTest, UnreachableFlat) { + InSequence in_sequence; + CordRepFlat* flat1 = CreateFlatWithLength(10); + CordRepFlat* flat2 = CreateFlatWithLength(20); + CordRepConcat* concat = CreateConcat(flat1, flat2); + CordRepSubstring* tree = CreateSubstring(concat, 15, 10); + MockFunction<void(CordRep*, size_t, size_t)> consume; + EXPECT_CALL(consume, Call(flat2, 5, 10)); + Consume(tree, consume.AsStdFunction()); + EXPECT_TRUE(flat2->refcount.IsOne()); + CordRep::Unref(flat2); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_flat.h b/third_party/abseil-cpp/absl/strings/internal/cord_rep_flat.h new file mode 100644 index 0000000000..4d0f988697 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_flat.h @@ -0,0 +1,146 @@ +// Copyright 2020 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_ + +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <memory> + +#include "absl/strings/internal/cord_internal.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Note: all constants below are never ODR used and internal to cord, we define +// these as static constexpr to avoid 'in struct' definition and usage clutter. + +// Largest and smallest flat node lengths we are willing to allocate +// Flat allocation size is stored in tag, which currently can encode sizes up +// to 4K, encoded as multiple of either 8 or 32 bytes. +// If we allow for larger sizes, we need to change this to 8/64, 16/128, etc. +// kMinFlatSize is bounded by tag needing to be at least FLAT * 8 bytes, and +// ideally a 'nice' size aligning with allocation and cacheline sizes like 32. +// kMaxFlatSize is bounded by the size resulting in a computed tag no greater +// than MAX_FLAT_TAG. MAX_FLAT_TAG provides for additional 'high' tag values. +static constexpr size_t kFlatOverhead = offsetof(CordRep, storage); +static constexpr size_t kMinFlatSize = 32; +static constexpr size_t kMaxFlatSize = 4096; +static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead; +static constexpr size_t kMinFlatLength = kMinFlatSize - kFlatOverhead; + +constexpr uint8_t AllocatedSizeToTagUnchecked(size_t size) { + return static_cast<uint8_t>((size <= 1024) ? size / 8 + 1 + : 129 + size / 32 - 1024 / 32); +} + +static_assert(kMinFlatSize / 8 + 1 >= FLAT, ""); +static_assert(AllocatedSizeToTagUnchecked(kMaxFlatSize) <= MAX_FLAT_TAG, ""); + +// Helper functions for rounded div, and rounding to exact sizes. +constexpr size_t DivUp(size_t n, size_t m) { return (n + m - 1) / m; } +constexpr size_t RoundUp(size_t n, size_t m) { return DivUp(n, m) * m; } + +// Returns the size to the nearest equal or larger value that can be +// expressed exactly as a tag value. +inline size_t RoundUpForTag(size_t size) { + return RoundUp(size, (size <= 1024) ? 8 : 32); +} + +// Converts the allocated size to a tag, rounding down if the size +// does not exactly match a 'tag expressible' size value. The result is +// undefined if the size exceeds the maximum size that can be encoded in +// a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>). +inline uint8_t AllocatedSizeToTag(size_t size) { + const uint8_t tag = AllocatedSizeToTagUnchecked(size); + assert(tag <= MAX_FLAT_TAG); + return tag; +} + +// Converts the provided tag to the corresponding allocated size +constexpr size_t TagToAllocatedSize(uint8_t tag) { + return (tag <= 129) ? ((tag - 1) * 8) : (1024 + (tag - 129) * 32); +} + +// Converts the provided tag to the corresponding available data length +constexpr size_t TagToLength(uint8_t tag) { + return TagToAllocatedSize(tag) - kFlatOverhead; +} + +// Enforce that kMaxFlatSize maps to a well-known exact tag value. +static_assert(TagToAllocatedSize(225) == kMaxFlatSize, "Bad tag logic"); + +struct CordRepFlat : public CordRep { + // Creates a new flat node. + static CordRepFlat* New(size_t len) { + if (len <= kMinFlatLength) { + len = kMinFlatLength; + } else if (len > kMaxFlatLength) { + len = kMaxFlatLength; + } + + // Round size up so it matches a size we can exactly express in a tag. + const size_t size = RoundUpForTag(len + kFlatOverhead); + void* const raw_rep = ::operator new(size); + CordRepFlat* rep = new (raw_rep) CordRepFlat(); + rep->tag = AllocatedSizeToTag(size); + return rep; + } + + // Deletes a CordRepFlat instance created previously through a call to New(). + // Flat CordReps are allocated and constructed with raw ::operator new and + // placement new, and must be destructed and deallocated accordingly. + static void Delete(CordRep*rep) { + assert(rep->tag >= FLAT && rep->tag <= MAX_FLAT_TAG); + +#if defined(__cpp_sized_deallocation) + size_t size = TagToAllocatedSize(rep->tag); + rep->~CordRep(); + ::operator delete(rep, size); +#else + rep->~CordRep(); + ::operator delete(rep); +#endif + } + + // Returns a pointer to the data inside this flat rep. + char* Data() { return reinterpret_cast<char*>(storage); } + const char* Data() const { return reinterpret_cast<const char*>(storage); } + + // Returns the maximum capacity (payload size) of this instance. + size_t Capacity() const { return TagToLength(tag); } + + // Returns the allocated size (payload + overhead) of this instance. + size_t AllocatedSize() const { return TagToAllocatedSize(tag); } +}; + +// Now that CordRepFlat is defined, we can define CordRep's helper casts: +inline CordRepFlat* CordRep::flat() { + assert(tag >= FLAT && tag <= MAX_FLAT_TAG); + return reinterpret_cast<CordRepFlat*>(this); +} + +inline const CordRepFlat* CordRep::flat() const { + assert(tag >= FLAT && tag <= MAX_FLAT_TAG); + return reinterpret_cast<const CordRepFlat*>(this); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_FLAT_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_ring.cc b/third_party/abseil-cpp/absl/strings/internal/cord_rep_ring.cc new file mode 100644 index 0000000000..07c77eb3e5 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_ring.cc @@ -0,0 +1,771 @@ +// Copyright 2020 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "absl/strings/internal/cord_rep_ring.h" + +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iostream> +#include <limits> +#include <memory> +#include <string> + +#include "absl/base/internal/raw_logging.h" +#include "absl/base/internal/throw_delegate.h" +#include "absl/base/macros.h" +#include "absl/container/inlined_vector.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_consume.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +namespace { + +using index_type = CordRepRing::index_type; + +enum class Direction { kForward, kReversed }; + +inline bool IsFlatOrExternal(CordRep* rep) { + return rep->IsFlat() || rep->IsExternal(); +} + +// Verifies that n + extra <= kMaxCapacity: throws std::length_error otherwise. +inline void CheckCapacity(size_t n, size_t extra) { + if (ABSL_PREDICT_FALSE(extra > CordRepRing::kMaxCapacity - n)) { + base_internal::ThrowStdLengthError("Maximum capacity exceeded"); + } +} + +// Creates a flat from the provided string data, allocating up to `extra` +// capacity in the returned flat depending on kMaxFlatLength limitations. +// Requires `len` to be less or equal to `kMaxFlatLength` +CordRepFlat* CreateFlat(const char* s, size_t n, size_t extra = 0) { // NOLINT + assert(n <= kMaxFlatLength); + auto* rep = CordRepFlat::New(n + extra); + rep->length = n; + memcpy(rep->Data(), s, n); + return rep; +} + +// Unrefs the entries in `[head, tail)`. +// Requires all entries to be a FLAT or EXTERNAL node. +void UnrefEntries(const CordRepRing* rep, index_type head, index_type tail) { + rep->ForEach(head, tail, [rep](index_type ix) { + CordRep* child = rep->entry_child(ix); + if (!child->refcount.Decrement()) { + if (child->tag >= FLAT) { + CordRepFlat::Delete(child->flat()); + } else { + CordRepExternal::Delete(child->external()); + } + } + }); +} + +} // namespace + +std::ostream& operator<<(std::ostream& s, const CordRepRing& rep) { + // Note: 'pos' values are defined as size_t (for overflow reasons), but that + // prints really awkward for small prepended values such as -5. ssize_t is not + // portable (POSIX), so we use ptrdiff_t instead to cast to signed values. + s << " CordRepRing(" << &rep << ", length = " << rep.length + << ", head = " << rep.head_ << ", tail = " << rep.tail_ + << ", cap = " << rep.capacity_ << ", rc = " << rep.refcount.Get() + << ", begin_pos_ = " << static_cast<ptrdiff_t>(rep.begin_pos_) << ") {\n"; + CordRepRing::index_type head = rep.head(); + do { + CordRep* child = rep.entry_child(head); + s << " entry[" << head << "] length = " << rep.entry_length(head) + << ", child " << child << ", clen = " << child->length + << ", tag = " << static_cast<int>(child->tag) + << ", rc = " << child->refcount.Get() + << ", offset = " << rep.entry_data_offset(head) + << ", end_pos = " << static_cast<ptrdiff_t>(rep.entry_end_pos(head)) + << "\n"; + head = rep.advance(head); + } while (head != rep.tail()); + return s << "}\n"; +} + +void CordRepRing::AddDataOffset(index_type index, size_t n) { + entry_data_offset()[index] += static_cast<offset_type>(n); +} + +void CordRepRing::SubLength(index_type index, size_t n) { + entry_end_pos()[index] -= n; +} + +class CordRepRing::Filler { + public: + Filler(CordRepRing* rep, index_type pos) : rep_(rep), head_(pos), pos_(pos) {} + + index_type head() const { return head_; } + index_type pos() const { return pos_; } + + void Add(CordRep* child, size_t offset, pos_type end_pos) { + rep_->entry_end_pos()[pos_] = end_pos; + rep_->entry_child()[pos_] = child; + rep_->entry_data_offset()[pos_] = static_cast<offset_type>(offset); + pos_ = rep_->advance(pos_); + } + + private: + CordRepRing* rep_; + index_type head_; + index_type pos_; +}; + +constexpr size_t CordRepRing::kMaxCapacity; // NOLINT: needed for c++11 + +bool CordRepRing::IsValid(std::ostream& output) const { + if (capacity_ == 0) { + output << "capacity == 0"; + return false; + } + + if (head_ >= capacity_ || tail_ >= capacity_) { + output << "head " << head_ << " and/or tail " << tail_ << "exceed capacity " + << capacity_; + return false; + } + + const index_type back = retreat(tail_); + size_t pos_length = Distance(begin_pos_, entry_end_pos(back)); + if (pos_length != length) { + output << "length " << length << " does not match positional length " + << pos_length << " from begin_pos " << begin_pos_ << " and entry[" + << back << "].end_pos " << entry_end_pos(back); + return false; + } + + index_type head = head_; + pos_type begin_pos = begin_pos_; + do { + pos_type end_pos = entry_end_pos(head); + size_t entry_length = Distance(begin_pos, end_pos); + if (entry_length == 0) { + output << "entry[" << head << "] has an invalid length " << entry_length + << " from begin_pos " << begin_pos << " and end_pos " << end_pos; + return false; + } + + CordRep* child = entry_child(head); + if (child == nullptr) { + output << "entry[" << head << "].child == nullptr"; + return false; + } + if (child->tag < FLAT && child->tag != EXTERNAL) { + output << "entry[" << head << "].child has an invalid tag " + << static_cast<int>(child->tag); + return false; + } + + size_t offset = entry_data_offset(head); + if (offset >= child->length || entry_length > child->length - offset) { + output << "entry[" << head << "] has offset " << offset + << " and entry length " << entry_length + << " which are outside of the child's length of " << child->length; + return false; + } + + begin_pos = end_pos; + head = advance(head); + } while (head != tail_); + + return true; +} + +#ifdef EXTRA_CORD_RING_VALIDATION +CordRepRing* CordRepRing::Validate(CordRepRing* rep, const char* file, + int line) { + if (!rep->IsValid(std::cerr)) { + std::cerr << "\nERROR: CordRepRing corrupted"; + if (line) std::cerr << " at line " << line; + if (file) std::cerr << " in file " << file; + std::cerr << "\nContent = " << *rep; + abort(); + } + return rep; +} +#endif // EXTRA_CORD_RING_VALIDATION + +CordRepRing* CordRepRing::New(size_t capacity, size_t extra) { + CheckCapacity(capacity, extra); + + size_t size = AllocSize(capacity += extra); + void* mem = ::operator new(size); + auto* rep = new (mem) CordRepRing(static_cast<index_type>(capacity)); + rep->tag = RING; + rep->capacity_ = static_cast<index_type>(capacity); + rep->begin_pos_ = 0; + return rep; +} + +void CordRepRing::SetCapacityForTesting(size_t capacity) { + // Adjust for the changed layout + assert(capacity <= capacity_); + assert(head() == 0 || head() < tail()); + memmove(Layout::Partial(capacity).Pointer<1>(data_) + head(), + Layout::Partial(capacity_).Pointer<1>(data_) + head(), + entries() * sizeof(Layout::ElementType<1>)); + memmove(Layout::Partial(capacity, capacity).Pointer<2>(data_) + head(), + Layout::Partial(capacity_, capacity_).Pointer<2>(data_) + head(), + entries() * sizeof(Layout::ElementType<2>)); + capacity_ = static_cast<index_type>(capacity); +} + +void CordRepRing::Delete(CordRepRing* rep) { + assert(rep != nullptr && rep->IsRing()); +#if defined(__cpp_sized_deallocation) + size_t size = AllocSize(rep->capacity_); + rep->~CordRepRing(); + ::operator delete(rep, size); +#else + rep->~CordRepRing(); + ::operator delete(rep); +#endif +} + +void CordRepRing::Destroy(CordRepRing* rep) { + UnrefEntries(rep, rep->head(), rep->tail()); + Delete(rep); +} + +template <bool ref> +void CordRepRing::Fill(const CordRepRing* src, index_type head, + index_type tail) { + this->length = src->length; + head_ = 0; + tail_ = advance(0, src->entries(head, tail)); + begin_pos_ = src->begin_pos_; + + // TODO(mvels): there may be opportunities here for large buffers. + auto* dst_pos = entry_end_pos(); + auto* dst_child = entry_child(); + auto* dst_offset = entry_data_offset(); + src->ForEach(head, tail, [&](index_type index) { + *dst_pos++ = src->entry_end_pos(index); + CordRep* child = src->entry_child(index); + *dst_child++ = ref ? CordRep::Ref(child) : child; + *dst_offset++ = src->entry_data_offset(index); + }); +} + +CordRepRing* CordRepRing::Copy(CordRepRing* rep, index_type head, + index_type tail, size_t extra) { + CordRepRing* newrep = CordRepRing::New(rep->entries(head, tail), extra); + newrep->Fill<true>(rep, head, tail); + CordRep::Unref(rep); + return newrep; +} + +CordRepRing* CordRepRing::Mutable(CordRepRing* rep, size_t extra) { + // Get current number of entries, and check for max capacity. + size_t entries = rep->entries(); + + if (!rep->refcount.IsMutable()) { + return Copy(rep, rep->head(), rep->tail(), extra); + } else if (entries + extra > rep->capacity()) { + const size_t min_grow = rep->capacity() + rep->capacity() / 2; + const size_t min_extra = (std::max)(extra, min_grow - entries); + CordRepRing* newrep = CordRepRing::New(entries, min_extra); + newrep->Fill<false>(rep, rep->head(), rep->tail()); + CordRepRing::Delete(rep); + return newrep; + } else { + return rep; + } +} + +Span<char> CordRepRing::GetAppendBuffer(size_t size) { + assert(refcount.IsMutable()); + index_type back = retreat(tail_); + CordRep* child = entry_child(back); + if (child->tag >= FLAT && child->refcount.IsMutable()) { + size_t capacity = child->flat()->Capacity(); + pos_type end_pos = entry_end_pos(back); + size_t data_offset = entry_data_offset(back); + size_t entry_length = Distance(entry_begin_pos(back), end_pos); + size_t used = data_offset + entry_length; + if (size_t n = (std::min)(capacity - used, size)) { + child->length = data_offset + entry_length + n; + entry_end_pos()[back] = end_pos + n; + this->length += n; + return {child->flat()->Data() + used, n}; + } + } + return {nullptr, 0}; +} + +Span<char> CordRepRing::GetPrependBuffer(size_t size) { + assert(refcount.IsMutable()); + CordRep* child = entry_child(head_); + size_t data_offset = entry_data_offset(head_); + if (data_offset && child->refcount.IsMutable() && child->tag >= FLAT) { + size_t n = (std::min)(data_offset, size); + this->length += n; + begin_pos_ -= n; + data_offset -= n; + entry_data_offset()[head_] = static_cast<offset_type>(data_offset); + return {child->flat()->Data() + data_offset, n}; + } + return {nullptr, 0}; +} + +CordRepRing* CordRepRing::CreateFromLeaf(CordRep* child, size_t offset, + size_t len, size_t extra) { + CordRepRing* rep = CordRepRing::New(1, extra); + rep->head_ = 0; + rep->tail_ = rep->advance(0); + rep->length = len; + rep->entry_end_pos()[0] = len; + rep->entry_child()[0] = child; + rep->entry_data_offset()[0] = static_cast<offset_type>(offset); + return Validate(rep); +} + +CordRepRing* CordRepRing::CreateSlow(CordRep* child, size_t extra) { + CordRepRing* rep = nullptr; + Consume(child, [&](CordRep* child_arg, size_t offset, size_t len) { + if (IsFlatOrExternal(child_arg)) { + rep = rep ? AppendLeaf(rep, child_arg, offset, len) + : CreateFromLeaf(child_arg, offset, len, extra); + } else if (rep) { + rep = AddRing<AddMode::kAppend>(rep, child_arg->ring(), offset, len); + } else if (offset == 0 && child_arg->length == len) { + rep = Mutable(child_arg->ring(), extra); + } else { + rep = SubRing(child_arg->ring(), offset, len, extra); + } + }); + return Validate(rep, nullptr, __LINE__); +} + +CordRepRing* CordRepRing::Create(CordRep* child, size_t extra) { + size_t length = child->length; + if (IsFlatOrExternal(child)) { + return CreateFromLeaf(child, 0, length, extra); + } + if (child->IsRing()) { + return Mutable(child->ring(), extra); + } + return CreateSlow(child, extra); +} + +template <CordRepRing::AddMode mode> +CordRepRing* CordRepRing::AddRing(CordRepRing* rep, CordRepRing* ring, + size_t offset, size_t len) { + assert(offset < ring->length); + constexpr bool append = mode == AddMode::kAppend; + Position head = ring->Find(offset); + Position tail = ring->FindTail(head.index, offset + len); + const index_type entries = ring->entries(head.index, tail.index); + + rep = Mutable(rep, entries); + + // The delta for making ring[head].end_pos into 'len - offset' + const pos_type delta_length = + (append ? rep->begin_pos_ + rep->length : rep->begin_pos_ - len) - + ring->entry_begin_pos(head.index) - head.offset; + + // Start filling at `tail`, or `entries` before `head` + Filler filler(rep, append ? rep->tail_ : rep->retreat(rep->head_, entries)); + + if (ring->refcount.IsOne()) { + // Copy entries from source stealing the ref and adjusting the end position. + // Commit the filler as this is no-op. + ring->ForEach(head.index, tail.index, [&](index_type ix) { + filler.Add(ring->entry_child(ix), ring->entry_data_offset(ix), + ring->entry_end_pos(ix) + delta_length); + }); + + // Unref entries we did not copy over, and delete source. + if (head.index != ring->head_) UnrefEntries(ring, ring->head_, head.index); + if (tail.index != ring->tail_) UnrefEntries(ring, tail.index, ring->tail_); + CordRepRing::Delete(ring); + } else { + ring->ForEach(head.index, tail.index, [&](index_type ix) { + CordRep* child = ring->entry_child(ix); + filler.Add(child, ring->entry_data_offset(ix), + ring->entry_end_pos(ix) + delta_length); + CordRep::Ref(child); + }); + CordRepRing::Unref(ring); + } + + if (head.offset) { + // Increase offset of first 'source' entry appended or prepended. + // This is always the entry in `filler.head()` + rep->AddDataOffset(filler.head(), head.offset); + } + + if (tail.offset) { + // Reduce length of last 'source' entry appended or prepended. + // This is always the entry tailed by `filler.pos()` + rep->SubLength(rep->retreat(filler.pos()), tail.offset); + } + + // Commit changes + rep->length += len; + if (append) { + rep->tail_ = filler.pos(); + } else { + rep->head_ = filler.head(); + rep->begin_pos_ -= len; + } + + return Validate(rep); +} + +CordRepRing* CordRepRing::AppendSlow(CordRepRing* rep, CordRep* child) { + Consume(child, [&rep](CordRep* child_arg, size_t offset, size_t len) { + if (child_arg->IsRing()) { + rep = AddRing<AddMode::kAppend>(rep, child_arg->ring(), offset, len); + } else { + rep = AppendLeaf(rep, child_arg, offset, len); + } + }); + return rep; +} + +CordRepRing* CordRepRing::AppendLeaf(CordRepRing* rep, CordRep* child, + size_t offset, size_t len) { + rep = Mutable(rep, 1); + index_type back = rep->tail_; + const pos_type begin_pos = rep->begin_pos_ + rep->length; + rep->tail_ = rep->advance(rep->tail_); + rep->length += len; + rep->entry_end_pos()[back] = begin_pos + len; + rep->entry_child()[back] = child; + rep->entry_data_offset()[back] = static_cast<offset_type>(offset); + return Validate(rep, nullptr, __LINE__); +} + +CordRepRing* CordRepRing::Append(CordRepRing* rep, CordRep* child) { + size_t length = child->length; + if (IsFlatOrExternal(child)) { + return AppendLeaf(rep, child, 0, length); + } + if (child->IsRing()) { + return AddRing<AddMode::kAppend>(rep, child->ring(), 0, length); + } + return AppendSlow(rep, child); +} + +CordRepRing* CordRepRing::PrependSlow(CordRepRing* rep, CordRep* child) { + ReverseConsume(child, [&](CordRep* child_arg, size_t offset, size_t len) { + if (IsFlatOrExternal(child_arg)) { + rep = PrependLeaf(rep, child_arg, offset, len); + } else { + rep = AddRing<AddMode::kPrepend>(rep, child_arg->ring(), offset, len); + } + }); + return Validate(rep); +} + +CordRepRing* CordRepRing::PrependLeaf(CordRepRing* rep, CordRep* child, + size_t offset, size_t len) { + rep = Mutable(rep, 1); + index_type head = rep->retreat(rep->head_); + pos_type end_pos = rep->begin_pos_; + rep->head_ = head; + rep->length += len; + rep->begin_pos_ -= len; + rep->entry_end_pos()[head] = end_pos; + rep->entry_child()[head] = child; + rep->entry_data_offset()[head] = static_cast<offset_type>(offset); + return Validate(rep); +} + +CordRepRing* CordRepRing::Prepend(CordRepRing* rep, CordRep* child) { + size_t length = child->length; + if (IsFlatOrExternal(child)) { + return PrependLeaf(rep, child, 0, length); + } + if (child->IsRing()) { + return AddRing<AddMode::kPrepend>(rep, child->ring(), 0, length); + } + return PrependSlow(rep, child); +} + +CordRepRing* CordRepRing::Append(CordRepRing* rep, absl::string_view data, + size_t extra) { + if (rep->refcount.IsMutable()) { + Span<char> avail = rep->GetAppendBuffer(data.length()); + if (!avail.empty()) { + memcpy(avail.data(), data.data(), avail.length()); + data.remove_prefix(avail.length()); + } + } + if (data.empty()) return Validate(rep); + + const size_t flats = (data.length() - 1) / kMaxFlatLength + 1; + rep = Mutable(rep, flats); + + Filler filler(rep, rep->tail_); + pos_type pos = rep->begin_pos_ + rep->length; + + while (data.length() >= kMaxFlatLength) { + auto* flat = CreateFlat(data.data(), kMaxFlatLength); + filler.Add(flat, 0, pos += kMaxFlatLength); + data.remove_prefix(kMaxFlatLength); + } + + if (data.length()) { + auto* flat = CreateFlat(data.data(), data.length(), extra); + filler.Add(flat, 0, pos += data.length()); + } + + rep->length = pos - rep->begin_pos_; + rep->tail_ = filler.pos(); + + return Validate(rep); +} + +CordRepRing* CordRepRing::Prepend(CordRepRing* rep, absl::string_view data, + size_t extra) { + if (rep->refcount.IsMutable()) { + Span<char> avail = rep->GetPrependBuffer(data.length()); + if (!avail.empty()) { + const char* tail = data.data() + data.length() - avail.length(); + memcpy(avail.data(), tail, avail.length()); + data.remove_suffix(avail.length()); + } + } + if (data.empty()) return rep; + + const size_t flats = (data.length() - 1) / kMaxFlatLength + 1; + rep = Mutable(rep, flats); + pos_type pos = rep->begin_pos_; + Filler filler(rep, rep->retreat(rep->head_, static_cast<index_type>(flats))); + + size_t first_size = data.size() - (flats - 1) * kMaxFlatLength; + CordRepFlat* flat = CordRepFlat::New(first_size + extra); + flat->length = first_size + extra; + memcpy(flat->Data() + extra, data.data(), first_size); + data.remove_prefix(first_size); + filler.Add(flat, extra, pos); + pos -= first_size; + + while (!data.empty()) { + assert(data.size() >= kMaxFlatLength); + flat = CreateFlat(data.data(), kMaxFlatLength); + filler.Add(flat, 0, pos); + pos -= kMaxFlatLength; + data.remove_prefix(kMaxFlatLength); + } + + rep->head_ = filler.head(); + rep->length += rep->begin_pos_ - pos; + rep->begin_pos_ = pos; + + return Validate(rep); +} + +// 32 entries is 32 * sizeof(pos_type) = 4 cache lines on x86 +static constexpr index_type kBinarySearchThreshold = 32; +static constexpr index_type kBinarySearchEndCount = 8; + +template <bool wrap> +CordRepRing::index_type CordRepRing::FindBinary(index_type head, + index_type tail, + size_t offset) const { + index_type count = tail + (wrap ? capacity_ : 0) - head; + do { + count = (count - 1) / 2; + assert(count < entries(head, tail_)); + index_type mid = wrap ? advance(head, count) : head + count; + index_type after_mid = wrap ? advance(mid) : mid + 1; + bool larger = (offset >= entry_end_offset(mid)); + head = larger ? after_mid : head; + tail = larger ? tail : mid; + assert(head != tail); + } while (ABSL_PREDICT_TRUE(count > kBinarySearchEndCount)); + return head; +} + +CordRepRing::Position CordRepRing::FindSlow(index_type head, + size_t offset) const { + index_type tail = tail_; + + // Binary search until we are good for linear search + // Optimize for branchless / non wrapping ops + if (tail > head) { + index_type count = tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<false>(head, tail, offset); + } + } else { + index_type count = capacity_ + tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<true>(head, tail, offset); + } + } + + pos_type pos = entry_begin_pos(head); + pos_type end_pos = entry_end_pos(head); + while (offset >= Distance(begin_pos_, end_pos)) { + head = advance(head); + pos = end_pos; + end_pos = entry_end_pos(head); + } + + return {head, offset - Distance(begin_pos_, pos)}; +} + +CordRepRing::Position CordRepRing::FindTailSlow(index_type head, + size_t offset) const { + index_type tail = tail_; + const size_t tail_offset = offset - 1; + + // Binary search until we are good for linear search + // Optimize for branchless / non wrapping ops + if (tail > head) { + index_type count = tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<false>(head, tail, tail_offset); + } + } else { + index_type count = capacity_ + tail - head; + if (count > kBinarySearchThreshold) { + head = FindBinary<true>(head, tail, tail_offset); + } + } + + size_t end_offset = entry_end_offset(head); + while (tail_offset >= end_offset) { + head = advance(head); + end_offset = entry_end_offset(head); + } + + return {advance(head), end_offset - offset}; +} + +char CordRepRing::GetCharacter(size_t offset) const { + assert(offset < length); + + Position pos = Find(offset); + size_t data_offset = entry_data_offset(pos.index) + pos.offset; + return GetRepData(entry_child(pos.index))[data_offset]; +} + +CordRepRing* CordRepRing::SubRing(CordRepRing* rep, size_t offset, + size_t len, size_t extra) { + assert(offset <= rep->length); + assert(offset <= rep->length - len); + + if (len == 0) { + CordRep::Unref(rep); + return nullptr; + } + + // Find position of first byte + Position head = rep->Find(offset); + Position tail = rep->FindTail(head.index, offset + len); + const size_t new_entries = rep->entries(head.index, tail.index); + + if (rep->refcount.IsMutable() && extra <= (rep->capacity() - new_entries)) { + // We adopt a privately owned rep and no extra entries needed. + if (head.index != rep->head_) UnrefEntries(rep, rep->head_, head.index); + if (tail.index != rep->tail_) UnrefEntries(rep, tail.index, rep->tail_); + rep->head_ = head.index; + rep->tail_ = tail.index; + } else { + // Copy subset to new rep + rep = Copy(rep, head.index, tail.index, extra); + head.index = rep->head_; + tail.index = rep->tail_; + } + + // Adjust begin_pos and length + rep->length = len; + rep->begin_pos_ += offset; + + // Adjust head and tail blocks + if (head.offset) { + rep->AddDataOffset(head.index, head.offset); + } + if (tail.offset) { + rep->SubLength(rep->retreat(tail.index), tail.offset); + } + + return Validate(rep); +} + +CordRepRing* CordRepRing::RemovePrefix(CordRepRing* rep, size_t len, + size_t extra) { + assert(len <= rep->length); + if (len == rep->length) { + CordRep::Unref(rep); + return nullptr; + } + + Position head = rep->Find(len); + if (rep->refcount.IsMutable()) { + if (head.index != rep->head_) UnrefEntries(rep, rep->head_, head.index); + rep->head_ = head.index; + } else { + rep = Copy(rep, head.index, rep->tail_, extra); + head.index = rep->head_; + } + + // Adjust begin_pos and length + rep->length -= len; + rep->begin_pos_ += len; + + // Adjust head block + if (head.offset) { + rep->AddDataOffset(head.index, head.offset); + } + + return Validate(rep); +} + +CordRepRing* CordRepRing::RemoveSuffix(CordRepRing* rep, size_t len, + size_t extra) { + assert(len <= rep->length); + + if (len == rep->length) { + CordRep::Unref(rep); + return nullptr; + } + + Position tail = rep->FindTail(rep->length - len); + if (rep->refcount.IsMutable()) { + // We adopt a privately owned rep, scrub. + if (tail.index != rep->tail_) UnrefEntries(rep, tail.index, rep->tail_); + rep->tail_ = tail.index; + } else { + // Copy subset to new rep + rep = Copy(rep, rep->head_, tail.index, extra); + tail.index = rep->tail_; + } + + // Adjust length + rep->length -= len; + + // Adjust tail block + if (tail.offset) { + rep->SubLength(rep->retreat(tail.index), tail.offset); + } + + return Validate(rep); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_ring.h b/third_party/abseil-cpp/absl/strings/internal/cord_rep_ring.h new file mode 100644 index 0000000000..2000e21ea0 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_ring.h @@ -0,0 +1,607 @@ +// Copyright 2020 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_RING_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_RING_H_ + +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iosfwd> +#include <limits> +#include <memory> + +#include "absl/container/internal/layout.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// All operations modifying a ring buffer are implemented as static methods +// requiring a CordRepRing instance with a reference adopted by the method. +// +// The methods return the modified ring buffer, which may be equal to the input +// if the input was not shared, and having large enough capacity to accommodate +// any newly added node(s). Otherwise, a copy of the input rep with the new +// node(s) added is returned. +// +// Any modification on non shared ring buffers with enough capacity will then +// require minimum atomic operations. Caller should where possible provide +// reasonable `extra` hints for both anticipated extra `flat` byte space, as +// well as anticipated extra nodes required for complex operations. +// +// Example of code creating a ring buffer, adding some data to it, +// and discarding the buffer when done: +// +// void FunWithRings() { +// // Create ring with 3 flats +// CordRep* flat = CreateFlat("Hello"); +// CordRepRing* ring = CordRepRing::Create(flat, 2); +// ring = CordRepRing::Append(ring, CreateFlat(" ")); +// ring = CordRepRing::Append(ring, CreateFlat("world")); +// DoSomethingWithRing(ring); +// CordRep::Unref(ring); +// } +// +// Example of code Copying an existing ring buffer and modifying it: +// +// void MoreFunWithRings(CordRepRing* src) { +// CordRepRing* ring = CordRep::Ref(src)->ring(); +// ring = CordRepRing::Append(ring, CreateFlat("Hello")); +// ring = CordRepRing::Append(ring, CreateFlat(" ")); +// ring = CordRepRing::Append(ring, CreateFlat("world")); +// DoSomethingWithRing(ring); +// CordRep::Unref(ring); +// } +// +class CordRepRing : public CordRep { + public: + // `pos_type` represents a 'logical position'. A CordRepRing instance has a + // `begin_pos` (default 0), and each node inside the buffer will have an + // `end_pos` which is the `end_pos` of the previous node (or `begin_pos`) plus + // this node's length. The purpose is to allow for a binary search on this + // position, while allowing O(1) prepend and append operations. + using pos_type = size_t; + + // `index_type` is the type for the `head`, `tail` and `capacity` indexes. + // Ring buffers are limited to having no more than four billion entries. + using index_type = uint32_t; + + // `offset_type` is the type for the data offset inside a child rep's data. + using offset_type = uint32_t; + + // Position holds the node index and relative offset into the node for + // some physical offset in the contained data as returned by the Find() + // and FindTail() methods. + struct Position { + index_type index; + size_t offset; + }; + + // The maximum # of child nodes that can be hosted inside a CordRepRing. + static constexpr size_t kMaxCapacity = (std::numeric_limits<uint32_t>::max)(); + + // CordRepring can not be default constructed, moved, copied or assigned. + CordRepRing() = delete; + CordRepRing(const CordRepRing&) = delete; + CordRepRing& operator=(const CordRepRing&) = delete; + + // Returns true if this instance is valid, false if some or all of the + // invariants are broken. Intended for debug purposes only. + // `output` receives an explanation of the broken invariants. + bool IsValid(std::ostream& output) const; + + // Returns the size in bytes for a CordRepRing with `capacity' entries. + static constexpr size_t AllocSize(size_t capacity); + + // Returns the distance in bytes from `pos` to `end_pos`. + static constexpr size_t Distance(pos_type pos, pos_type end_pos); + + // Creates a new ring buffer from the provided `rep`. Adopts a reference + // on `rep`. The returned ring buffer has a capacity of at least `extra + 1` + static CordRepRing* Create(CordRep* child, size_t extra = 0); + + // `head`, `tail` and `capacity` indexes defining the ring buffer boundaries. + index_type head() const { return head_; } + index_type tail() const { return tail_; } + index_type capacity() const { return capacity_; } + + // Returns the number of entries in this instance. + index_type entries() const { return entries(head_, tail_); } + + // Returns the logical begin position of this instance. + pos_type begin_pos() const { return begin_pos_; } + + // Returns the number of entries for a given head-tail range. + // Requires `head` and `tail` values to be less than `capacity()`. + index_type entries(index_type head, index_type tail) const { + assert(head < capacity_ && tail < capacity_); + return tail - head + ((tail > head) ? 0 : capacity_); + } + + // Returns the logical end position of entry `index`. + pos_type const& entry_end_pos(index_type index) const { + assert(IsValidIndex(index)); + return Layout::Partial().Pointer<0>(data_)[index]; + } + + // Returns the child pointer of entry `index`. + CordRep* const& entry_child(index_type index) const { + assert(IsValidIndex(index)); + return Layout::Partial(capacity()).Pointer<1>(data_)[index]; + } + + // Returns the data offset of entry `index` + offset_type const& entry_data_offset(index_type index) const { + assert(IsValidIndex(index)); + return Layout::Partial(capacity(), capacity()).Pointer<2>(data_)[index]; + } + + // Appends the provided child node to the `rep` instance. + // Adopts a reference from `rep` and `child` which may not be null. + // If the provided child is a FLAT or EXTERNAL node, or a SUBSTRING node + // containing a FLAT or EXTERNAL node, then flat or external the node is added + // 'as is', with an offset added for the SUBSTRING case. + // If the provided child is a RING or CONCAT tree, or a SUBSTRING of a RING or + // CONCAT tree, then all child nodes not excluded by any start offset or + // length values are added recursively. + static CordRepRing* Append(CordRepRing* rep, CordRep* child); + + // Appends the provided string data to the `rep` instance. + // This function will attempt to utilize any remaining capacity in the last + // node of the input if that node is not shared (directly or indirectly), and + // of type FLAT. Remaining data will be added as one or more FLAT nodes. + // Any last node added to the ring buffer will be allocated with up to + // `extra` bytes of capacity for (anticipated) subsequent append actions. + static CordRepRing* Append(CordRepRing* rep, string_view data, + size_t extra = 0); + + // Prepends the provided child node to the `rep` instance. + // Adopts a reference from `rep` and `child` which may not be null. + // If the provided child is a FLAT or EXTERNAL node, or a SUBSTRING node + // containing a FLAT or EXTERNAL node, then flat or external the node is + // prepended 'as is', with an optional offset added for the SUBSTRING case. + // If the provided child is a RING or CONCAT tree, or a SUBSTRING of a RING + // or CONCAT tree, then all child nodes not excluded by any start offset or + // length values are added recursively. + static CordRepRing* Prepend(CordRepRing* rep, CordRep* child); + + // Prepends the provided string data to the `rep` instance. + // This function will attempt to utilize any remaining capacity in the first + // node of the input if that node is not shared (directly or indirectly), and + // of type FLAT. Remaining data will be added as one or more FLAT nodes. + // Any first node prepnded to the ring buffer will be allocated with up to + // `extra` bytes of capacity for (anticipated) subsequent prepend actions. + static CordRepRing* Prepend(CordRepRing* rep, string_view data, + size_t extra = 0); + + // Returns a span referencing potentially unused capacity in the last node. + // The returned span may be empty if no such capacity is available, or if the + // current instance is shared. Else, a span of size `n <= size` is returned. + // If non empty, the ring buffer is adjusted to the new length, with the newly + // added capacity left uninitialized. Callers should assign a value to the + // entire span before any other operations on this instance. + Span<char> GetAppendBuffer(size_t size); + + // Returns a span referencing potentially unused capacity in the first node. + // This function is identical to GetAppendBuffer except that it returns a span + // referencing up to `size` capacity directly before the existing data. + Span<char> GetPrependBuffer(size_t size); + + // Returns a cord ring buffer containing `len` bytes of data starting at + // `offset`. If the input is not shared, this function will remove all head + // and tail child nodes outside of the requested range, and adjust the new + // head and tail nodes as required. If the input is shared, this function + // returns a new instance sharing some or all of the nodes from the input. + static CordRepRing* SubRing(CordRepRing* r, size_t offset, size_t len, + size_t extra = 0); + + // Returns a cord ring buffer with the first `len` bytes removed. + // If the input is not shared, this function will remove all head child nodes + // fully inside the first `length` bytes, and adjust the new head as required. + // If the input is shared, this function returns a new instance sharing some + // or all of the nodes from the input. + static CordRepRing* RemoveSuffix(CordRepRing* r, size_t len, + size_t extra = 0); + + // Returns a cord ring buffer with the last `len` bytes removed. + // If the input is not shared, this function will remove all head child nodes + // fully inside the first `length` bytes, and adjust the new head as required. + // If the input is shared, this function returns a new instance sharing some + // or all of the nodes from the input. + static CordRepRing* RemovePrefix(CordRepRing* r, size_t len, + size_t extra = 0); + + // Returns the character at `offset`. Requires that `offset < length`. + char GetCharacter(size_t offset) const; + + // Returns true if this instance manages a single contiguous buffer, in which + // case the (optional) output parameter `fragment` is set. Otherwise, the + // function returns false, and `fragment` is left unchanged. + bool IsFlat(absl::string_view* fragment) const; + + // Returns true if the data starting at `offset` with length `len` is + // managed by this instance inside a single contiguous buffer, in which case + // the (optional) output parameter `fragment` is set to the contiguous memory + // starting at offset `offset` with length `length`. Otherwise, the function + // returns false, and `fragment` is left unchanged. + bool IsFlat(size_t offset, size_t len, absl::string_view* fragment) const; + + // Testing only: set capacity to requested capacity. + void SetCapacityForTesting(size_t capacity); + + // Returns the CordRep data pointer for the provided CordRep. + // Requires that the provided `rep` is either a FLAT or EXTERNAL CordRep. + static const char* GetLeafData(const CordRep* rep); + + // Returns the CordRep data pointer for the provided CordRep. + // Requires that `rep` is either a FLAT, EXTERNAL, or SUBSTRING CordRep. + static const char* GetRepData(const CordRep* rep); + + // Advances the provided position, wrapping around capacity as needed. + // Requires `index` < capacity() + inline index_type advance(index_type index) const; + + // Advances the provided position by 'n`, wrapping around capacity as needed. + // Requires `index` < capacity() and `n` <= capacity. + inline index_type advance(index_type index, index_type n) const; + + // Retreats the provided position, wrapping around 0 as needed. + // Requires `index` < capacity() + inline index_type retreat(index_type index) const; + + // Retreats the provided position by 'n', wrapping around 0 as needed. + // Requires `index` < capacity() + inline index_type retreat(index_type index, index_type n) const; + + // Returns the logical begin position of entry `index` + pos_type const& entry_begin_pos(index_type index) const { + return (index == head_) ? begin_pos_ : entry_end_pos(retreat(index)); + } + + // Returns the physical start offset of entry `index` + size_t entry_start_offset(index_type index) const { + return Distance(begin_pos_, entry_begin_pos(index)); + } + + // Returns the physical end offset of entry `index` + size_t entry_end_offset(index_type index) const { + return Distance(begin_pos_, entry_end_pos(index)); + } + + // Returns the data length for entry `index` + size_t entry_length(index_type index) const { + return Distance(entry_begin_pos(index), entry_end_pos(index)); + } + + // Returns the data for entry `index` + absl::string_view entry_data(index_type index) const; + + // Returns the position for `offset` as {index, prefix}. `index` holds the + // index of the entry at the specified offset and `prefix` holds the relative + // offset inside that entry. + // Requires `offset` < length. + // + // For example we can implement GetCharacter(offset) as: + // char GetCharacter(size_t offset) { + // Position pos = this->Find(offset); + // return this->entry_data(pos.pos)[pos.offset]; + // } + inline Position Find(size_t offset) const; + + // Find starting at `head` + inline Position Find(index_type head, size_t offset) const; + + // Returns the tail position for `offset` as {tail index, suffix}. + // `tail index` holds holds the index of the entry holding the offset directly + // before 'offset` advanced by one. 'suffix` holds the relative offset from + // that relative offset in the entry to the end of the entry. + // For example, FindTail(length) will return {tail(), 0}, FindTail(length - 5) + // will return {retreat(tail), 5)} provided the preceding entry contains at + // least 5 bytes of data. + // Requires offset >= 1 && offset <= length. + // + // This function is very useful in functions that need to clip the end of some + // ring buffer such as 'RemovePrefix'. + // For example, we could implement RemovePrefix for non shared instances as: + // void RemoveSuffix(size_t n) { + // Position pos = FindTail(length - n); + // UnrefEntries(pos.pos, this->tail_); + // this->tail_ = pos.pos; + // entry(retreat(pos.pos)).end_pos -= pos.offset; + // } + inline Position FindTail(size_t offset) const; + + // Find tail starting at `head` + inline Position FindTail(index_type head, size_t offset) const; + + // Invokes f(index_type index) for each entry inside the range [head, tail> + template <typename F> + void ForEach(index_type head, index_type tail, F&& f) const { + index_type n1 = (tail > head) ? tail : capacity_; + for (index_type i = head; i < n1; ++i) f(i); + if (tail <= head) { + for (index_type i = 0; i < tail; ++i) f(i); + } + } + + // Invokes f(index_type index) for each entry inside this instance. + template <typename F> + void ForEach(F&& f) const { + ForEach(head_, tail_, std::forward<F>(f)); + } + + // Dump this instance's data tp stream `s` in human readable format, excluding + // the actual data content itself. Intended for debug purposes only. + friend std::ostream& operator<<(std::ostream& s, const CordRepRing& rep); + + private: + enum class AddMode { kAppend, kPrepend }; + + using Layout = container_internal::Layout<pos_type, CordRep*, offset_type>; + + class Filler; + class Transaction; + class CreateTransaction; + + static constexpr size_t kLayoutAlignment = Layout::Partial().Alignment(); + + // Creates a new CordRepRing. + explicit CordRepRing(index_type capacity) : capacity_(capacity) {} + + // Returns true if `index` is a valid index into this instance. + bool IsValidIndex(index_type index) const; + + // Debug use only: validates the provided CordRepRing invariants. + // Verification of all CordRepRing methods can be enabled by defining + // EXTRA_CORD_RING_VALIDATION, i.e.: `--copts=-DEXTRA_CORD_RING_VALIDATION` + // Verification is VERY expensive, so only do it for debugging purposes. + static CordRepRing* Validate(CordRepRing* rep, const char* file = nullptr, + int line = 0); + + // Allocates a CordRepRing large enough to hold `capacity + extra' entries. + // The returned capacity may be larger if the allocated memory allows for it. + // The maximum capacity of a CordRepRing is capped at kMaxCapacity. + // Throws `std::length_error` if `capacity + extra' exceeds kMaxCapacity. + static CordRepRing* New(size_t capacity, size_t extra); + + // Deallocates (but does not destroy) the provided ring buffer. + static void Delete(CordRepRing* rep); + + // Destroys the provided ring buffer, decrementing the reference count of all + // contained child CordReps. The provided 1\`rep` should have a ref count of + // one (pre decrement destroy call observing `refcount.IsOne()`) or zero + // (post decrement destroy call observing `!refcount.Decrement()`). + static void Destroy(CordRepRing* rep); + + // Returns a mutable reference to the logical end position array. + pos_type* entry_end_pos() { + return Layout::Partial().Pointer<0>(data_); + } + + // Returns a mutable reference to the child pointer array. + CordRep** entry_child() { + return Layout::Partial(capacity()).Pointer<1>(data_); + } + + // Returns a mutable reference to the data offset array. + offset_type* entry_data_offset() { + return Layout::Partial(capacity(), capacity()).Pointer<2>(data_); + } + + // Find implementations for the non fast path 0 / length cases. + Position FindSlow(index_type head, size_t offset) const; + Position FindTailSlow(index_type head, size_t offset) const; + + // Finds the index of the first node that is inside a reasonable distance + // of the node at `offset` from which we can continue with a linear search. + template <bool wrap> + index_type FindBinary(index_type head, index_type tail, size_t offset) const; + + // Fills the current (initialized) instance from the provided source, copying + // entries [head, tail). Adds a reference to copied entries if `ref` is true. + template <bool ref> + void Fill(const CordRepRing* src, index_type head, index_type tail); + + // Create a copy of 'rep', copying all entries [head, tail), allocating room + // for `extra` entries. Adds a reference on all copied entries. + static CordRepRing* Copy(CordRepRing* rep, index_type head, index_type tail, + size_t extra = 0); + + // Returns a Mutable CordRepRing reference from `rep` with room for at least + // `extra` additional nodes. Adopts a reference count from `rep`. + // This function will return `rep` if, and only if: + // - rep.entries + extra <= rep.capacity + // - rep.refcount == 1 + // Otherwise, this function will create a new copy of `rep` with additional + // capacity to satisfy `extra` extra nodes, and unref the old `rep` instance. + // + // If a new CordRepRing can not be allocated, or the new capacity would exceed + // the maxmimum capacity, then the input is consumed only, and an exception is + // thrown. + static CordRepRing* Mutable(CordRepRing* rep, size_t extra); + + // Slow path for Append(CordRepRing* rep, CordRep* child). This function is + // exercised if the provided `child` in Append() is not a leaf node, i.e., a + // ring buffer or old (concat) cord tree. + static CordRepRing* AppendSlow(CordRepRing* rep, CordRep* child); + + // Appends the provided leaf node. Requires `child` to be FLAT or EXTERNAL. + static CordRepRing* AppendLeaf(CordRepRing* rep, CordRep* child, + size_t offset, size_t length); + + // Prepends the provided leaf node. Requires `child` to be FLAT or EXTERNAL. + static CordRepRing* PrependLeaf(CordRepRing* rep, CordRep* child, + size_t offset, size_t length); + + // Slow path for Prepend(CordRepRing* rep, CordRep* child). This function is + // exercised if the provided `child` in Prepend() is not a leaf node, i.e., a + // ring buffer or old (concat) cord tree. + static CordRepRing* PrependSlow(CordRepRing* rep, CordRep* child); + + // Slow path for Create(CordRep* child, size_t extra). This function is + // exercised if the provided `child` in Prepend() is not a leaf node, i.e., a + // ring buffer or old (concat) cord tree. + static CordRepRing* CreateSlow(CordRep* child, size_t extra); + + // Creates a new ring buffer from the provided `child` leaf node. Requires + // `child` to be FLAT or EXTERNAL. on `rep`. + // The returned ring buffer has a capacity of at least `1 + extra` + static CordRepRing* CreateFromLeaf(CordRep* child, size_t offset, + size_t length, size_t extra); + + // Appends or prepends (depending on AddMode) the ring buffer in `ring' to + // `rep` starting at `offset` with length `len`. + template <AddMode mode> + static CordRepRing* AddRing(CordRepRing* rep, CordRepRing* ring, + size_t offset, size_t len); + + // Increases the data offset for entry `index` by `n`. + void AddDataOffset(index_type index, size_t n); + + // Descreases the length for entry `index` by `n`. + void SubLength(index_type index, size_t n); + + index_type head_; + index_type tail_; + index_type capacity_; + pos_type begin_pos_; + + alignas(kLayoutAlignment) char data_[kLayoutAlignment]; + + friend struct CordRep; +}; + +constexpr size_t CordRepRing::AllocSize(size_t capacity) { + return sizeof(CordRepRing) - sizeof(data_) + + Layout(capacity, capacity, capacity).AllocSize(); +} + +inline constexpr size_t CordRepRing::Distance(pos_type pos, pos_type end_pos) { + return (end_pos - pos); +} + +inline const char* CordRepRing::GetLeafData(const CordRep* rep) { + return rep->tag != EXTERNAL ? rep->flat()->Data() : rep->external()->base; +} + +inline const char* CordRepRing::GetRepData(const CordRep* rep) { + if (rep->tag >= FLAT) return rep->flat()->Data(); + if (rep->tag == EXTERNAL) return rep->external()->base; + return GetLeafData(rep->substring()->child) + rep->substring()->start; +} + +inline CordRepRing::index_type CordRepRing::advance(index_type index) const { + assert(index < capacity_); + return ++index == capacity_ ? 0 : index; +} + +inline CordRepRing::index_type CordRepRing::advance(index_type index, + index_type n) const { + assert(index < capacity_ && n <= capacity_); + return (index += n) >= capacity_ ? index - capacity_ : index; +} + +inline CordRepRing::index_type CordRepRing::retreat(index_type index) const { + assert(index < capacity_); + return (index > 0 ? index : capacity_) - 1; +} + +inline CordRepRing::index_type CordRepRing::retreat(index_type index, + index_type n) const { + assert(index < capacity_ && n <= capacity_); + return index >= n ? index - n : capacity_ - n + index; +} + +inline absl::string_view CordRepRing::entry_data(index_type index) const { + size_t data_offset = entry_data_offset(index); + return {GetRepData(entry_child(index)) + data_offset, entry_length(index)}; +} + +inline bool CordRepRing::IsValidIndex(index_type index) const { + if (index >= capacity_) return false; + return (tail_ > head_) ? (index >= head_ && index < tail_) + : (index >= head_ || index < tail_); +} + +#ifndef EXTRA_CORD_RING_VALIDATION +inline CordRepRing* CordRepRing::Validate(CordRepRing* rep, + const char* /*file*/, int /*line*/) { + return rep; +} +#endif + +inline CordRepRing::Position CordRepRing::Find(size_t offset) const { + assert(offset < length); + return (offset == 0) ? Position{head_, 0} : FindSlow(head_, offset); +} + +inline CordRepRing::Position CordRepRing::Find(index_type head, + size_t offset) const { + assert(offset < length); + assert(IsValidIndex(head) && offset >= entry_start_offset(head)); + return (offset == 0) ? Position{head_, 0} : FindSlow(head, offset); +} + +inline CordRepRing::Position CordRepRing::FindTail(size_t offset) const { + assert(offset > 0 && offset <= length); + return (offset == length) ? Position{tail_, 0} : FindTailSlow(head_, offset); +} + +inline CordRepRing::Position CordRepRing::FindTail(index_type head, + size_t offset) const { + assert(offset > 0 && offset <= length); + assert(IsValidIndex(head) && offset >= entry_start_offset(head) + 1); + return (offset == length) ? Position{tail_, 0} : FindTailSlow(head, offset); +} + +// Now that CordRepRing is defined, we can define CordRep's helper casts: +inline CordRepRing* CordRep::ring() { + assert(IsRing()); + return static_cast<CordRepRing*>(this); +} + +inline const CordRepRing* CordRep::ring() const { + assert(IsRing()); + return static_cast<const CordRepRing*>(this); +} + +inline bool CordRepRing::IsFlat(absl::string_view* fragment) const { + if (entries() == 1) { + if (fragment) *fragment = entry_data(head()); + return true; + } + return false; +} + +inline bool CordRepRing::IsFlat(size_t offset, size_t len, + absl::string_view* fragment) const { + const Position pos = Find(offset); + const absl::string_view data = entry_data(pos.index); + if (data.length() >= len && data.length() - len >= pos.offset) { + if (fragment) *fragment = data.substr(pos.offset, len); + return true; + } + return false; +} + +std::ostream& operator<<(std::ostream& s, const CordRepRing& rep); + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_RING_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_ring_reader.h b/third_party/abseil-cpp/absl/strings/internal/cord_rep_ring_reader.h new file mode 100644 index 0000000000..7ceeaa000e --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_ring_reader.h @@ -0,0 +1,118 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_RING_READER_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_RING_READER_H_ + +#include <cassert> +#include <cstddef> +#include <cstdint> + +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_ring.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordRepRingReader provides basic navigation over CordRepRing data. +class CordRepRingReader { + public: + // Returns true if this instance is not empty. + explicit operator bool() const { return ring_ != nullptr; } + + // Returns the ring buffer reference for this instance, or nullptr if empty. + CordRepRing* ring() const { return ring_; } + + // Returns the current node index inside the ring buffer for this instance. + // The returned value is undefined if this instance is empty. + CordRepRing::index_type index() const { return index_; } + + // Returns the current node inside the ring buffer for this instance. + // The returned value is undefined if this instance is empty. + CordRep* node() const { return ring_->entry_child(index_); } + + // Returns the length of the referenced ring buffer. + // Requires the current instance to be non empty. + size_t length() const { + assert(ring_); + return ring_->length; + } + + // Returns the end offset of the last navigated-to chunk, which represents the + // total bytes 'consumed' relative to the start of the ring. The returned + // value is never zero. For example, initializing a reader with a ring buffer + // with a first chunk of 19 bytes will return consumed() = 19. + // Requires the current instance to be non empty. + size_t consumed() const { + assert(ring_); + return ring_->entry_end_offset(index_); + } + + // Returns the number of bytes remaining beyond the last navigated-to chunk. + // Requires the current instance to be non empty. + size_t remaining() const { + assert(ring_); + return length() - consumed(); + } + + // Resets this instance to an empty value + void Reset() { ring_ = nullptr; } + + // Resets this instance to the start of `ring`. `ring` must not be null. + // Returns a reference into the first chunk of the provided ring. + absl::string_view Reset(CordRepRing* ring) { + assert(ring); + ring_ = ring; + index_ = ring_->head(); + return ring_->entry_data(index_); + } + + // Navigates to the next chunk inside the reference ring buffer. + // Returns a reference into the navigated-to chunk. + // Requires remaining() to be non zero. + absl::string_view Next() { + assert(remaining()); + index_ = ring_->advance(index_); + return ring_->entry_data(index_); + } + + // Navigates to the chunk at offset `offset`. + // Returns a reference into the navigated-to chunk, adjusted for the relative + // position of `offset` into that chunk. For example, calling Seek(13) on a + // ring buffer containing 2 chunks of 10 and 20 bytes respectively will return + // a string view into the second chunk starting at offset 3 with a size of 17. + // Requires `offset` to be less than `length()` + absl::string_view Seek(size_t offset) { + assert(offset < length()); + size_t current = ring_->entry_end_offset(index_); + CordRepRing::index_type hint = (offset >= current) ? index_ : ring_->head(); + const CordRepRing::Position head = ring_->Find(hint, offset); + index_ = head.index; + auto data = ring_->entry_data(head.index); + data.remove_prefix(head.offset); + return data; + } + + private: + CordRepRing* ring_ = nullptr; + CordRepRing::index_type index_; +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_RING_READER_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cord_rep_test_util.h b/third_party/abseil-cpp/absl/strings/internal/cord_rep_test_util.h new file mode 100644 index 0000000000..ad828af2a5 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cord_rep_test_util.h @@ -0,0 +1,220 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_ + +#include <cassert> +#include <memory> +#include <random> +#include <string> +#include <vector> + +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cordrep_testing { + +inline cord_internal::CordRepSubstring* MakeSubstring( + size_t start, size_t len, cord_internal::CordRep* rep) { + auto* sub = new cord_internal::CordRepSubstring; + sub->tag = cord_internal::SUBSTRING; + sub->start = start; + sub->length = len <= 0 ? rep->length - start + len : len; + sub->child = rep; + return sub; +} + +inline cord_internal::CordRepConcat* MakeConcat(cord_internal::CordRep* left, + cord_internal::CordRep* right, + int depth = 0) { + auto* concat = new cord_internal::CordRepConcat; + concat->tag = cord_internal::CONCAT; + concat->length = left->length + right->length; + concat->left = left; + concat->right = right; + concat->set_depth(depth); + return concat; +} + +inline cord_internal::CordRepFlat* MakeFlat(absl::string_view value) { + assert(value.length() <= cord_internal::kMaxFlatLength); + auto* flat = cord_internal::CordRepFlat::New(value.length()); + flat->length = value.length(); + memcpy(flat->Data(), value.data(), value.length()); + return flat; +} + +// Creates an external node for testing +inline cord_internal::CordRepExternal* MakeExternal(absl::string_view s) { + struct Rep : public cord_internal::CordRepExternal { + std::string s; + explicit Rep(absl::string_view sv) : s(sv) { + this->tag = cord_internal::EXTERNAL; + this->base = s.data(); + this->length = s.length(); + this->releaser_invoker = [](cord_internal::CordRepExternal* self) { + delete static_cast<Rep*>(self); + }; + } + }; + return new Rep(s); +} + +inline std::string CreateRandomString(size_t n) { + absl::string_view data = + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789~!@#$%^&*()_+=-<>?:\"{}[]|"; + std::minstd_rand rnd; + std::uniform_int_distribution<size_t> dist(0, data.size() - 1); + std::string s(n, ' '); + for (size_t i = 0; i < n; ++i) { + s[i] = data[dist(rnd)]; + } + return s; +} + +// Creates an array of flats from the provided string, chopping +// the provided string up into flats of size `chunk_size` characters +// resulting in roughly `data.size() / chunk_size` total flats. +inline std::vector<cord_internal::CordRep*> CreateFlatsFromString( + absl::string_view data, size_t chunk_size) { + assert(chunk_size > 0); + std::vector<cord_internal::CordRep*> flats; + for (absl::string_view s = data; !s.empty(); s.remove_prefix(chunk_size)) { + flats.push_back(MakeFlat(s.substr(0, chunk_size))); + } + return flats; +} + +inline cord_internal::CordRepBtree* CordRepBtreeFromFlats( + absl::Span<cord_internal::CordRep* const> flats) { + assert(!flats.empty()); + auto* node = cord_internal::CordRepBtree::Create(flats[0]); + for (size_t i = 1; i < flats.size(); ++i) { + node = cord_internal::CordRepBtree::Append(node, flats[i]); + } + return node; +} + +template <typename Fn> +inline void CordVisitReps(cord_internal::CordRep* rep, Fn&& fn) { + fn(rep); + while (rep->tag == cord_internal::SUBSTRING) { + rep = rep->substring()->child; + fn(rep); + } + if (rep->tag == cord_internal::BTREE) { + for (cord_internal::CordRep* edge : rep->btree()->Edges()) { + CordVisitReps(edge, fn); + } + } else if (rep->tag == cord_internal::CONCAT) { + CordVisitReps(rep->concat()->left, fn); + CordVisitReps(rep->concat()->right, fn); + } +} + +template <typename Predicate> +inline std::vector<cord_internal::CordRep*> CordCollectRepsIf( + Predicate&& predicate, cord_internal::CordRep* rep) { + std::vector<cord_internal::CordRep*> reps; + CordVisitReps(rep, [&reps, &predicate](cord_internal::CordRep* rep) { + if (predicate(rep)) reps.push_back(rep); + }); + return reps; +} + +inline std::vector<cord_internal::CordRep*> CordCollectReps( + cord_internal::CordRep* rep) { + std::vector<cord_internal::CordRep*> reps; + auto fn = [&reps](cord_internal::CordRep* rep) { reps.push_back(rep); }; + CordVisitReps(rep, fn); + return reps; +} + +inline void CordToString(cord_internal::CordRep* rep, std::string& s) { + size_t offset = 0; + size_t length = rep->length; + while (rep->tag == cord_internal::SUBSTRING) { + offset += rep->substring()->start; + rep = rep->substring()->child; + } + if (rep->tag == cord_internal::BTREE) { + for (cord_internal::CordRep* edge : rep->btree()->Edges()) { + CordToString(edge, s); + } + } else if (rep->tag >= cord_internal::FLAT) { + s.append(rep->flat()->Data() + offset, length); + } else if (rep->tag == cord_internal::EXTERNAL) { + s.append(rep->external()->base + offset, length); + } else { + ABSL_RAW_LOG(FATAL, "Unsupported tag %d", rep->tag); + } +} + +inline std::string CordToString(cord_internal::CordRep* rep) { + std::string s; + s.reserve(rep->length); + CordToString(rep, s); + return s; +} + +// RAII Helper class to automatically unref reps on destruction. +class AutoUnref { + public: + ~AutoUnref() { + for (CordRep* rep : unrefs_) CordRep::Unref(rep); + } + + // Adds `rep` to the list of reps to be unreffed at destruction. + template <typename CordRepType> + CordRepType* Add(CordRepType* rep) { + unrefs_.push_back(rep); + return rep; + } + + // Increments the reference count of `rep` by one, and adds it to + // the list of reps to be unreffed at destruction. + template <typename CordRepType> + CordRepType* Ref(CordRepType* rep) { + unrefs_.push_back(CordRep::Ref(rep)); + return rep; + } + + // Increments the reference count of `rep` by one if `condition` is true, + // and adds it to the list of reps to be unreffed at destruction. + template <typename CordRepType> + CordRepType* RefIf(bool condition, CordRepType* rep) { + if (condition) unrefs_.push_back(CordRep::Ref(rep)); + return rep; + } + + private: + using CordRep = absl::cord_internal::CordRep; + + std::vector<CordRep*> unrefs_; +}; + +} // namespace cordrep_testing +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_functions.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_functions.cc new file mode 100644 index 0000000000..20d314f03c --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_functions.cc @@ -0,0 +1,96 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_functions.h" + +#include <atomic> +#include <cmath> +#include <limits> +#include <random> + +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/profiling/internal/exponential_biased.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +// The average interval until the next sample. A value of 0 disables profiling +// while a value of 1 will profile all Cords. +std::atomic<int> g_cordz_mean_interval(50000); + +} // namespace + +#ifdef ABSL_INTERNAL_CORDZ_ENABLED + +// Special negative 'not initialized' per thread value for cordz_next_sample. +static constexpr int64_t kInitCordzNextSample = -1; + +ABSL_CONST_INIT thread_local int64_t cordz_next_sample = kInitCordzNextSample; + +// kIntervalIfDisabled is the number of profile-eligible events need to occur +// before the code will confirm that cordz is still disabled. +constexpr int64_t kIntervalIfDisabled = 1 << 16; + +ABSL_ATTRIBUTE_NOINLINE bool cordz_should_profile_slow() { + + thread_local absl::profiling_internal::ExponentialBiased + exponential_biased_generator; + int32_t mean_interval = get_cordz_mean_interval(); + + // Check if we disabled profiling. If so, set the next sample to a "large" + // number to minimize the overhead of the should_profile codepath. + if (mean_interval <= 0) { + cordz_next_sample = kIntervalIfDisabled; + return false; + } + + // Check if we're always sampling. + if (mean_interval == 1) { + cordz_next_sample = 1; + return true; + } + + if (cordz_next_sample <= 0) { + // If first check on current thread, check cordz_should_profile() + // again using the created (initial) stride in cordz_next_sample. + const bool initialized = cordz_next_sample != kInitCordzNextSample; + cordz_next_sample = exponential_biased_generator.GetStride(mean_interval); + return initialized || cordz_should_profile(); + } + + --cordz_next_sample; + return false; +} + +void cordz_set_next_sample_for_testing(int64_t next_sample) { + cordz_next_sample = next_sample; +} + +#endif // ABSL_INTERNAL_CORDZ_ENABLED + +int32_t get_cordz_mean_interval() { + return g_cordz_mean_interval.load(std::memory_order_acquire); +} + +void set_cordz_mean_interval(int32_t mean_interval) { + g_cordz_mean_interval.store(mean_interval, std::memory_order_release); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_functions.h b/third_party/abseil-cpp/absl/strings/internal/cordz_functions.h new file mode 100644 index 0000000000..c9ba14508a --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_functions.h @@ -0,0 +1,85 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CORDZ_FUNCTIONS_H_ +#define ABSL_STRINGS_CORDZ_FUNCTIONS_H_ + +#include <stdint.h> + +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/base/optimization.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Returns the current sample rate. This represents the average interval +// between samples. +int32_t get_cordz_mean_interval(); + +// Sets the sample rate with the average interval between samples. +void set_cordz_mean_interval(int32_t mean_interval); + +// Enable cordz unless any of the following applies: +// - no thread local support +// - MSVC build +// - Android build +// - Apple build +// - DLL build +// Hashtablez is turned off completely in opensource builds. +// MSVC's static atomics are dynamically initialized in debug mode, which breaks +// sampling. +#if defined(ABSL_HAVE_THREAD_LOCAL) && !defined(_MSC_VER) && \ + !defined(ABSL_BUILD_DLL) && !defined(ABSL_CONSUME_DLL) && \ + !defined(__ANDROID__) && !defined(__APPLE__) +#define ABSL_INTERNAL_CORDZ_ENABLED 1 +#endif + +#ifdef ABSL_INTERNAL_CORDZ_ENABLED + +// cordz_next_sample is the number of events until the next sample event. If +// the value is 1 or less, the code will check on the next event if cordz is +// enabled, and if so, will sample the Cord. cordz is only enabled when we can +// use thread locals. +ABSL_CONST_INIT extern thread_local int64_t cordz_next_sample; + +// Determines if the next sample should be profiled. If it is, the value pointed +// at by next_sample will be set with the interval until the next sample. +bool cordz_should_profile_slow(); + +// Returns true if the next cord should be sampled. +inline bool cordz_should_profile() { + if (ABSL_PREDICT_TRUE(cordz_next_sample > 1)) { + cordz_next_sample--; + return false; + } + return cordz_should_profile_slow(); +} + +// Sets the interval until the next sample (for testing only) +void cordz_set_next_sample_for_testing(int64_t next_sample); + +#else // ABSL_INTERNAL_CORDZ_ENABLED + +inline bool cordz_should_profile() { return false; } +inline void cordz_set_next_sample_for_testing(int64_t) {} + +#endif // ABSL_INTERNAL_CORDZ_ENABLED + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORDZ_FUNCTIONS_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_functions_test.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_functions_test.cc new file mode 100644 index 0000000000..350623c1f3 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_functions_test.cc @@ -0,0 +1,149 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_functions.h" + +#include <thread> // NOLINT we need real clean new threads + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::Eq; +using ::testing::Ge; +using ::testing::Le; + +TEST(CordzFunctionsTest, SampleRate) { + int32_t orig_sample_rate = get_cordz_mean_interval(); + int32_t expected_sample_rate = 123; + set_cordz_mean_interval(expected_sample_rate); + EXPECT_THAT(get_cordz_mean_interval(), Eq(expected_sample_rate)); + set_cordz_mean_interval(orig_sample_rate); +} + +// Cordz is disabled when we don't have thread_local. All calls to +// should_profile will return false when cordz is diabled, so we might want to +// avoid those tests. +#ifdef ABSL_INTERNAL_CORDZ_ENABLED + +TEST(CordzFunctionsTest, ShouldProfileDisable) { + int32_t orig_sample_rate = get_cordz_mean_interval(); + + set_cordz_mean_interval(0); + cordz_set_next_sample_for_testing(0); + EXPECT_FALSE(cordz_should_profile()); + // 1 << 16 is from kIntervalIfDisabled in cordz_functions.cc. + EXPECT_THAT(cordz_next_sample, Eq(1 << 16)); + + set_cordz_mean_interval(orig_sample_rate); +} + +TEST(CordzFunctionsTest, ShouldProfileAlways) { + int32_t orig_sample_rate = get_cordz_mean_interval(); + + set_cordz_mean_interval(1); + cordz_set_next_sample_for_testing(1); + EXPECT_TRUE(cordz_should_profile()); + EXPECT_THAT(cordz_next_sample, Le(1)); + + set_cordz_mean_interval(orig_sample_rate); +} + +TEST(CordzFunctionsTest, DoesNotAlwaysSampleFirstCord) { + // Set large enough interval such that the chance of 'tons' of threads + // randomly sampling the first call is infinitely small. + set_cordz_mean_interval(10000); + int tries = 0; + bool sampled = false; + do { + ++tries; + ASSERT_THAT(tries, Le(1000)); + std::thread thread([&sampled] { + sampled = cordz_should_profile(); + }); + thread.join(); + } while (sampled); +} + +TEST(CordzFunctionsTest, ShouldProfileRate) { + static constexpr int kDesiredMeanInterval = 1000; + static constexpr int kSamples = 10000; + int32_t orig_sample_rate = get_cordz_mean_interval(); + + set_cordz_mean_interval(kDesiredMeanInterval); + + int64_t sum_of_intervals = 0; + for (int i = 0; i < kSamples; i++) { + // Setting next_sample to 0 will force cordz_should_profile to generate a + // new value for next_sample each iteration. + cordz_set_next_sample_for_testing(0); + cordz_should_profile(); + sum_of_intervals += cordz_next_sample; + } + + // The sum of independent exponential variables is an Erlang distribution, + // which is a gamma distribution where the shape parameter is equal to the + // number of summands. The distribution used for cordz_should_profile is + // actually floor(Exponential(1/mean)) which introduces bias. However, we can + // apply the squint-really-hard correction factor. That is, when mean is + // large, then if we squint really hard the shape of the distribution between + // N and N+1 looks like a uniform distribution. On average, each value for + // next_sample will be about 0.5 lower than we would expect from an + // exponential distribution. This squint-really-hard correction approach won't + // work when mean is smaller than about 10 but works fine when mean is 1000. + // + // We can use R to calculate a confidence interval. This + // shows how to generate a confidence interval with a false positive rate of + // one in a billion. + // + // $ R -q + // > mean = 1000 + // > kSamples = 10000 + // > errorRate = 1e-9 + // > correction = -kSamples / 2 + // > low = qgamma(errorRate/2, kSamples, 1/mean) + correction + // > high = qgamma(1 - errorRate/2, kSamples, 1/mean) + correction + // > low + // [1] 9396115 + // > high + // [1] 10618100 + EXPECT_THAT(sum_of_intervals, Ge(9396115)); + EXPECT_THAT(sum_of_intervals, Le(10618100)); + + set_cordz_mean_interval(orig_sample_rate); +} + +#else // ABSL_INTERNAL_CORDZ_ENABLED + +TEST(CordzFunctionsTest, ShouldProfileDisabled) { + int32_t orig_sample_rate = get_cordz_mean_interval(); + + set_cordz_mean_interval(1); + cordz_set_next_sample_for_testing(0); + EXPECT_FALSE(cordz_should_profile()); + + set_cordz_mean_interval(orig_sample_rate); +} + +#endif // ABSL_INTERNAL_CORDZ_ENABLED + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_handle.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_handle.cc new file mode 100644 index 0000000000..a73fefed59 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_handle.cc @@ -0,0 +1,139 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "absl/strings/internal/cordz_handle.h" + +#include <atomic> + +#include "absl/base/internal/raw_logging.h" // For ABSL_RAW_CHECK +#include "absl/base/internal/spinlock.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +using ::absl::base_internal::SpinLockHolder; + +ABSL_CONST_INIT CordzHandle::Queue CordzHandle::global_queue_(absl::kConstInit); + +CordzHandle::CordzHandle(bool is_snapshot) : is_snapshot_(is_snapshot) { + if (is_snapshot) { + SpinLockHolder lock(&queue_->mutex); + CordzHandle* dq_tail = queue_->dq_tail.load(std::memory_order_acquire); + if (dq_tail != nullptr) { + dq_prev_ = dq_tail; + dq_tail->dq_next_ = this; + } + queue_->dq_tail.store(this, std::memory_order_release); + } +} + +CordzHandle::~CordzHandle() { + ODRCheck(); + if (is_snapshot_) { + std::vector<CordzHandle*> to_delete; + { + SpinLockHolder lock(&queue_->mutex); + CordzHandle* next = dq_next_; + if (dq_prev_ == nullptr) { + // We were head of the queue, delete every CordzHandle until we reach + // either the end of the list, or a snapshot handle. + while (next && !next->is_snapshot_) { + to_delete.push_back(next); + next = next->dq_next_; + } + } else { + // Another CordzHandle existed before this one, don't delete anything. + dq_prev_->dq_next_ = next; + } + if (next) { + next->dq_prev_ = dq_prev_; + } else { + queue_->dq_tail.store(dq_prev_, std::memory_order_release); + } + } + for (CordzHandle* handle : to_delete) { + delete handle; + } + } +} + +bool CordzHandle::SafeToDelete() const { + return is_snapshot_ || queue_->IsEmpty(); +} + +void CordzHandle::Delete(CordzHandle* handle) { + assert(handle); + if (handle) { + handle->ODRCheck(); + Queue* const queue = handle->queue_; + if (!handle->SafeToDelete()) { + SpinLockHolder lock(&queue->mutex); + CordzHandle* dq_tail = queue->dq_tail.load(std::memory_order_acquire); + if (dq_tail != nullptr) { + handle->dq_prev_ = dq_tail; + dq_tail->dq_next_ = handle; + queue->dq_tail.store(handle, std::memory_order_release); + return; + } + } + delete handle; + } +} + +std::vector<const CordzHandle*> CordzHandle::DiagnosticsGetDeleteQueue() { + std::vector<const CordzHandle*> handles; + SpinLockHolder lock(&global_queue_.mutex); + CordzHandle* dq_tail = global_queue_.dq_tail.load(std::memory_order_acquire); + for (const CordzHandle* p = dq_tail; p; p = p->dq_prev_) { + handles.push_back(p); + } + return handles; +} + +bool CordzHandle::DiagnosticsHandleIsSafeToInspect( + const CordzHandle* handle) const { + ODRCheck(); + if (!is_snapshot_) return false; + if (handle == nullptr) return true; + if (handle->is_snapshot_) return false; + bool snapshot_found = false; + SpinLockHolder lock(&queue_->mutex); + for (const CordzHandle* p = queue_->dq_tail; p; p = p->dq_prev_) { + if (p == handle) return !snapshot_found; + if (p == this) snapshot_found = true; + } + ABSL_ASSERT(snapshot_found); // Assert that 'this' is in delete queue. + return true; +} + +std::vector<const CordzHandle*> +CordzHandle::DiagnosticsGetSafeToInspectDeletedHandles() { + ODRCheck(); + std::vector<const CordzHandle*> handles; + if (!is_snapshot()) { + return handles; + } + + SpinLockHolder lock(&queue_->mutex); + for (const CordzHandle* p = dq_next_; p != nullptr; p = p->dq_next_) { + if (!p->is_snapshot()) { + handles.push_back(p); + } + } + return handles; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_handle.h b/third_party/abseil-cpp/absl/strings/internal/cordz_handle.h new file mode 100644 index 0000000000..5df53c782a --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_handle.h @@ -0,0 +1,131 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CORDZ_HANDLE_H_ +#define ABSL_STRINGS_CORDZ_HANDLE_H_ + +#include <atomic> +#include <vector> + +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/internal/spinlock.h" +#include "absl/synchronization/mutex.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// This base class allows multiple types of object (CordzInfo and +// CordzSampleToken) to exist simultaneously on the delete queue (pointed to by +// global_dq_tail and traversed using dq_prev_ and dq_next_). The +// delete queue guarantees that once a profiler creates a CordzSampleToken and +// has gained visibility into a CordzInfo object, that CordzInfo object will not +// be deleted prematurely. This allows the profiler to inspect all CordzInfo +// objects that are alive without needing to hold a global lock. +class CordzHandle { + public: + CordzHandle() : CordzHandle(false) {} + + bool is_snapshot() const { return is_snapshot_; } + + // Returns true if this instance is safe to be deleted because it is either a + // snapshot, which is always safe to delete, or not included in the global + // delete queue and thus not included in any snapshot. + // Callers are responsible for making sure this instance can not be newly + // discovered by other threads. For example, CordzInfo instances first de-list + // themselves from the global CordzInfo list before determining if they are + // safe to be deleted directly. + // If SafeToDelete returns false, callers MUST use the Delete() method to + // safely queue CordzHandle instances for deletion. + bool SafeToDelete() const; + + // Deletes the provided instance, or puts it on the delete queue to be deleted + // once there are no more sample tokens (snapshot) instances potentially + // referencing the instance. `handle` should not be null. + static void Delete(CordzHandle* handle); + + // Returns the current entries in the delete queue in LIFO order. + static std::vector<const CordzHandle*> DiagnosticsGetDeleteQueue(); + + // Returns true if the provided handle is nullptr or guarded by this handle. + // Since the CordzSnapshot token is itself a CordzHandle, this method will + // allow tests to check if that token is keeping an arbitrary CordzHandle + // alive. + bool DiagnosticsHandleIsSafeToInspect(const CordzHandle* handle) const; + + // Returns the current entries in the delete queue, in LIFO order, that are + // protected by this. CordzHandle objects are only placed on the delete queue + // after CordzHandle::Delete is called with them as an argument. Only + // CordzHandle objects that are not also CordzSnapshot objects will be + // included in the return vector. For each of the handles in the return + // vector, the earliest that their memory can be freed is when this + // CordzSnapshot object is deleted. + std::vector<const CordzHandle*> DiagnosticsGetSafeToInspectDeletedHandles(); + + protected: + explicit CordzHandle(bool is_snapshot); + virtual ~CordzHandle(); + + private: + // Global queue data. CordzHandle stores a pointer to the global queue + // instance to harden against ODR violations. + struct Queue { + constexpr explicit Queue(absl::ConstInitType) + : mutex(absl::kConstInit, + absl::base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL) {} + + absl::base_internal::SpinLock mutex; + std::atomic<CordzHandle*> dq_tail ABSL_GUARDED_BY(mutex){nullptr}; + + // Returns true if this delete queue is empty. This method does not acquire + // the lock, but does a 'load acquire' observation on the delete queue tail. + // It is used inside Delete() to check for the presence of a delete queue + // without holding the lock. The assumption is that the caller is in the + // state of 'being deleted', and can not be newly discovered by a concurrent + // 'being constructed' snapshot instance. Practically, this means that any + // such discovery (`find`, 'first' or 'next', etc) must have proper 'happens + // before / after' semantics and atomic fences. + bool IsEmpty() const ABSL_NO_THREAD_SAFETY_ANALYSIS { + return dq_tail.load(std::memory_order_acquire) == nullptr; + } + }; + + void ODRCheck() const { +#ifndef NDEBUG + ABSL_RAW_CHECK(queue_ == &global_queue_, "ODR violation in Cord"); +#endif + } + + ABSL_CONST_INIT static Queue global_queue_; + Queue* const queue_ = &global_queue_; + const bool is_snapshot_; + + // dq_prev_ and dq_next_ require the global queue mutex to be held. + // Unfortunately we can't use thread annotations such that the thread safety + // analysis understands that queue_ and global_queue_ are one and the same. + CordzHandle* dq_prev_ = nullptr; + CordzHandle* dq_next_ = nullptr; +}; + +class CordzSnapshot : public CordzHandle { + public: + CordzSnapshot() : CordzHandle(true) {} +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORDZ_HANDLE_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_handle_test.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_handle_test.cc new file mode 100644 index 0000000000..fd68e06b3e --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_handle_test.cc @@ -0,0 +1,265 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "absl/strings/internal/cordz_handle.h" + +#include <random> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "absl/synchronization/internal/thread_pool.h" +#include "absl/synchronization/notification.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::ElementsAre; +using ::testing::Gt; +using ::testing::IsEmpty; +using ::testing::SizeIs; + +// Local less verbose helper +std::vector<const CordzHandle*> DeleteQueue() { + return CordzHandle::DiagnosticsGetDeleteQueue(); +} + +struct CordzHandleDeleteTracker : public CordzHandle { + bool* deleted; + explicit CordzHandleDeleteTracker(bool* deleted) : deleted(deleted) {} + ~CordzHandleDeleteTracker() override { *deleted = true; } +}; + +TEST(CordzHandleTest, DeleteQueueIsEmpty) { + EXPECT_THAT(DeleteQueue(), SizeIs(0)); +} + +TEST(CordzHandleTest, CordzHandleCreateDelete) { + bool deleted = false; + auto* handle = new CordzHandleDeleteTracker(&deleted); + EXPECT_FALSE(handle->is_snapshot()); + EXPECT_TRUE(handle->SafeToDelete()); + EXPECT_THAT(DeleteQueue(), SizeIs(0)); + + CordzHandle::Delete(handle); + EXPECT_THAT(DeleteQueue(), SizeIs(0)); + EXPECT_TRUE(deleted); +} + +TEST(CordzHandleTest, CordzSnapshotCreateDelete) { + auto* snapshot = new CordzSnapshot(); + EXPECT_TRUE(snapshot->is_snapshot()); + EXPECT_TRUE(snapshot->SafeToDelete()); + EXPECT_THAT(DeleteQueue(), ElementsAre(snapshot)); + delete snapshot; + EXPECT_THAT(DeleteQueue(), SizeIs(0)); +} + +TEST(CordzHandleTest, CordzHandleCreateDeleteWithSnapshot) { + bool deleted = false; + auto* snapshot = new CordzSnapshot(); + auto* handle = new CordzHandleDeleteTracker(&deleted); + EXPECT_FALSE(handle->SafeToDelete()); + + CordzHandle::Delete(handle); + EXPECT_THAT(DeleteQueue(), ElementsAre(handle, snapshot)); + EXPECT_FALSE(deleted); + EXPECT_FALSE(handle->SafeToDelete()); + + delete snapshot; + EXPECT_THAT(DeleteQueue(), SizeIs(0)); + EXPECT_TRUE(deleted); +} + +TEST(CordzHandleTest, MultiSnapshot) { + bool deleted[3] = {false, false, false}; + + CordzSnapshot* snapshot[3]; + CordzHandleDeleteTracker* handle[3]; + for (int i = 0; i < 3; ++i) { + snapshot[i] = new CordzSnapshot(); + handle[i] = new CordzHandleDeleteTracker(&deleted[i]); + CordzHandle::Delete(handle[i]); + } + + EXPECT_THAT(DeleteQueue(), ElementsAre(handle[2], snapshot[2], handle[1], + snapshot[1], handle[0], snapshot[0])); + EXPECT_THAT(deleted, ElementsAre(false, false, false)); + + delete snapshot[1]; + EXPECT_THAT(DeleteQueue(), ElementsAre(handle[2], snapshot[2], handle[1], + handle[0], snapshot[0])); + EXPECT_THAT(deleted, ElementsAre(false, false, false)); + + delete snapshot[0]; + EXPECT_THAT(DeleteQueue(), ElementsAre(handle[2], snapshot[2])); + EXPECT_THAT(deleted, ElementsAre(true, true, false)); + + delete snapshot[2]; + EXPECT_THAT(DeleteQueue(), SizeIs(0)); + EXPECT_THAT(deleted, ElementsAre(true, true, deleted)); +} + +TEST(CordzHandleTest, DiagnosticsHandleIsSafeToInspect) { + CordzSnapshot snapshot1; + EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(nullptr)); + + auto* handle1 = new CordzHandle(); + EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle1)); + + CordzHandle::Delete(handle1); + EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle1)); + + CordzSnapshot snapshot2; + auto* handle2 = new CordzHandle(); + EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle1)); + EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle2)); + EXPECT_FALSE(snapshot2.DiagnosticsHandleIsSafeToInspect(handle1)); + EXPECT_TRUE(snapshot2.DiagnosticsHandleIsSafeToInspect(handle2)); + + CordzHandle::Delete(handle2); + EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle1)); +} + +TEST(CordzHandleTest, DiagnosticsGetSafeToInspectDeletedHandles) { + EXPECT_THAT(DeleteQueue(), IsEmpty()); + + auto* handle = new CordzHandle(); + auto* snapshot1 = new CordzSnapshot(); + + // snapshot1 should be able to see handle. + EXPECT_THAT(DeleteQueue(), ElementsAre(snapshot1)); + EXPECT_TRUE(snapshot1->DiagnosticsHandleIsSafeToInspect(handle)); + EXPECT_THAT(snapshot1->DiagnosticsGetSafeToInspectDeletedHandles(), + IsEmpty()); + + // This handle will be safe to inspect as long as snapshot1 is alive. However, + // since only snapshot1 can prove that it's alive, it will be hidden from + // snapshot2. + CordzHandle::Delete(handle); + + // This snapshot shouldn't be able to see handle because handle was already + // sent to Delete. + auto* snapshot2 = new CordzSnapshot(); + + // DeleteQueue elements are LIFO order. + EXPECT_THAT(DeleteQueue(), ElementsAre(snapshot2, handle, snapshot1)); + + EXPECT_TRUE(snapshot1->DiagnosticsHandleIsSafeToInspect(handle)); + EXPECT_FALSE(snapshot2->DiagnosticsHandleIsSafeToInspect(handle)); + + EXPECT_THAT(snapshot1->DiagnosticsGetSafeToInspectDeletedHandles(), + ElementsAre(handle)); + EXPECT_THAT(snapshot2->DiagnosticsGetSafeToInspectDeletedHandles(), + IsEmpty()); + + CordzHandle::Delete(snapshot1); + EXPECT_THAT(DeleteQueue(), ElementsAre(snapshot2)); + + CordzHandle::Delete(snapshot2); + EXPECT_THAT(DeleteQueue(), IsEmpty()); +} + +// Create and delete CordzHandle and CordzSnapshot objects in multiple threads +// so that tsan has some time to chew on it and look for memory problems. +TEST(CordzHandleTest, MultiThreaded) { + Notification stop; + static constexpr int kNumThreads = 4; + // Keep the number of handles relatively small so that the test will naturally + // transition to an empty delete queue during the test. If there are, say, 100 + // handles, that will virtually never happen. With 10 handles and around 50k + // iterations in each of 4 threads, the delete queue appears to become empty + // around 200 times. + static constexpr int kNumHandles = 10; + + // Each thread is going to pick a random index and atomically swap its + // CordzHandle with one in handles. This way, each thread can avoid + // manipulating a CordzHandle that might be operated upon in another thread. + std::vector<std::atomic<CordzHandle*>> handles(kNumHandles); + + // global bool which is set when any thread did get some 'safe to inspect' + // handles. On some platforms and OSS tests, we might risk that some pool + // threads are starved, stalled, or just got a few unlikely random 'handle' + // coin tosses, so we satisfy this test with simply observing 'some' thread + // did something meaningful, which should minimize the potential for flakes. + std::atomic<bool> found_safe_to_inspect(false); + + { + absl::synchronization_internal::ThreadPool pool(kNumThreads); + for (int i = 0; i < kNumThreads; ++i) { + pool.Schedule([&stop, &handles, &found_safe_to_inspect]() { + std::minstd_rand gen; + std::uniform_int_distribution<int> dist_type(0, 2); + std::uniform_int_distribution<int> dist_handle(0, kNumHandles - 1); + + while (!stop.HasBeenNotified()) { + CordzHandle* handle; + switch (dist_type(gen)) { + case 0: + handle = new CordzHandle(); + break; + case 1: + handle = new CordzSnapshot(); + break; + default: + handle = nullptr; + break; + } + CordzHandle* old_handle = handles[dist_handle(gen)].exchange(handle); + if (old_handle != nullptr) { + std::vector<const CordzHandle*> safe_to_inspect = + old_handle->DiagnosticsGetSafeToInspectDeletedHandles(); + for (const CordzHandle* handle : safe_to_inspect) { + // We're in a tight loop, so don't generate too many error + // messages. + ASSERT_FALSE(handle->is_snapshot()); + } + if (!safe_to_inspect.empty()) { + found_safe_to_inspect.store(true); + } + CordzHandle::Delete(old_handle); + } + } + + // Have each thread attempt to clean up everything. Some thread will be + // the last to reach this cleanup code, and it will be guaranteed to + // clean up everything because nothing remains to create new handles. + for (auto& h : handles) { + if (CordzHandle* handle = h.exchange(nullptr)) { + CordzHandle::Delete(handle); + } + } + }); + } + + // The threads will hammer away. Give it a little bit of time for tsan to + // spot errors. + absl::SleepFor(absl::Seconds(3)); + stop.Notify(); + } + + // Confirm that the test did *something*. This check will be satisfied as + // long as any thread has deleted a CordzSnapshot object and a non-snapshot + // CordzHandle was deleted after the CordzSnapshot was created. + // See also comments on `found_safe_to_inspect` + EXPECT_TRUE(found_safe_to_inspect.load()); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_info.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_info.cc new file mode 100644 index 0000000000..5c18bbc566 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_info.cc @@ -0,0 +1,445 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_info.h" + +#include "absl/base/config.h" +#include "absl/base/internal/spinlock.h" +#include "absl/container/inlined_vector.h" +#include "absl/debugging/stacktrace.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_ring.h" +#include "absl/strings/internal/cordz_handle.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_tracker.h" +#include "absl/synchronization/mutex.h" +#include "absl/types/span.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +using ::absl::base_internal::SpinLockHolder; + +constexpr int CordzInfo::kMaxStackDepth; + +ABSL_CONST_INIT CordzInfo::List CordzInfo::global_list_{absl::kConstInit}; + +namespace { + +// CordRepAnalyzer performs the analysis of a cord. +// +// It computes absolute node counts and total memory usage, and an 'estimated +// fair share memory usage` statistic. +// Conceptually, it divides the 'memory usage' at each location in the 'cord +// graph' by the cumulative reference count of that location. The cumulative +// reference count is the factored total of all edges leading into that node. +// +// The top level node is treated specially: we assume the current thread +// (typically called from the CordzHandler) to hold a reference purely to +// perform a safe analysis, and not being part of the application. So we +// substract 1 from the reference count of the top node to compute the +// 'application fair share' excluding the reference of the current thread. +// +// An example of fair sharing, and why we multiply reference counts: +// Assume we have 2 CordReps, both being a Substring referencing a Flat: +// CordSubstring A (refcount = 5) --> child Flat C (refcount = 2) +// CordSubstring B (refcount = 9) --> child Flat C (refcount = 2) +// +// Flat C has 2 incoming edges from the 2 substrings (refcount = 2) and is not +// referenced directly anywhere else. Translated into a 'fair share', we then +// attribute 50% of the memory (memory / refcount = 2) to each incoming edge. +// Rep A has a refcount of 5, so we attribute each incoming edge 1 / 5th of the +// memory cost below it, i.e.: the fair share of Rep A of the memory used by C +// is then 'memory C / (refcount C * refcount A) + (memory A / refcount A)'. +// It is also easy to see how all incoming edges add up to 100%. +class CordRepAnalyzer { + public: + // Creates an analyzer instance binding to `statistics`. + explicit CordRepAnalyzer(CordzStatistics& statistics) + : statistics_(statistics) {} + + // Analyzes the memory statistics and node counts for the provided `rep`, and + // adds the results to `statistics`. Note that node counts and memory sizes + // are not initialized, computed values are added to any existing values. + void AnalyzeCordRep(const CordRep* rep) { + // Process all linear nodes. + // As per the class comments, use refcout - 1 on the top level node, as the + // top level node is assumed to be referenced only for analysis purposes. + size_t refcount = rep->refcount.Get(); + RepRef repref{rep, (refcount > 1) ? refcount - 1 : 1}; + + // Process all top level linear nodes (substrings and flats). + repref = CountLinearReps(repref, memory_usage_); + + if (repref.rep != nullptr) { + if (repref.rep->tag == RING) { + AnalyzeRing(repref); + } else if (repref.rep->tag == BTREE) { + AnalyzeBtree(repref); + } else if (repref.rep->tag == CONCAT) { + AnalyzeConcat(repref); + } else { + // We should have either a concat, btree, or ring node if not null. + assert(false); + } + } + + // Adds values to output + statistics_.estimated_memory_usage += memory_usage_.total; + statistics_.estimated_fair_share_memory_usage += + static_cast<size_t>(memory_usage_.fair_share); + } + + private: + // RepRef identifies a CordRep* inside the Cord tree with its cumulative + // refcount including itself. For example, a tree consisting of a substring + // with a refcount of 3 and a child flat with a refcount of 4 will have RepRef + // refcounts of 3 and 12 respectively. + struct RepRef { + const CordRep* rep; + size_t refcount; + + // Returns a 'child' RepRef which contains the cumulative reference count of + // this instance multiplied by the child's reference count. + RepRef Child(const CordRep* child) const { + return RepRef{child, refcount * child->refcount.Get()}; + } + }; + + // Memory usage values + struct MemoryUsage { + size_t total = 0; + double fair_share = 0.0; + + // Adds 'size` memory usage to this class, with a cumulative (recursive) + // reference count of `refcount` + void Add(size_t size, size_t refcount) { + total += size; + fair_share += static_cast<double>(size) / refcount; + } + }; + + // Returns `rr` if `rr.rep` is not null and a CONCAT type. + // Asserts that `rr.rep` is a concat node or null. + static RepRef AssertConcat(RepRef repref) { + const CordRep* rep = repref.rep; + assert(rep == nullptr || rep->tag == CONCAT); + return (rep != nullptr && rep->tag == CONCAT) ? repref : RepRef{nullptr, 0}; + } + + // Counts a flat of the provide allocated size + void CountFlat(size_t size) { + statistics_.node_count++; + statistics_.node_counts.flat++; + if (size <= 64) { + statistics_.node_counts.flat_64++; + } else if (size <= 128) { + statistics_.node_counts.flat_128++; + } else if (size <= 256) { + statistics_.node_counts.flat_256++; + } else if (size <= 512) { + statistics_.node_counts.flat_512++; + } else if (size <= 1024) { + statistics_.node_counts.flat_1k++; + } + } + + // Processes 'linear' reps (substring, flat, external) not requiring iteration + // or recursion. Returns RefRep{null} if all reps were processed, else returns + // the top-most non-linear concat or ring cordrep. + // Node counts are updated into `statistics_`, memory usage is update into + // `memory_usage`, which typically references `memory_usage_` except for ring + // buffers where we count children unrounded. + RepRef CountLinearReps(RepRef rep, MemoryUsage& memory_usage) { + // Consume all substrings + while (rep.rep->tag == SUBSTRING) { + statistics_.node_count++; + statistics_.node_counts.substring++; + memory_usage.Add(sizeof(CordRepSubstring), rep.refcount); + rep = rep.Child(rep.rep->substring()->child); + } + + // Consume possible FLAT + if (rep.rep->tag >= FLAT) { + size_t size = rep.rep->flat()->AllocatedSize(); + CountFlat(size); + memory_usage.Add(size, rep.refcount); + return RepRef{nullptr, 0}; + } + + // Consume possible external + if (rep.rep->tag == EXTERNAL) { + statistics_.node_count++; + statistics_.node_counts.external++; + size_t size = rep.rep->length + sizeof(CordRepExternalImpl<intptr_t>); + memory_usage.Add(size, rep.refcount); + return RepRef{nullptr, 0}; + } + + return rep; + } + + // Analyzes the provided concat node in a flattened recursive way. + void AnalyzeConcat(RepRef rep) { + absl::InlinedVector<RepRef, 47> pending; + + while (rep.rep != nullptr) { + const CordRepConcat* concat = rep.rep->concat(); + RepRef left = rep.Child(concat->left); + RepRef right = rep.Child(concat->right); + + statistics_.node_count++; + statistics_.node_counts.concat++; + memory_usage_.Add(sizeof(CordRepConcat), rep.refcount); + + right = AssertConcat(CountLinearReps(right, memory_usage_)); + rep = AssertConcat(CountLinearReps(left, memory_usage_)); + if (rep.rep != nullptr) { + if (right.rep != nullptr) { + pending.push_back(right); + } + } else if (right.rep != nullptr) { + rep = right; + } else if (!pending.empty()) { + rep = pending.back(); + pending.pop_back(); + } + } + } + + // Analyzes the provided ring. + void AnalyzeRing(RepRef rep) { + statistics_.node_count++; + statistics_.node_counts.ring++; + const CordRepRing* ring = rep.rep->ring(); + memory_usage_.Add(CordRepRing::AllocSize(ring->capacity()), rep.refcount); + ring->ForEach([&](CordRepRing::index_type pos) { + CountLinearReps(rep.Child(ring->entry_child(pos)), memory_usage_); + }); + } + + // Analyzes the provided btree. + void AnalyzeBtree(RepRef rep) { + statistics_.node_count++; + statistics_.node_counts.btree++; + memory_usage_.Add(sizeof(CordRepBtree), rep.refcount); + const CordRepBtree* tree = rep.rep->btree(); + if (tree->height() > 0) { + for (CordRep* edge : tree->Edges()) { + AnalyzeBtree(rep.Child(edge)); + } + } else { + for (CordRep* edge : tree->Edges()) { + CountLinearReps(rep.Child(edge), memory_usage_); + } + } + } + + CordzStatistics& statistics_; + MemoryUsage memory_usage_; +}; + +} // namespace + +CordzInfo* CordzInfo::Head(const CordzSnapshot& snapshot) { + ABSL_ASSERT(snapshot.is_snapshot()); + + // We can do an 'unsafe' load of 'head', as we are guaranteed that the + // instance it points to is kept alive by the provided CordzSnapshot, so we + // can simply return the current value using an acquire load. + // We do enforce in DEBUG builds that the 'head' value is present in the + // delete queue: ODR violations may lead to 'snapshot' and 'global_list_' + // being in different libraries / modules. + CordzInfo* head = global_list_.head.load(std::memory_order_acquire); + ABSL_ASSERT(snapshot.DiagnosticsHandleIsSafeToInspect(head)); + return head; +} + +CordzInfo* CordzInfo::Next(const CordzSnapshot& snapshot) const { + ABSL_ASSERT(snapshot.is_snapshot()); + + // Similar to the 'Head()' function, we do not need a mutex here. + CordzInfo* next = ci_next_.load(std::memory_order_acquire); + ABSL_ASSERT(snapshot.DiagnosticsHandleIsSafeToInspect(this)); + ABSL_ASSERT(snapshot.DiagnosticsHandleIsSafeToInspect(next)); + return next; +} + +void CordzInfo::TrackCord(InlineData& cord, MethodIdentifier method) { + assert(cord.is_tree()); + assert(!cord.is_profiled()); + CordzInfo* cordz_info = new CordzInfo(cord.as_tree(), nullptr, method); + cord.set_cordz_info(cordz_info); + cordz_info->Track(); +} + +void CordzInfo::TrackCord(InlineData& cord, const InlineData& src, + MethodIdentifier method) { + assert(cord.is_tree()); + assert(src.is_tree()); + + // Unsample current as we the current cord is being replaced with 'src', + // so any method history is no longer relevant. + CordzInfo* cordz_info = cord.cordz_info(); + if (cordz_info != nullptr) cordz_info->Untrack(); + + // Start new cord sample + cordz_info = new CordzInfo(cord.as_tree(), src.cordz_info(), method); + cord.set_cordz_info(cordz_info); + cordz_info->Track(); +} + +void CordzInfo::MaybeTrackCordImpl(InlineData& cord, const InlineData& src, + MethodIdentifier method) { + if (src.is_profiled()) { + TrackCord(cord, src, method); + } else if (cord.is_profiled()) { + cord.cordz_info()->Untrack(); + cord.clear_cordz_info(); + } +} + +CordzInfo::MethodIdentifier CordzInfo::GetParentMethod(const CordzInfo* src) { + if (src == nullptr) return MethodIdentifier::kUnknown; + return src->parent_method_ != MethodIdentifier::kUnknown ? src->parent_method_ + : src->method_; +} + +int CordzInfo::FillParentStack(const CordzInfo* src, void** stack) { + assert(stack); + if (src == nullptr) return 0; + if (src->parent_stack_depth_) { + memcpy(stack, src->parent_stack_, src->parent_stack_depth_ * sizeof(void*)); + return src->parent_stack_depth_; + } + memcpy(stack, src->stack_, src->stack_depth_ * sizeof(void*)); + return src->stack_depth_; +} + +CordzInfo::CordzInfo(CordRep* rep, const CordzInfo* src, + MethodIdentifier method) + : rep_(rep), + stack_depth_(absl::GetStackTrace(stack_, /*max_depth=*/kMaxStackDepth, + /*skip_count=*/1)), + parent_stack_depth_(FillParentStack(src, parent_stack_)), + method_(method), + parent_method_(GetParentMethod(src)), + create_time_(absl::Now()) { + update_tracker_.LossyAdd(method); + if (src) { + // Copy parent counters. + update_tracker_.LossyAdd(src->update_tracker_); + } +} + +CordzInfo::~CordzInfo() { + // `rep_` is potentially kept alive if CordzInfo is included + // in a collection snapshot (which should be rare). + if (ABSL_PREDICT_FALSE(rep_)) { + CordRep::Unref(rep_); + } +} + +void CordzInfo::Track() { + SpinLockHolder l(&list_->mutex); + + CordzInfo* const head = list_->head.load(std::memory_order_acquire); + if (head != nullptr) { + head->ci_prev_.store(this, std::memory_order_release); + } + ci_next_.store(head, std::memory_order_release); + list_->head.store(this, std::memory_order_release); +} + +void CordzInfo::Untrack() { + ODRCheck(); + { + SpinLockHolder l(&list_->mutex); + + CordzInfo* const head = list_->head.load(std::memory_order_acquire); + CordzInfo* const next = ci_next_.load(std::memory_order_acquire); + CordzInfo* const prev = ci_prev_.load(std::memory_order_acquire); + + if (next) { + ABSL_ASSERT(next->ci_prev_.load(std::memory_order_acquire) == this); + next->ci_prev_.store(prev, std::memory_order_release); + } + if (prev) { + ABSL_ASSERT(head != this); + ABSL_ASSERT(prev->ci_next_.load(std::memory_order_acquire) == this); + prev->ci_next_.store(next, std::memory_order_release); + } else { + ABSL_ASSERT(head == this); + list_->head.store(next, std::memory_order_release); + } + } + + // We can no longer be discovered: perform a fast path check if we are not + // listed on any delete queue, so we can directly delete this instance. + if (SafeToDelete()) { + UnsafeSetCordRep(nullptr); + delete this; + return; + } + + // We are likely part of a snapshot, extend the life of the CordRep + { + absl::MutexLock lock(&mutex_); + if (rep_) CordRep::Ref(rep_); + } + CordzHandle::Delete(this); +} + +void CordzInfo::Lock(MethodIdentifier method) + ABSL_EXCLUSIVE_LOCK_FUNCTION(mutex_) { + mutex_.Lock(); + update_tracker_.LossyAdd(method); + assert(rep_); +} + +void CordzInfo::Unlock() ABSL_UNLOCK_FUNCTION(mutex_) { + bool tracked = rep_ != nullptr; + mutex_.Unlock(); + if (!tracked) { + Untrack(); + } +} + +absl::Span<void* const> CordzInfo::GetStack() const { + return absl::MakeConstSpan(stack_, stack_depth_); +} + +absl::Span<void* const> CordzInfo::GetParentStack() const { + return absl::MakeConstSpan(parent_stack_, parent_stack_depth_); +} + +CordzStatistics CordzInfo::GetCordzStatistics() const { + CordzStatistics stats; + stats.method = method_; + stats.parent_method = parent_method_; + stats.update_tracker = update_tracker_; + if (CordRep* rep = RefCordRep()) { + stats.size = rep->length; + CordRepAnalyzer analyzer(stats); + analyzer.AnalyzeCordRep(rep); + CordRep::Unref(rep); + } + return stats; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_info.h b/third_party/abseil-cpp/absl/strings/internal/cordz_info.h new file mode 100644 index 0000000000..026d5b9981 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_info.h @@ -0,0 +1,298 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CORDZ_INFO_H_ +#define ABSL_STRINGS_CORDZ_INFO_H_ + +#include <atomic> +#include <cstdint> +#include <functional> + +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/thread_annotations.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cordz_functions.h" +#include "absl/strings/internal/cordz_handle.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_tracker.h" +#include "absl/synchronization/mutex.h" +#include "absl/types/span.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordzInfo tracks a profiled Cord. Each of these objects can be in two places. +// If a Cord is alive, the CordzInfo will be in the global_cordz_infos map, and +// can also be retrieved via the linked list starting with +// global_cordz_infos_head and continued via the cordz_info_next() method. When +// a Cord has reached the end of its lifespan, the CordzInfo object will be +// migrated out of the global_cordz_infos list and the global_cordz_infos_map, +// and will either be deleted or appended to the global_delete_queue. If it is +// placed on the global_delete_queue, the CordzInfo object will be cleaned in +// the destructor of a CordzSampleToken object. +class ABSL_LOCKABLE CordzInfo : public CordzHandle { + public: + using MethodIdentifier = CordzUpdateTracker::MethodIdentifier; + + // TrackCord creates a CordzInfo instance which tracks important metrics of + // a sampled cord, and stores the created CordzInfo instance into `cord'. All + // CordzInfo instances are placed in a global list which is used to discover + // and snapshot all actively tracked cords. Callers are responsible for + // calling UntrackCord() before the tracked Cord instance is deleted, or to + // stop tracking the sampled Cord. Callers are also responsible for guarding + // changes to the 'tree' value of a Cord (InlineData.tree) through the Lock() + // and Unlock() calls. Any change resulting in a new tree value for the cord + // requires a call to SetCordRep() before the old tree has been unreffed + // and/or deleted. `method` identifies the Cord public API method initiating + // the cord to be sampled. + // Requires `cord` to hold a tree, and `cord.cordz_info()` to be null. + static void TrackCord(InlineData& cord, MethodIdentifier method); + + // Identical to TrackCord(), except that this function fills the + // `parent_stack` and `parent_method` properties of the returned CordzInfo + // instance from the provided `src` instance if `src` is sampled. + // This function should be used for sampling 'copy constructed' and 'copy + // assigned' cords. This function allows 'cord` to be already sampled, in + // which case the CordzInfo will be newly created from `src`. + static void TrackCord(InlineData& cord, const InlineData& src, + MethodIdentifier method); + + // Maybe sample the cord identified by 'cord' for method 'method'. + // Uses `cordz_should_profile` to randomly pick cords to be sampled, and if + // so, invokes `TrackCord` to start sampling `cord`. + static void MaybeTrackCord(InlineData& cord, MethodIdentifier method); + + // Maybe sample the cord identified by 'cord' for method 'method'. + // `src` identifies a 'parent' cord which is assigned to `cord`, typically the + // input cord for a copy constructor, or an assign method such as `operator=` + // `cord` will be sampled if (and only if) `src` is sampled. + // If `cord` is currently being sampled and `src` is not being sampled, then + // this function will stop sampling the cord and reset the cord's cordz_info. + // + // Previously this function defined that `cord` will be sampled if either + // `src` is sampled, or if `cord` is randomly picked for sampling. However, + // this can cause issues, as there may be paths where some cord is assigned an + // indirect copy of it's own value. As such a 'string of copies' would then + // remain sampled (`src.is_profiled`), then assigning such a cord back to + // 'itself' creates a cycle where the cord will converge to 'always sampled`. + // + // For example: + // + // Cord x; + // for (...) { + // // Copy ctor --> y.is_profiled := x.is_profiled | random(...) + // Cord y = x; + // ... + // // Assign x = y --> x.is_profiled = y.is_profiled | random(...) + // // ==> x.is_profiled |= random(...) + // // ==> x converges to 'always profiled' + // x = y; + // } + static void MaybeTrackCord(InlineData& cord, const InlineData& src, + MethodIdentifier method); + + // Stops tracking changes for a sampled cord, and deletes the provided info. + // This function must be called before the sampled cord instance is deleted, + // and before the root cordrep of the sampled cord is unreffed. + // This function may extend the lifetime of the cordrep in cases where the + // CordInfo instance is being held by a concurrent collection thread. + void Untrack(); + + // Invokes UntrackCord() on `info` if `info` is not null. + static void MaybeUntrackCord(CordzInfo* info); + + CordzInfo() = delete; + CordzInfo(const CordzInfo&) = delete; + CordzInfo& operator=(const CordzInfo&) = delete; + + // Retrieves the oldest existing CordzInfo. + static CordzInfo* Head(const CordzSnapshot& snapshot) + ABSL_NO_THREAD_SAFETY_ANALYSIS; + + // Retrieves the next oldest existing CordzInfo older than 'this' instance. + CordzInfo* Next(const CordzSnapshot& snapshot) const + ABSL_NO_THREAD_SAFETY_ANALYSIS; + + // Locks this instance for the update identified by `method`. + // Increases the count for `method` in `update_tracker`. + void Lock(MethodIdentifier method) ABSL_EXCLUSIVE_LOCK_FUNCTION(mutex_); + + // Unlocks this instance. If the contained `rep` has been set to null + // indicating the Cord has been cleared or is otherwise no longer sampled, + // then this method will delete this CordzInfo instance. + void Unlock() ABSL_UNLOCK_FUNCTION(mutex_); + + // Asserts that this CordzInfo instance is locked. + void AssertHeld() ABSL_ASSERT_EXCLUSIVE_LOCK(mutex_); + + // Updates the `rep` property of this instance. This methods is invoked by + // Cord logic each time the root node of a sampled Cord changes, and before + // the old root reference count is deleted. This guarantees that collection + // code can always safely take a reference on the tracked cord. + // Requires a lock to be held through the `Lock()` method. + // TODO(b/117940323): annotate with ABSL_EXCLUSIVE_LOCKS_REQUIRED once all + // Cord code is in a state where this can be proven true by the compiler. + void SetCordRep(CordRep* rep); + + // Returns the current `rep` property of this instance with a reference + // added, or null if this instance represents a cord that has since been + // deleted or untracked. + CordRep* RefCordRep() const ABSL_LOCKS_EXCLUDED(mutex_); + + // Returns the current value of `rep_` for testing purposes only. + CordRep* GetCordRepForTesting() const ABSL_NO_THREAD_SAFETY_ANALYSIS { + return rep_; + } + + // Sets the current value of `rep_` for testing purposes only. + void SetCordRepForTesting(CordRep* rep) ABSL_NO_THREAD_SAFETY_ANALYSIS { + rep_ = rep; + } + + // Returns the stack trace for where the cord was first sampled. Cords are + // potentially sampled when they promote from an inlined cord to a tree or + // ring representation, which is not necessarily the location where the cord + // was first created. Some cords are created as inlined cords, and only as + // data is added do they become a non-inlined cord. However, typically the + // location represents reasonably well where the cord is 'created'. + absl::Span<void* const> GetStack() const; + + // Returns the stack trace for a sampled cord's 'parent stack trace'. This + // value may be set if the cord is sampled (promoted) after being created + // from, or being assigned the value of an existing (sampled) cord. + absl::Span<void* const> GetParentStack() const; + + // Retrieves the CordzStatistics associated with this Cord. The statistics + // are only updated when a Cord goes through a mutation, such as an Append + // or RemovePrefix. + CordzStatistics GetCordzStatistics() const; + + private: + using SpinLock = absl::base_internal::SpinLock; + using SpinLockHolder = ::absl::base_internal::SpinLockHolder; + + // Global cordz info list. CordzInfo stores a pointer to the global list + // instance to harden against ODR violations. + struct List { + constexpr explicit List(absl::ConstInitType) + : mutex(absl::kConstInit, + absl::base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL) {} + + SpinLock mutex; + std::atomic<CordzInfo*> head ABSL_GUARDED_BY(mutex){nullptr}; + }; + + static constexpr int kMaxStackDepth = 64; + + explicit CordzInfo(CordRep* rep, const CordzInfo* src, + MethodIdentifier method); + ~CordzInfo() override; + + // Sets `rep_` without holding a lock. + void UnsafeSetCordRep(CordRep* rep) ABSL_NO_THREAD_SAFETY_ANALYSIS; + + void Track(); + + // Returns the parent method from `src`, which is either `parent_method_` or + // `method_` depending on `parent_method_` being kUnknown. + // Returns kUnknown if `src` is null. + static MethodIdentifier GetParentMethod(const CordzInfo* src); + + // Fills the provided stack from `src`, copying either `parent_stack_` or + // `stack_` depending on `parent_stack_` being empty, returning the size of + // the parent stack. + // Returns 0 if `src` is null. + static int FillParentStack(const CordzInfo* src, void** stack); + + void ODRCheck() const { +#ifndef NDEBUG + ABSL_RAW_CHECK(list_ == &global_list_, "ODR violation in Cord"); +#endif + } + + // Non-inlined implementation of `MaybeTrackCord`, which is executed if + // either `src` is sampled or `cord` is sampled, and either untracks or + // tracks `cord` as documented per `MaybeTrackCord`. + static void MaybeTrackCordImpl(InlineData& cord, const InlineData& src, + MethodIdentifier method); + + ABSL_CONST_INIT static List global_list_; + List* const list_ = &global_list_; + + // ci_prev_ and ci_next_ require the global list mutex to be held. + // Unfortunately we can't use thread annotations such that the thread safety + // analysis understands that list_ and global_list_ are one and the same. + std::atomic<CordzInfo*> ci_prev_{nullptr}; + std::atomic<CordzInfo*> ci_next_{nullptr}; + + mutable absl::Mutex mutex_; + CordRep* rep_ ABSL_GUARDED_BY(mutex_); + + void* stack_[kMaxStackDepth]; + void* parent_stack_[kMaxStackDepth]; + const int stack_depth_; + const int parent_stack_depth_; + const MethodIdentifier method_; + const MethodIdentifier parent_method_; + CordzUpdateTracker update_tracker_; + const absl::Time create_time_; +}; + +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeTrackCord( + InlineData& cord, MethodIdentifier method) { + if (ABSL_PREDICT_FALSE(cordz_should_profile())) { + TrackCord(cord, method); + } +} + +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeTrackCord( + InlineData& cord, const InlineData& src, MethodIdentifier method) { + if (ABSL_PREDICT_FALSE(InlineData::is_either_profiled(cord, src))) { + MaybeTrackCordImpl(cord, src, method); + } +} + +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeUntrackCord( + CordzInfo* info) { + if (ABSL_PREDICT_FALSE(info)) { + info->Untrack(); + } +} + +inline void CordzInfo::AssertHeld() ABSL_ASSERT_EXCLUSIVE_LOCK(mutex_) { +#ifndef NDEBUG + mutex_.AssertHeld(); +#endif +} + +inline void CordzInfo::SetCordRep(CordRep* rep) { + AssertHeld(); + rep_ = rep; +} + +inline void CordzInfo::UnsafeSetCordRep(CordRep* rep) { rep_ = rep; } + +inline CordRep* CordzInfo::RefCordRep() const ABSL_LOCKS_EXCLUDED(mutex_) { + MutexLock lock(&mutex_); + return rep_ ? CordRep::Ref(rep_) : nullptr; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORDZ_INFO_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_info_statistics_test.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_info_statistics_test.cc new file mode 100644 index 0000000000..7430d281ca --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_info_statistics_test.cc @@ -0,0 +1,625 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <iostream> +#include <random> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/strings/cord.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cord_rep_ring.h" +#include "absl/strings/internal/cordz_info.h" +#include "absl/strings/internal/cordz_sample_token.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_scope.h" +#include "absl/strings/internal/cordz_update_tracker.h" +#include "absl/synchronization/internal/thread_pool.h" +#include "absl/synchronization/notification.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Do not print statistics contents, the matcher prints them as needed. +inline void PrintTo(const CordzStatistics& stats, std::ostream* s) { + if (s) *s << "CordzStatistics{...}"; +} + +namespace { + +using ::testing::Ge; + +// Creates a flat of the specified allocated size +CordRepFlat* Flat(size_t size) { + // Round up to a tag size, as we are going to poke an exact tag size back into + // the allocated flat. 'size returning allocators' could grant us more than we + // wanted, but we are ok to poke the 'requested' size in the tag, even in the + // presence of sized deletes, so we need to make sure the size rounds + // perfectly to a tag value. + assert(size >= kMinFlatSize); + size = RoundUpForTag(size); + CordRepFlat* flat = CordRepFlat::New(size - kFlatOverhead); + flat->tag = AllocatedSizeToTag(size); + flat->length = size - kFlatOverhead; + return flat; +} + +// Creates an external of the specified length +CordRepExternal* External(int length = 512) { + return static_cast<CordRepExternal*>( + NewExternalRep(absl::string_view("", length), [](absl::string_view) {})); +} + +// Creates a substring on the provided rep of length - 1 +CordRepSubstring* Substring(CordRep* rep) { + auto* substring = new CordRepSubstring; + substring->length = rep->length - 1; + substring->tag = SUBSTRING; + substring->child = rep; + return substring; +} + +// Creates a concat on the provided reps +CordRepConcat* Concat(CordRep* left, CordRep* right) { + auto* concat = new CordRepConcat; + concat->length = left->length + right->length; + concat->tag = CONCAT; + concat->left = left; + concat->right = right; + return concat; +} + +// Reference count helper +struct RefHelper { + std::vector<CordRep*> refs; + + ~RefHelper() { + for (CordRep* rep : refs) { + CordRep::Unref(rep); + } + } + + // Invokes CordRep::Unref() on `rep` when this instance is destroyed. + template <typename T> + T* NeedsUnref(T* rep) { + refs.push_back(rep); + return rep; + } + + // Adds `n` reference counts to `rep` which will be unreffed when this + // instance is destroyed. + template <typename T> + T* Ref(T* rep, size_t n = 1) { + while (n--) { + NeedsUnref(CordRep::Ref(rep)); + } + return rep; + } +}; + +// Sizeof helper. Returns the allocated size of `p`, excluding any child +// elements for substring, concat and ring cord reps. +template <typename T> +size_t SizeOf(const T* rep) { + return sizeof(T); +} + +template <> +size_t SizeOf(const CordRepFlat* rep) { + return rep->AllocatedSize(); +} + +template <> +size_t SizeOf(const CordRepExternal* rep) { + // See cord.cc + return sizeof(CordRepExternalImpl<intptr_t>) + rep->length; +} + +template <> +size_t SizeOf(const CordRepRing* rep) { + return CordRepRing::AllocSize(rep->capacity()); +} + +// Computes fair share memory used in a naive 'we dare to recurse' way. +double FairShareImpl(CordRep* rep, size_t ref) { + double self = 0.0, children = 0.0; + ref *= rep->refcount.Get(); + if (rep->tag >= FLAT) { + self = SizeOf(rep->flat()); + } else if (rep->tag == EXTERNAL) { + self = SizeOf(rep->external()); + } else if (rep->tag == SUBSTRING) { + self = SizeOf(rep->substring()); + children = FairShareImpl(rep->substring()->child, ref); + } else if (rep->tag == BTREE) { + self = SizeOf(rep->btree()); + for (CordRep*edge : rep->btree()->Edges()) { + children += FairShareImpl(edge, ref); + } + } else if (rep->tag == RING) { + self = SizeOf(rep->ring()); + rep->ring()->ForEach([&](CordRepRing::index_type i) { + self += FairShareImpl(rep->ring()->entry_child(i), 1); + }); + } else if (rep->tag == CONCAT) { + self = SizeOf(rep->concat()); + children = FairShareImpl(rep->concat()->left, ref) + + FairShareImpl(rep->concat()->right, ref); + } else { + assert(false); + } + return self / ref + children; +} + +// Returns the fair share memory size from `ShareFhareImpl()` as a size_t. +size_t FairShare(CordRep* rep, size_t ref = 1) { + return static_cast<size_t>(FairShareImpl(rep, ref)); +} + +// Samples the cord and returns CordzInfo::GetStatistics() +CordzStatistics SampleCord(CordRep* rep) { + InlineData cord(rep); + CordzInfo::TrackCord(cord, CordzUpdateTracker::kUnknown); + CordzStatistics stats = cord.cordz_info()->GetCordzStatistics(); + cord.cordz_info()->Untrack(); + return stats; +} + +MATCHER_P(EqStatistics, stats, "Statistics equal expected values") { + bool ok = true; + +#define STATS_MATCHER_EXPECT_EQ(member) \ + if (stats.member != arg.member) { \ + *result_listener << "\n stats." << #member \ + << ": actual = " << arg.member << ", expected " \ + << stats.member; \ + ok = false; \ + } + + STATS_MATCHER_EXPECT_EQ(size); + STATS_MATCHER_EXPECT_EQ(node_count); + STATS_MATCHER_EXPECT_EQ(node_counts.flat); + STATS_MATCHER_EXPECT_EQ(node_counts.flat_64); + STATS_MATCHER_EXPECT_EQ(node_counts.flat_128); + STATS_MATCHER_EXPECT_EQ(node_counts.flat_256); + STATS_MATCHER_EXPECT_EQ(node_counts.flat_512); + STATS_MATCHER_EXPECT_EQ(node_counts.flat_1k); + STATS_MATCHER_EXPECT_EQ(node_counts.external); + STATS_MATCHER_EXPECT_EQ(node_counts.concat); + STATS_MATCHER_EXPECT_EQ(node_counts.substring); + STATS_MATCHER_EXPECT_EQ(node_counts.ring); + STATS_MATCHER_EXPECT_EQ(node_counts.btree); + STATS_MATCHER_EXPECT_EQ(estimated_memory_usage); + STATS_MATCHER_EXPECT_EQ(estimated_fair_share_memory_usage); + +#undef STATS_MATCHER_EXPECT_EQ + + return ok; +} + +TEST(CordzInfoStatisticsTest, Flat) { + RefHelper ref; + auto* flat = ref.NeedsUnref(Flat(512)); + + CordzStatistics expected; + expected.size = flat->length; + expected.estimated_memory_usage = SizeOf(flat); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + expected.node_count = 1; + expected.node_counts.flat = 1; + expected.node_counts.flat_512 = 1; + + EXPECT_THAT(SampleCord(flat), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, SharedFlat) { + RefHelper ref; + auto* flat = ref.Ref(ref.NeedsUnref(Flat(64))); + + CordzStatistics expected; + expected.size = flat->length; + expected.estimated_memory_usage = SizeOf(flat); + expected.estimated_fair_share_memory_usage = SizeOf(flat) / 2; + expected.node_count = 1; + expected.node_counts.flat = 1; + expected.node_counts.flat_64 = 1; + + EXPECT_THAT(SampleCord(flat), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, External) { + RefHelper ref; + auto* external = ref.NeedsUnref(External()); + + CordzStatistics expected; + expected.size = external->length; + expected.estimated_memory_usage = SizeOf(external); + expected.estimated_fair_share_memory_usage = SizeOf(external); + expected.node_count = 1; + expected.node_counts.external = 1; + + EXPECT_THAT(SampleCord(external), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, SharedExternal) { + RefHelper ref; + auto* external = ref.Ref(ref.NeedsUnref(External())); + + CordzStatistics expected; + expected.size = external->length; + expected.estimated_memory_usage = SizeOf(external); + expected.estimated_fair_share_memory_usage = SizeOf(external) / 2; + expected.node_count = 1; + expected.node_counts.external = 1; + + EXPECT_THAT(SampleCord(external), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, Substring) { + RefHelper ref; + auto* flat = Flat(1024); + auto* substring = ref.NeedsUnref(Substring(flat)); + + CordzStatistics expected; + expected.size = substring->length; + expected.estimated_memory_usage = SizeOf(substring) + SizeOf(flat); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + expected.node_count = 2; + expected.node_counts.flat = 1; + expected.node_counts.flat_1k = 1; + expected.node_counts.substring = 1; + + EXPECT_THAT(SampleCord(substring), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, SharedSubstring) { + RefHelper ref; + auto* flat = ref.Ref(Flat(511), 2); + auto* substring = ref.Ref(ref.NeedsUnref(Substring(flat))); + + CordzStatistics expected; + expected.size = substring->length; + expected.estimated_memory_usage = SizeOf(flat) + SizeOf(substring); + expected.estimated_fair_share_memory_usage = + SizeOf(substring) / 2 + SizeOf(flat) / 6; + expected.node_count = 2; + expected.node_counts.flat = 1; + expected.node_counts.flat_512 = 1; + expected.node_counts.substring = 1; + + EXPECT_THAT(SampleCord(substring), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, Concat) { + RefHelper ref; + auto* flat1 = Flat(300); + auto* flat2 = Flat(2000); + auto* concat = ref.NeedsUnref(Concat(flat1, flat2)); + + CordzStatistics expected; + expected.size = concat->length; + expected.estimated_memory_usage = + SizeOf(concat) + SizeOf(flat1) + SizeOf(flat2); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + expected.node_count = 3; + expected.node_counts.flat = 2; + expected.node_counts.flat_512 = 1; + expected.node_counts.concat = 1; + + EXPECT_THAT(SampleCord(concat), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, DeepConcat) { + RefHelper ref; + auto* flat1 = Flat(300); + auto* flat2 = Flat(2000); + auto* flat3 = Flat(400); + auto* external = External(3000); + auto* substring = Substring(external); + auto* concat1 = Concat(flat1, flat2); + auto* concat2 = Concat(flat3, substring); + auto* concat = ref.NeedsUnref(Concat(concat1, concat2)); + + CordzStatistics expected; + expected.size = concat->length; + expected.estimated_memory_usage = SizeOf(concat) * 3 + SizeOf(flat1) + + SizeOf(flat2) + SizeOf(flat3) + + SizeOf(external) + SizeOf(substring); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + + expected.node_count = 8; + expected.node_counts.flat = 3; + expected.node_counts.flat_512 = 2; + expected.node_counts.external = 1; + expected.node_counts.concat = 3; + expected.node_counts.substring = 1; + + EXPECT_THAT(SampleCord(concat), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, DeepSharedConcat) { + RefHelper ref; + auto* flat1 = Flat(40); + auto* flat2 = ref.Ref(Flat(2000), 4); + auto* flat3 = Flat(70); + auto* external = ref.Ref(External(3000)); + auto* substring = ref.Ref(Substring(external), 3); + auto* concat1 = Concat(flat1, flat2); + auto* concat2 = Concat(flat3, substring); + auto* concat = ref.Ref(ref.NeedsUnref(Concat(concat1, concat2))); + + CordzStatistics expected; + expected.size = concat->length; + expected.estimated_memory_usage = SizeOf(concat) * 3 + SizeOf(flat1) + + SizeOf(flat2) + SizeOf(flat3) + + SizeOf(external) + SizeOf(substring); + expected.estimated_fair_share_memory_usage = FairShare(concat); + expected.node_count = 8; + expected.node_counts.flat = 3; + expected.node_counts.flat_64 = 1; + expected.node_counts.flat_128 = 1; + expected.node_counts.external = 1; + expected.node_counts.concat = 3; + expected.node_counts.substring = 1; + + EXPECT_THAT(SampleCord(concat), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, Ring) { + RefHelper ref; + auto* flat1 = Flat(240); + auto* flat2 = Flat(2000); + auto* flat3 = Flat(70); + auto* external = External(3000); + CordRepRing* ring = CordRepRing::Create(flat1); + ring = CordRepRing::Append(ring, flat2); + ring = CordRepRing::Append(ring, flat3); + ring = ref.NeedsUnref(CordRepRing::Append(ring, external)); + + CordzStatistics expected; + expected.size = ring->length; + expected.estimated_memory_usage = SizeOf(ring) + SizeOf(flat1) + + SizeOf(flat2) + SizeOf(flat3) + + SizeOf(external); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + expected.node_count = 5; + expected.node_counts.flat = 3; + expected.node_counts.flat_128 = 1; + expected.node_counts.flat_256 = 1; + expected.node_counts.external = 1; + expected.node_counts.ring = 1; + + EXPECT_THAT(SampleCord(ring), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, SharedSubstringRing) { + RefHelper ref; + auto* flat1 = ref.Ref(Flat(240)); + auto* flat2 = Flat(200); + auto* flat3 = Flat(70); + auto* external = ref.Ref(External(3000), 5); + CordRepRing* ring = CordRepRing::Create(flat1); + ring = CordRepRing::Append(ring, flat2); + ring = CordRepRing::Append(ring, flat3); + ring = ref.Ref(CordRepRing::Append(ring, external), 4); + auto* substring = ref.Ref(ref.NeedsUnref(Substring(ring))); + + + CordzStatistics expected; + expected.size = substring->length; + expected.estimated_memory_usage = SizeOf(ring) + SizeOf(flat1) + + SizeOf(flat2) + SizeOf(flat3) + + SizeOf(external) + SizeOf(substring); + expected.estimated_fair_share_memory_usage = FairShare(substring); + expected.node_count = 6; + expected.node_counts.flat = 3; + expected.node_counts.flat_128 = 1; + expected.node_counts.flat_256 = 2; + expected.node_counts.external = 1; + expected.node_counts.ring = 1; + expected.node_counts.substring = 1; + + EXPECT_THAT(SampleCord(substring), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, BtreeLeaf) { + ASSERT_THAT(CordRepBtree::kMaxCapacity, Ge(3)); + RefHelper ref; + auto* flat1 = Flat(2000); + auto* flat2 = Flat(200); + auto* substr = Substring(flat2); + auto* external = External(3000); + + CordRepBtree* tree = CordRepBtree::Create(flat1); + tree = CordRepBtree::Append(tree, substr); + tree = CordRepBtree::Append(tree, external); + size_t flat3_count = CordRepBtree::kMaxCapacity - 3; + size_t flat3_size = 0; + for (size_t i = 0; i < flat3_count; ++i) { + auto* flat3 = Flat(70); + flat3_size += SizeOf(flat3); + tree = CordRepBtree::Append(tree, flat3); + } + ref.NeedsUnref(tree); + + CordzStatistics expected; + expected.size = tree->length; + expected.estimated_memory_usage = SizeOf(tree) + SizeOf(flat1) + + SizeOf(flat2) + SizeOf(substr) + + flat3_size + SizeOf(external); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + expected.node_count = 1 + 3 + 1 + flat3_count; + expected.node_counts.flat = 2 + flat3_count; + expected.node_counts.flat_128 = flat3_count; + expected.node_counts.flat_256 = 1; + expected.node_counts.external = 1; + expected.node_counts.substring = 1; + expected.node_counts.btree = 1; + + EXPECT_THAT(SampleCord(tree), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, BtreeNodeShared) { + RefHelper ref; + static constexpr int leaf_count = 3; + const size_t flat3_count = CordRepBtree::kMaxCapacity - 3; + ASSERT_THAT(flat3_count, Ge(0)); + + CordRepBtree* tree = nullptr; + size_t mem_size = 0; + for (int i = 0; i < leaf_count; ++i) { + auto* flat1 = ref.Ref(Flat(2000), 9); + mem_size += SizeOf(flat1); + if (i == 0) { + tree = CordRepBtree::Create(flat1); + } else { + tree = CordRepBtree::Append(tree, flat1); + } + + auto* flat2 = Flat(200); + auto* substr = Substring(flat2); + mem_size += SizeOf(flat2) + SizeOf(substr); + tree = CordRepBtree::Append(tree, substr); + + auto* external = External(30); + mem_size += SizeOf(external); + tree = CordRepBtree::Append(tree, external); + + for (size_t i = 0; i < flat3_count; ++i) { + auto* flat3 = Flat(70); + mem_size += SizeOf(flat3); + tree = CordRepBtree::Append(tree, flat3); + } + + if (i == 0) { + mem_size += SizeOf(tree); + } else { + mem_size += SizeOf(tree->Edges().back()->btree()); + } + } + ref.NeedsUnref(tree); + + // Ref count: 2 for top (add 1), 5 for leaf 0 (add 4). + ref.Ref(tree, 1); + ref.Ref(tree->Edges().front(), 4); + + CordzStatistics expected; + expected.size = tree->length; + expected.estimated_memory_usage = SizeOf(tree) + mem_size; + expected.estimated_fair_share_memory_usage = FairShare(tree); + + expected.node_count = 1 + leaf_count * (1 + 3 + 1 + flat3_count); + expected.node_counts.flat = leaf_count * (2 + flat3_count); + expected.node_counts.flat_128 = leaf_count * flat3_count; + expected.node_counts.flat_256 = leaf_count; + expected.node_counts.external = leaf_count; + expected.node_counts.substring = leaf_count; + expected.node_counts.btree = 1 + leaf_count; + + EXPECT_THAT(SampleCord(tree), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, ThreadSafety) { + Notification stop; + static constexpr int kNumThreads = 8; + int64_t sampled_node_count = 0; + + { + absl::synchronization_internal::ThreadPool pool(kNumThreads); + + // Run analyzer thread emulating a CordzHandler collection. + pool.Schedule([&]() { + while (!stop.HasBeenNotified()) { + // Run every 10us (about 100K total collections). + absl::SleepFor(absl::Microseconds(10)); + CordzSampleToken token; + for (const CordzInfo& cord_info : token) { + CordzStatistics stats = cord_info.GetCordzStatistics(); + sampled_node_count += stats.node_count; + } + } + }); + + // Run 'application threads' + for (int i = 0; i < kNumThreads; ++i) { + pool.Schedule([&]() { + // Track 0 - 2 cordz infos at a time, providing permutations of 0, 1 + // and 2 CordzHandle and CordzInfo queues being active, with plenty of + // 'empty to non empty' transitions. + InlineData cords[2]; + std::minstd_rand gen; + std::uniform_int_distribution<int> coin_toss(0, 1); + + while (!stop.HasBeenNotified()) { + for (InlineData& cord : cords) { + // 50/50 flip the state of the cord + if (coin_toss(gen) != 0) { + if (cord.is_tree()) { + // 50/50 simulate delete (untrack) or 'edit to empty' + if (coin_toss(gen) != 0) { + CordzInfo::MaybeUntrackCord(cord.cordz_info()); + } else { + CordzUpdateScope scope(cord.cordz_info(), + CordzUpdateTracker::kUnknown); + scope.SetCordRep(nullptr); + } + CordRep::Unref(cord.as_tree()); + cord.set_inline_size(0); + } else { + // Coin toss to 25% ring, 25% btree, and 50% flat. + CordRep* rep = Flat(256); + if (coin_toss(gen) != 0) { + if (coin_toss(gen) != 0) { + rep = CordRepRing::Create(rep); + } else { + rep = CordRepBtree::Create(rep); + } + } + cord.make_tree(rep); + + // 50/50 sample + if (coin_toss(gen) != 0) { + CordzInfo::TrackCord(cord, CordzUpdateTracker::kUnknown); + } + } + } + } + } + for (InlineData& cord : cords) { + if (cord.is_tree()) { + CordzInfo::MaybeUntrackCord(cord.cordz_info()); + CordRep::Unref(cord.as_tree()); + } + } + }); + } + + // Run for 1 second to give memory and thread safety analyzers plenty of + // time to detect any mishaps or undefined behaviors. + absl::SleepFor(absl::Seconds(1)); + stop.Notify(); + } + + std::cout << "Sampled " << sampled_node_count << " nodes\n"; +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_info_test.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_info_test.cc new file mode 100644 index 0000000000..b98343ae79 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_info_test.cc @@ -0,0 +1,341 @@ +// Copyright 2019 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_info.h" + +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/debugging/stacktrace.h" +#include "absl/debugging/symbolize.h" +#include "absl/strings/cordz_test_helpers.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cordz_handle.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_tracker.h" +#include "absl/strings/str_cat.h" +#include "absl/types/span.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::HasSubstr; +using ::testing::Ne; +using ::testing::SizeIs; + +// Used test values +auto constexpr kUnknownMethod = CordzUpdateTracker::kUnknown; +auto constexpr kTrackCordMethod = CordzUpdateTracker::kConstructorString; +auto constexpr kChildMethod = CordzUpdateTracker::kConstructorCord; +auto constexpr kUpdateMethod = CordzUpdateTracker::kAppendString; + +// Local less verbose helper +std::vector<const CordzHandle*> DeleteQueue() { + return CordzHandle::DiagnosticsGetDeleteQueue(); +} + +std::string FormatStack(absl::Span<void* const> raw_stack) { + static constexpr size_t buf_size = 1 << 14; + std::unique_ptr<char[]> buf(new char[buf_size]); + std::string output; + for (void* stackp : raw_stack) { + if (absl::Symbolize(stackp, buf.get(), buf_size)) { + absl::StrAppend(&output, " ", buf.get(), "\n"); + } + } + return output; +} + +TEST(CordzInfoTest, TrackCord) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + ASSERT_THAT(info, Ne(nullptr)); + EXPECT_FALSE(info->is_snapshot()); + EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(info)); + EXPECT_THAT(info->GetCordRepForTesting(), Eq(data.rep.rep)); + info->Untrack(); +} + +TEST(CordzInfoTest, MaybeTrackChildCordWithoutSampling) { + CordzSamplingIntervalHelper sample_none(99999); + TestCordData parent, child; + CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); + EXPECT_THAT(child.data.cordz_info(), Eq(nullptr)); +} + +TEST(CordzInfoTest, MaybeTrackChildCordWithSampling) { + CordzSamplingIntervalHelper sample_all(1); + TestCordData parent, child; + CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); + EXPECT_THAT(child.data.cordz_info(), Eq(nullptr)); +} + +TEST(CordzInfoTest, MaybeTrackChildCordWithoutSamplingParentSampled) { + CordzSamplingIntervalHelper sample_none(99999); + TestCordData parent, child; + CordzInfo::TrackCord(parent.data, kTrackCordMethod); + CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); + CordzInfo* parent_info = parent.data.cordz_info(); + CordzInfo* child_info = child.data.cordz_info(); + ASSERT_THAT(child_info, Ne(nullptr)); + EXPECT_THAT(child_info->GetCordRepForTesting(), Eq(child.rep.rep)); + EXPECT_THAT(child_info->GetParentStack(), parent_info->GetStack()); + parent_info->Untrack(); + child_info->Untrack(); +} + +TEST(CordzInfoTest, MaybeTrackChildCordWithoutSamplingChildSampled) { + CordzSamplingIntervalHelper sample_none(99999); + TestCordData parent, child; + CordzInfo::TrackCord(child.data, kTrackCordMethod); + CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); + EXPECT_THAT(child.data.cordz_info(), Eq(nullptr)); +} + +TEST(CordzInfoTest, MaybeTrackChildCordWithSamplingChildSampled) { + CordzSamplingIntervalHelper sample_all(1); + TestCordData parent, child; + CordzInfo::TrackCord(child.data, kTrackCordMethod); + CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); + EXPECT_THAT(child.data.cordz_info(), Eq(nullptr)); +} + +TEST(CordzInfoTest, UntrackCord) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + + info->Untrack(); + EXPECT_THAT(DeleteQueue(), SizeIs(0)); +} + +TEST(CordzInfoTest, UntrackCordWithSnapshot) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + + CordzSnapshot snapshot; + info->Untrack(); + EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(nullptr)); + EXPECT_THAT(info->GetCordRepForTesting(), Eq(data.rep.rep)); + EXPECT_THAT(DeleteQueue(), ElementsAre(info, &snapshot)); +} + +TEST(CordzInfoTest, SetCordRep) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + + TestCordRep rep; + info->Lock(CordzUpdateTracker::kAppendCord); + info->SetCordRep(rep.rep); + info->Unlock(); + EXPECT_THAT(info->GetCordRepForTesting(), Eq(rep.rep)); + + info->Untrack(); +} + +TEST(CordzInfoTest, SetCordRepNullUntracksCordOnUnlock) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + + info->Lock(CordzUpdateTracker::kAppendString); + info->SetCordRep(nullptr); + EXPECT_THAT(info->GetCordRepForTesting(), Eq(nullptr)); + EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(info)); + + info->Unlock(); + EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(nullptr)); +} + +TEST(CordzInfoTest, RefCordRep) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + + size_t refcount = data.rep.rep->refcount.Get(); + EXPECT_THAT(info->RefCordRep(), Eq(data.rep.rep)); + EXPECT_THAT(data.rep.rep->refcount.Get(), Eq(refcount + 1)); + CordRep::Unref(data.rep.rep); + info->Untrack(); +} + +#if GTEST_HAS_DEATH_TEST + +TEST(CordzInfoTest, SetCordRepRequiresMutex) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + TestCordRep rep; + EXPECT_DEBUG_DEATH(info->SetCordRep(rep.rep), ".*"); + info->Untrack(); +} + +#endif // GTEST_HAS_DEATH_TEST + +TEST(CordzInfoTest, TrackUntrackHeadFirstV2) { + CordzSnapshot snapshot; + EXPECT_THAT(CordzInfo::Head(snapshot), Eq(nullptr)); + + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info1 = data.data.cordz_info(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1)); + EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); + + TestCordData data2; + CordzInfo::TrackCord(data2.data, kTrackCordMethod); + CordzInfo* info2 = data2.data.cordz_info(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2)); + EXPECT_THAT(info2->Next(snapshot), Eq(info1)); + EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); + + info2->Untrack(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1)); + EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); + + info1->Untrack(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(nullptr)); +} + +TEST(CordzInfoTest, TrackUntrackTailFirstV2) { + CordzSnapshot snapshot; + EXPECT_THAT(CordzInfo::Head(snapshot), Eq(nullptr)); + + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info1 = data.data.cordz_info(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1)); + EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); + + TestCordData data2; + CordzInfo::TrackCord(data2.data, kTrackCordMethod); + CordzInfo* info2 = data2.data.cordz_info(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2)); + EXPECT_THAT(info2->Next(snapshot), Eq(info1)); + EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); + + info1->Untrack(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2)); + EXPECT_THAT(info2->Next(snapshot), Eq(nullptr)); + + info2->Untrack(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(nullptr)); +} + +TEST(CordzInfoTest, StackV2) { + TestCordData data; + // kMaxStackDepth is intentionally less than 64 (which is the max depth that + // Cordz will record) because if the actual stack depth is over 64 + // (which it is on Apple platforms) then the expected_stack will end up + // catching a few frames at the end that the actual_stack didn't get and + // it will no longer be subset. At the time of this writing 58 is the max + // that will allow this test to pass (with a minimum os version of iOS 9), so + // rounded down to 50 to hopefully not run into this in the future if Apple + // makes small modifications to its testing stack. 50 is sufficient to prove + // that we got a decent stack. + static constexpr int kMaxStackDepth = 50; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + std::vector<void*> local_stack; + local_stack.resize(kMaxStackDepth); + // In some environments we don't get stack traces. For example in Android + // absl::GetStackTrace will return 0 indicating it didn't find any stack. The + // resultant formatted stack will be "", but that still equals the stack + // recorded in CordzInfo, which is also empty. The skip_count is 1 so that the + // line number of the current stack isn't included in the HasSubstr check. + local_stack.resize(absl::GetStackTrace(local_stack.data(), kMaxStackDepth, + /*skip_count=*/1)); + + std::string got_stack = FormatStack(info->GetStack()); + std::string expected_stack = FormatStack(local_stack); + // If TrackCord is inlined, got_stack should match expected_stack. If it isn't + // inlined, got_stack should include an additional frame not present in + // expected_stack. Either way, expected_stack should be a substring of + // got_stack. + EXPECT_THAT(got_stack, HasSubstr(expected_stack)); + + info->Untrack(); +} + +// Local helper functions to get different stacks for child and parent. +CordzInfo* TrackChildCord(InlineData& data, const InlineData& parent) { + CordzInfo::TrackCord(data, parent, kChildMethod); + return data.cordz_info(); +} +CordzInfo* TrackParentCord(InlineData& data) { + CordzInfo::TrackCord(data, kTrackCordMethod); + return data.cordz_info(); +} + +TEST(CordzInfoTest, GetStatistics) { + TestCordData data; + CordzInfo* info = TrackParentCord(data.data); + + CordzStatistics statistics = info->GetCordzStatistics(); + EXPECT_THAT(statistics.size, Eq(data.rep.rep->length)); + EXPECT_THAT(statistics.method, Eq(kTrackCordMethod)); + EXPECT_THAT(statistics.parent_method, Eq(kUnknownMethod)); + EXPECT_THAT(statistics.update_tracker.Value(kTrackCordMethod), Eq(1)); + + info->Untrack(); +} + +TEST(CordzInfoTest, LockCountsMethod) { + TestCordData data; + CordzInfo* info = TrackParentCord(data.data); + + info->Lock(kUpdateMethod); + info->Unlock(); + info->Lock(kUpdateMethod); + info->Unlock(); + + CordzStatistics statistics = info->GetCordzStatistics(); + EXPECT_THAT(statistics.update_tracker.Value(kUpdateMethod), Eq(2)); + + info->Untrack(); +} + +TEST(CordzInfoTest, FromParent) { + TestCordData parent; + TestCordData child; + CordzInfo* info_parent = TrackParentCord(parent.data); + CordzInfo* info_child = TrackChildCord(child.data, parent.data); + + std::string stack = FormatStack(info_parent->GetStack()); + std::string parent_stack = FormatStack(info_child->GetParentStack()); + EXPECT_THAT(stack, Eq(parent_stack)); + + CordzStatistics statistics = info_child->GetCordzStatistics(); + EXPECT_THAT(statistics.size, Eq(child.rep.rep->length)); + EXPECT_THAT(statistics.method, Eq(kChildMethod)); + EXPECT_THAT(statistics.parent_method, Eq(kTrackCordMethod)); + EXPECT_THAT(statistics.update_tracker.Value(kChildMethod), Eq(1)); + + info_parent->Untrack(); + info_child->Untrack(); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token.cc new file mode 100644 index 0000000000..ba1270d8f0 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token.cc @@ -0,0 +1,64 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_sample_token.h" + +#include "absl/base/config.h" +#include "absl/strings/internal/cordz_handle.h" +#include "absl/strings/internal/cordz_info.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +CordzSampleToken::Iterator& CordzSampleToken::Iterator::operator++() { + if (current_) { + current_ = current_->Next(*token_); + } + return *this; +} + +CordzSampleToken::Iterator CordzSampleToken::Iterator::operator++(int) { + Iterator it(*this); + operator++(); + return it; +} + +bool operator==(const CordzSampleToken::Iterator& lhs, + const CordzSampleToken::Iterator& rhs) { + return lhs.current_ == rhs.current_ && + (lhs.current_ == nullptr || lhs.token_ == rhs.token_); +} + +bool operator!=(const CordzSampleToken::Iterator& lhs, + const CordzSampleToken::Iterator& rhs) { + return !(lhs == rhs); +} + +CordzSampleToken::Iterator::reference CordzSampleToken::Iterator::operator*() + const { + return *current_; +} + +CordzSampleToken::Iterator::pointer CordzSampleToken::Iterator::operator->() + const { + return current_; +} + +CordzSampleToken::Iterator::Iterator(const CordzSampleToken* token) + : token_(token), current_(CordzInfo::Head(*token)) {} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token.h b/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token.h new file mode 100644 index 0000000000..28a1d70ccc --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token.h @@ -0,0 +1,97 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/base/config.h" +#include "absl/strings/internal/cordz_handle.h" +#include "absl/strings/internal/cordz_info.h" + +#ifndef ABSL_STRINGS_CORDZ_SAMPLE_TOKEN_H_ +#define ABSL_STRINGS_CORDZ_SAMPLE_TOKEN_H_ + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// The existence of a CordzSampleToken guarantees that a reader can traverse the +// global_cordz_infos_head linked-list without needing to hold a mutex. When a +// CordzSampleToken exists, all CordzInfo objects that would be destroyed are +// instead appended to a deletion queue. When the CordzSampleToken is destroyed, +// it will also clean up any of these CordzInfo objects. +// +// E.g., ST are CordzSampleToken objects and CH are CordzHandle objects. +// ST1 <- CH1 <- CH2 <- ST2 <- CH3 <- global_delete_queue_tail +// +// This list tracks that CH1 and CH2 were created after ST1, so the thread +// holding ST1 might have a referece to CH1, CH2, ST2, and CH3. However, ST2 was +// created later, so the thread holding the ST2 token cannot have a reference to +// ST1, CH1, or CH2. If ST1 is cleaned up first, that thread will delete ST1, +// CH1, and CH2. If instead ST2 is cleaned up first, that thread will only +// delete ST2. +// +// If ST1 is cleaned up first, the new list will be: +// ST2 <- CH3 <- global_delete_queue_tail +// +// If ST2 is cleaned up first, the new list will be: +// ST1 <- CH1 <- CH2 <- CH3 <- global_delete_queue_tail +// +// All new CordzHandle objects are appended to the list, so if a new thread +// comes along before either ST1 or ST2 are cleaned up, the new list will be: +// ST1 <- CH1 <- CH2 <- ST2 <- CH3 <- ST3 <- global_delete_queue_tail +// +// A thread must hold the global_delete_queue_mu mutex whenever it's altering +// this list. +// +// It is safe for thread that holds a CordzSampleToken to read +// global_cordz_infos at any time since the objects it is able to retrieve will +// not be deleted while the CordzSampleToken exists. +class CordzSampleToken : public CordzSnapshot { + public: + class Iterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = const CordzInfo&; + using difference_type = ptrdiff_t; + using pointer = const CordzInfo*; + using reference = value_type; + + Iterator() = default; + + Iterator& operator++(); + Iterator operator++(int); + friend bool operator==(const Iterator& lhs, const Iterator& rhs); + friend bool operator!=(const Iterator& lhs, const Iterator& rhs); + reference operator*() const; + pointer operator->() const; + + private: + friend class CordzSampleToken; + explicit Iterator(const CordzSampleToken* token); + + const CordzSampleToken* token_ = nullptr; + pointer current_ = nullptr; + }; + + CordzSampleToken() = default; + CordzSampleToken(const CordzSampleToken&) = delete; + CordzSampleToken& operator=(const CordzSampleToken&) = delete; + + Iterator begin() { return Iterator(this); } + Iterator end() { return Iterator(); } +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORDZ_SAMPLE_TOKEN_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token_test.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token_test.cc new file mode 100644 index 0000000000..9f54301d68 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_sample_token_test.cc @@ -0,0 +1,208 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_sample_token.h" + +#include <memory> +#include <type_traits> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "absl/random/random.h" +#include "absl/strings/cordz_test_helpers.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cordz_handle.h" +#include "absl/strings/internal/cordz_info.h" +#include "absl/synchronization/internal/thread_pool.h" +#include "absl/synchronization/notification.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::Ne; + +// Used test values +auto constexpr kTrackCordMethod = CordzUpdateTracker::kConstructorString; + +TEST(CordzSampleTokenTest, IteratorTraits) { + static_assert(std::is_copy_constructible<CordzSampleToken::Iterator>::value, + ""); + static_assert(std::is_copy_assignable<CordzSampleToken::Iterator>::value, ""); + static_assert(std::is_move_constructible<CordzSampleToken::Iterator>::value, + ""); + static_assert(std::is_move_assignable<CordzSampleToken::Iterator>::value, ""); + static_assert( + std::is_same< + std::iterator_traits<CordzSampleToken::Iterator>::iterator_category, + std::input_iterator_tag>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<CordzSampleToken::Iterator>::value_type, + const CordzInfo&>::value, + ""); + static_assert( + std::is_same< + std::iterator_traits<CordzSampleToken::Iterator>::difference_type, + ptrdiff_t>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<CordzSampleToken::Iterator>::pointer, + const CordzInfo*>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<CordzSampleToken::Iterator>::reference, + const CordzInfo&>::value, + ""); +} + +TEST(CordzSampleTokenTest, IteratorEmpty) { + CordzSampleToken token; + EXPECT_THAT(token.begin(), Eq(token.end())); +} + +TEST(CordzSampleTokenTest, Iterator) { + TestCordData cord1, cord2, cord3; + CordzInfo::TrackCord(cord1.data, kTrackCordMethod); + CordzInfo* info1 = cord1.data.cordz_info(); + CordzInfo::TrackCord(cord2.data, kTrackCordMethod); + CordzInfo* info2 = cord2.data.cordz_info(); + CordzInfo::TrackCord(cord3.data, kTrackCordMethod); + CordzInfo* info3 = cord3.data.cordz_info(); + + CordzSampleToken token; + std::vector<const CordzInfo*> found; + for (const CordzInfo& cord_info : token) { + found.push_back(&cord_info); + } + + EXPECT_THAT(found, ElementsAre(info3, info2, info1)); + + info1->Untrack(); + info2->Untrack(); + info3->Untrack(); +} + +TEST(CordzSampleTokenTest, IteratorEquality) { + TestCordData cord1; + TestCordData cord2; + TestCordData cord3; + CordzInfo::TrackCord(cord1.data, kTrackCordMethod); + CordzInfo* info1 = cord1.data.cordz_info(); + + CordzSampleToken token1; + // lhs starts with the CordzInfo corresponding to cord1 at the head. + CordzSampleToken::Iterator lhs = token1.begin(); + + CordzInfo::TrackCord(cord2.data, kTrackCordMethod); + CordzInfo* info2 = cord2.data.cordz_info(); + + CordzSampleToken token2; + // rhs starts with the CordzInfo corresponding to cord2 at the head. + CordzSampleToken::Iterator rhs = token2.begin(); + + CordzInfo::TrackCord(cord3.data, kTrackCordMethod); + CordzInfo* info3 = cord3.data.cordz_info(); + + // lhs is on cord1 while rhs is on cord2. + EXPECT_THAT(lhs, Ne(rhs)); + + rhs++; + // lhs and rhs are both on cord1, but they didn't come from the same + // CordzSampleToken. + EXPECT_THAT(lhs, Ne(rhs)); + + lhs++; + rhs++; + // Both lhs and rhs are done, so they are on nullptr. + EXPECT_THAT(lhs, Eq(rhs)); + + info1->Untrack(); + info2->Untrack(); + info3->Untrack(); +} + +TEST(CordzSampleTokenTest, MultiThreaded) { + Notification stop; + static constexpr int kNumThreads = 4; + static constexpr int kNumCords = 3; + static constexpr int kNumTokens = 3; + absl::synchronization_internal::ThreadPool pool(kNumThreads); + + for (int i = 0; i < kNumThreads; ++i) { + pool.Schedule([&stop]() { + absl::BitGen gen; + TestCordData cords[kNumCords]; + std::unique_ptr<CordzSampleToken> tokens[kNumTokens]; + + while (!stop.HasBeenNotified()) { + // Randomly perform one of five actions: + // 1) Untrack + // 2) Track + // 3) Iterate over Cords visible to a token. + // 4) Unsample + // 5) Sample + int index = absl::Uniform(gen, 0, kNumCords); + if (absl::Bernoulli(gen, 0.5)) { + TestCordData& cord = cords[index]; + // Track/untrack. + if (cord.data.is_profiled()) { + // 1) Untrack + cord.data.cordz_info()->Untrack(); + cord.data.clear_cordz_info();; + } else { + // 2) Track + CordzInfo::TrackCord(cord.data, kTrackCordMethod); + } + } else { + std::unique_ptr<CordzSampleToken>& token = tokens[index]; + if (token) { + if (absl::Bernoulli(gen, 0.5)) { + // 3) Iterate over Cords visible to a token. + for (const CordzInfo& info : *token) { + // This is trivial work to allow us to compile the loop. + EXPECT_THAT(info.Next(*token), Ne(&info)); + } + } else { + // 4) Unsample + token = nullptr; + } + } else { + // 5) Sample + token = absl::make_unique<CordzSampleToken>(); + } + } + } + for (TestCordData& cord : cords) { + CordzInfo::MaybeUntrackCord(cord.data.cordz_info()); + } + }); + } + // The threads will hammer away. Give it a little bit of time for tsan to + // spot errors. + absl::SleepFor(absl::Seconds(3)); + stop.Notify(); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_statistics.h b/third_party/abseil-cpp/absl/strings/internal/cordz_statistics.h new file mode 100644 index 0000000000..da4c7dbb8c --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_statistics.h @@ -0,0 +1,87 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORDZ_STATISTICS_H_ +#define ABSL_STRINGS_INTERNAL_CORDZ_STATISTICS_H_ + +#include <cstdint> + +#include "absl/base/config.h" +#include "absl/strings/internal/cordz_update_tracker.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordzStatistics captures some meta information about a Cord's shape. +struct CordzStatistics { + using MethodIdentifier = CordzUpdateTracker::MethodIdentifier; + + // Node counts information + struct NodeCounts { + size_t flat = 0; // #flats + size_t flat_64 = 0; // #flats up to 64 bytes + size_t flat_128 = 0; // #flats up to 128 bytes + size_t flat_256 = 0; // #flats up to 256 bytes + size_t flat_512 = 0; // #flats up to 512 bytes + size_t flat_1k = 0; // #flats up to 1K bytes + size_t external = 0; // #external reps + size_t substring = 0; // #substring reps + size_t concat = 0; // #concat reps + size_t ring = 0; // #ring buffer reps + size_t btree = 0; // #btree reps + }; + + // The size of the cord in bytes. This matches the result of Cord::size(). + int64_t size = 0; + + // The estimated memory used by the sampled cord. This value matches the + // value as reported by Cord::EstimatedMemoryUsage(). + // A value of 0 implies the property has not been recorded. + int64_t estimated_memory_usage = 0; + + // The effective memory used by the sampled cord, inversely weighted by the + // effective indegree of each allocated node. This is a representation of the + // fair share of memory usage that should be attributed to the sampled cord. + // This value is more useful for cases where one or more nodes are referenced + // by multiple Cord instances, and for cases where a Cord includes the same + // node multiple times (either directly or indirectly). + // A value of 0 implies the property has not been recorded. + int64_t estimated_fair_share_memory_usage = 0; + + // The total number of nodes referenced by this cord. + // For ring buffer Cords, this includes the 'ring buffer' node. + // For btree Cords, this includes all 'CordRepBtree' tree nodes as well as all + // the substring, flat and external nodes referenced by the tree. + // A value of 0 implies the property has not been recorded. + int64_t node_count = 0; + + // Detailed node counts per type + NodeCounts node_counts; + + // The cord method responsible for sampling the cord. + MethodIdentifier method = MethodIdentifier::kUnknown; + + // The cord method responsible for sampling the parent cord if applicable. + MethodIdentifier parent_method = MethodIdentifier::kUnknown; + + // Update tracker tracking invocation count per cord method. + CordzUpdateTracker update_tracker; +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORDZ_STATISTICS_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_update_scope.h b/third_party/abseil-cpp/absl/strings/internal/cordz_update_scope.h new file mode 100644 index 0000000000..57ba75de93 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_update_scope.h @@ -0,0 +1,71 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_SCOPE_H_ +#define ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_SCOPE_H_ + +#include "absl/base/config.h" +#include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cordz_info.h" +#include "absl/strings/internal/cordz_update_tracker.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordzUpdateScope scopes an update to the provided CordzInfo. +// The class invokes `info->Lock(method)` and `info->Unlock()` to guard +// cordrep updates. This class does nothing if `info` is null. +// See also the 'Lock`, `Unlock` and `SetCordRep` methods in `CordzInfo`. +class ABSL_SCOPED_LOCKABLE CordzUpdateScope { + public: + CordzUpdateScope(CordzInfo* info, CordzUpdateTracker::MethodIdentifier method) + ABSL_EXCLUSIVE_LOCK_FUNCTION(info) + : info_(info) { + if (ABSL_PREDICT_FALSE(info_)) { + info->Lock(method); + } + } + + // CordzUpdateScope can not be copied or assigned to. + CordzUpdateScope(CordzUpdateScope&& rhs) = delete; + CordzUpdateScope(const CordzUpdateScope&) = delete; + CordzUpdateScope& operator=(CordzUpdateScope&& rhs) = delete; + CordzUpdateScope& operator=(const CordzUpdateScope&) = delete; + + ~CordzUpdateScope() ABSL_UNLOCK_FUNCTION() { + if (ABSL_PREDICT_FALSE(info_)) { + info_->Unlock(); + } + } + + void SetCordRep(CordRep* rep) const { + if (ABSL_PREDICT_FALSE(info_)) { + info_->SetCordRep(rep); + } + } + + CordzInfo* info() const { return info_; } + + private: + CordzInfo* info_; +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_SCOPE_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_update_scope_test.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_update_scope_test.cc new file mode 100644 index 0000000000..3d08c622d0 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_update_scope_test.cc @@ -0,0 +1,49 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_update_scope.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/strings/cordz_test_helpers.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cordz_info.h" +#include "absl/strings/internal/cordz_update_tracker.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +namespace { + +// Used test values +auto constexpr kTrackCordMethod = CordzUpdateTracker::kConstructorString; + +TEST(CordzUpdateScopeTest, ScopeNullptr) { + CordzUpdateScope scope(nullptr, kTrackCordMethod); +} + +TEST(CordzUpdateScopeTest, ScopeSampledCord) { + TestCordData cord; + CordzInfo::TrackCord(cord.data, kTrackCordMethod); + CordzUpdateScope scope(cord.data.cordz_info(), kTrackCordMethod); + cord.data.cordz_info()->SetCordRep(nullptr); +} + +} // namespace +ABSL_NAMESPACE_END +} // namespace cord_internal + +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_update_tracker.h b/third_party/abseil-cpp/absl/strings/internal/cordz_update_tracker.h new file mode 100644 index 0000000000..1f764486eb --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_update_tracker.h @@ -0,0 +1,121 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_TRACKER_H_ +#define ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_TRACKER_H_ + +#include <atomic> +#include <cstdint> + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordzUpdateTracker tracks counters for Cord update methods. +// +// The purpose of CordzUpdateTracker is to track the number of calls to methods +// updating Cord data for sampled cords. The class internally uses 'lossy' +// atomic operations: Cord is thread-compatible, so there is no need to +// synchronize updates. However, Cordz collection threads may call 'Value()' at +// any point, so the class needs to provide thread safe access. +// +// This class is thread-safe. But as per above comments, all non-const methods +// should be used single-threaded only: updates are thread-safe but lossy. +class CordzUpdateTracker { + public: + // Tracked update methods. + enum MethodIdentifier { + kUnknown, + kAppendBuffer, + kAppendCord, + kAppendExternalMemory, + kAppendString, + kAssignCord, + kAssignString, + kClear, + kConstructorCord, + kConstructorString, + kCordReader, + kFlatten, + kGetAppendRegion, + kMakeCordFromExternal, + kMoveAppendCord, + kMoveAssignCord, + kMovePrependCord, + kPrependBuffer, + kPrependCord, + kPrependString, + kRemovePrefix, + kRemoveSuffix, + kSubCord, + + // kNumMethods defines the number of entries: must be the last entry. + kNumMethods, + }; + + // Constructs a new instance. All counters are zero-initialized. + constexpr CordzUpdateTracker() noexcept : values_{} {} + + // Copy constructs a new instance. + CordzUpdateTracker(const CordzUpdateTracker& rhs) noexcept { *this = rhs; } + + // Assigns the provided value to this instance. + CordzUpdateTracker& operator=(const CordzUpdateTracker& rhs) noexcept { + for (int i = 0; i < kNumMethods; ++i) { + values_[i].store(rhs.values_[i].load(std::memory_order_relaxed), + std::memory_order_relaxed); + } + return *this; + } + + // Returns the value for the specified method. + int64_t Value(MethodIdentifier method) const { + return values_[method].load(std::memory_order_relaxed); + } + + // Increases the value for the specified method by `n` + void LossyAdd(MethodIdentifier method, int64_t n = 1) { + auto& value = values_[method]; + value.store(value.load(std::memory_order_relaxed) + n, + std::memory_order_relaxed); + } + + // Adds all the values from `src` to this instance + void LossyAdd(const CordzUpdateTracker& src) { + for (int i = 0; i < kNumMethods; ++i) { + MethodIdentifier method = static_cast<MethodIdentifier>(i); + if (int64_t value = src.Value(method)) { + LossyAdd(method, value); + } + } + } + + private: + // Until C++20 std::atomic is not constexpr default-constructible, so we need + // a wrapper for this class to be constexpr constructible. + class Counter : public std::atomic<int64_t> { + public: + constexpr Counter() noexcept : std::atomic<int64_t>(0) {} + }; + + Counter values_[kNumMethods]; +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_TRACKER_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/cordz_update_tracker_test.cc b/third_party/abseil-cpp/absl/strings/internal/cordz_update_tracker_test.cc new file mode 100644 index 0000000000..2348a17585 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/cordz_update_tracker_test.cc @@ -0,0 +1,145 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_update_tracker.h" + +#include <array> +#include <thread> // NOLINT + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/synchronization/notification.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::AnyOf; +using ::testing::Eq; + +using Method = CordzUpdateTracker::MethodIdentifier; +using Methods = std::array<Method, Method::kNumMethods>; + +// Returns an array of all methods defined in `MethodIdentifier` +Methods AllMethods() { + return Methods{Method::kUnknown, + Method::kAppendBuffer, + Method::kAppendCord, + Method::kAppendExternalMemory, + Method::kAppendString, + Method::kAssignCord, + Method::kAssignString, + Method::kClear, + Method::kConstructorCord, + Method::kConstructorString, + Method::kCordReader, + Method::kFlatten, + Method::kGetAppendRegion, + Method::kMakeCordFromExternal, + Method::kMoveAppendCord, + Method::kMoveAssignCord, + Method::kMovePrependCord, + Method::kPrependBuffer, + Method::kPrependCord, + Method::kPrependString, + Method::kRemovePrefix, + Method::kRemoveSuffix, + Method::kSubCord}; +} + +TEST(CordzUpdateTracker, IsConstExprAndInitializesToZero) { + constexpr CordzUpdateTracker tracker; + for (Method method : AllMethods()) { + ASSERT_THAT(tracker.Value(method), Eq(0)); + } +} + +TEST(CordzUpdateTracker, LossyAdd) { + int64_t n = 1; + CordzUpdateTracker tracker; + for (Method method : AllMethods()) { + tracker.LossyAdd(method, n); + EXPECT_THAT(tracker.Value(method), Eq(n)); + n += 2; + } +} + +TEST(CordzUpdateTracker, CopyConstructor) { + int64_t n = 1; + CordzUpdateTracker src; + for (Method method : AllMethods()) { + src.LossyAdd(method, n); + n += 2; + } + + n = 1; + CordzUpdateTracker tracker(src); + for (Method method : AllMethods()) { + EXPECT_THAT(tracker.Value(method), Eq(n)); + n += 2; + } +} + +TEST(CordzUpdateTracker, OperatorAssign) { + int64_t n = 1; + CordzUpdateTracker src; + CordzUpdateTracker tracker; + for (Method method : AllMethods()) { + src.LossyAdd(method, n); + n += 2; + } + + n = 1; + tracker = src; + for (Method method : AllMethods()) { + EXPECT_THAT(tracker.Value(method), Eq(n)); + n += 2; + } +} + +TEST(CordzUpdateTracker, ThreadSanitizedValueCheck) { + absl::Notification done; + CordzUpdateTracker tracker; + + std::thread reader([&done, &tracker] { + while (!done.HasBeenNotified()) { + int n = 1; + for (Method method : AllMethods()) { + EXPECT_THAT(tracker.Value(method), AnyOf(Eq(n), Eq(0))); + n += 2; + } + } + int n = 1; + for (Method method : AllMethods()) { + EXPECT_THAT(tracker.Value(method), Eq(n)); + n += 2; + } + }); + + int64_t n = 1; + for (Method method : AllMethods()) { + tracker.LossyAdd(method, n); + n += 2; + } + done.Notify(); + reader.join(); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/numbers_test_common.h b/third_party/abseil-cpp/absl/strings/internal/numbers_test_common.h index 1a1e50c4d8..eaa88a8897 100644 --- a/third_party/abseil-cpp/absl/strings/internal/numbers_test_common.h +++ b/third_party/abseil-cpp/absl/strings/internal/numbers_test_common.h @@ -170,7 +170,7 @@ inline const std::array<uint64_test_case, 34>& strtouint64_test_cases() { {"0x1234", true, 16, 0x1234}, - // Base-10 std::string version. + // Base-10 string version. {"1234", true, 0, 1234}, {nullptr, false, 0, 0}, }}; diff --git a/third_party/abseil-cpp/absl/strings/internal/resize_uninitialized.h b/third_party/abseil-cpp/absl/strings/internal/resize_uninitialized.h index e42628e394..49859dcc7d 100644 --- a/third_party/abseil-cpp/absl/strings/internal/resize_uninitialized.h +++ b/third_party/abseil-cpp/absl/strings/internal/resize_uninitialized.h @@ -17,6 +17,7 @@ #ifndef ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ #define ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ +#include <algorithm> #include <string> #include <type_traits> #include <utility> @@ -28,8 +29,9 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace strings_internal { -// Is a subclass of true_type or false_type, depending on whether or not -// T has a __resize_default_init member. +// In this type trait, we look for a __resize_default_init member function, and +// we use it if available, otherwise, we use resize. We provide HasMember to +// indicate whether __resize_default_init is present. template <typename string_type, typename = void> struct ResizeUninitializedTraits { using HasMember = std::false_type; @@ -66,6 +68,50 @@ inline void STLStringResizeUninitialized(string_type* s, size_t new_size) { ResizeUninitializedTraits<string_type>::Resize(s, new_size); } +// Used to ensure exponential growth so that the amortized complexity of +// increasing the string size by a small amount is O(1), in contrast to +// O(str->size()) in the case of precise growth. +template <typename string_type> +void STLStringReserveAmortized(string_type* s, size_t new_size) { + const size_t cap = s->capacity(); + if (new_size > cap) { + // Make sure to always grow by at least a factor of 2x. + s->reserve((std::max)(new_size, 2 * cap)); + } +} + +// In this type trait, we look for an __append_default_init member function, and +// we use it if available, otherwise, we use append. +template <typename string_type, typename = void> +struct AppendUninitializedTraits { + static void Append(string_type* s, size_t n) { + s->append(n, typename string_type::value_type()); + } +}; + +template <typename string_type> +struct AppendUninitializedTraits< + string_type, absl::void_t<decltype(std::declval<string_type&>() + .__append_default_init(237))> > { + static void Append(string_type* s, size_t n) { + s->__append_default_init(n); + } +}; + +// Like STLStringResizeUninitialized(str, new_size), except guaranteed to use +// exponential growth so that the amortized complexity of increasing the string +// size by a small amount is O(1), in contrast to O(str->size()) in the case of +// precise growth. +template <typename string_type> +void STLStringResizeUninitializedAmortized(string_type* s, size_t new_size) { + const size_t size = s->size(); + if (new_size > size) { + AppendUninitializedTraits<string_type>::Append(s, new_size - size); + } else { + s->erase(new_size); + } +} + } // namespace strings_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/internal/resize_uninitialized_test.cc b/third_party/abseil-cpp/absl/strings/internal/resize_uninitialized_test.cc index 0f8b3c2a95..ad1b9c58f3 100644 --- a/third_party/abseil-cpp/absl/strings/internal/resize_uninitialized_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/resize_uninitialized_test.cc @@ -19,64 +19,115 @@ namespace { int resize_call_count = 0; +int append_call_count = 0; // A mock string class whose only purpose is to track how many times its -// resize() method has been called. +// resize()/append() methods have been called. struct resizable_string { + using value_type = char; size_t size() const { return 0; } + size_t capacity() const { return 0; } char& operator[](size_t) { static char c = '\0'; return c; } void resize(size_t) { resize_call_count += 1; } + void append(size_t, value_type) { append_call_count += 1; } + void reserve(size_t) {} + resizable_string& erase(size_t = 0, size_t = 0) { return *this; } }; int resize_default_init_call_count = 0; +int append_default_init_call_count = 0; // A mock string class whose only purpose is to track how many times its -// resize() and __resize_default_init() methods have been called. -struct resize_default_init_string { +// resize()/__resize_default_init()/append()/__append_default_init() methods +// have been called. +struct default_init_string { size_t size() const { return 0; } + size_t capacity() const { return 0; } char& operator[](size_t) { static char c = '\0'; return c; } void resize(size_t) { resize_call_count += 1; } void __resize_default_init(size_t) { resize_default_init_call_count += 1; } + void __append_default_init(size_t) { append_default_init_call_count += 1; } + void reserve(size_t) {} + default_init_string& erase(size_t = 0, size_t = 0) { return *this; } }; TEST(ResizeUninit, WithAndWithout) { resize_call_count = 0; + append_call_count = 0; resize_default_init_call_count = 0; + append_default_init_call_count = 0; { resizable_string rs; EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(append_call_count, 0); EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_EQ(append_default_init_call_count, 0); EXPECT_FALSE( absl::strings_internal::STLStringSupportsNontrashingResize(&rs)); EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(append_call_count, 0); EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_EQ(append_default_init_call_count, 0); absl::strings_internal::STLStringResizeUninitialized(&rs, 237); EXPECT_EQ(resize_call_count, 1); + EXPECT_EQ(append_call_count, 0); EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_EQ(append_default_init_call_count, 0); + absl::strings_internal::STLStringResizeUninitializedAmortized(&rs, 1000); + EXPECT_EQ(resize_call_count, 1); + EXPECT_EQ(append_call_count, 1); + EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_EQ(append_default_init_call_count, 0); } resize_call_count = 0; + append_call_count = 0; resize_default_init_call_count = 0; + append_default_init_call_count = 0; { - resize_default_init_string rus; + default_init_string rus; EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(append_call_count, 0); EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_EQ(append_default_init_call_count, 0); EXPECT_TRUE( absl::strings_internal::STLStringSupportsNontrashingResize(&rus)); EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(append_call_count, 0); EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_EQ(append_default_init_call_count, 0); absl::strings_internal::STLStringResizeUninitialized(&rus, 237); EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(append_call_count, 0); + EXPECT_EQ(resize_default_init_call_count, 1); + EXPECT_EQ(append_default_init_call_count, 0); + absl::strings_internal::STLStringResizeUninitializedAmortized(&rus, 1000); + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(append_call_count, 0); EXPECT_EQ(resize_default_init_call_count, 1); + EXPECT_EQ(append_default_init_call_count, 1); + } +} + +TEST(ResizeUninit, Amortized) { + std::string str; + size_t prev_cap = str.capacity(); + int cap_increase_count = 0; + for (int i = 0; i < 1000; ++i) { + absl::strings_internal::STLStringResizeUninitializedAmortized(&str, i); + size_t new_cap = str.capacity(); + if (new_cap > prev_cap) ++cap_increase_count; + prev_cap = new_cap; } + EXPECT_LT(cap_increase_count, 50); } } // namespace diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/arg.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/arg.cc index 4d0604e00c..e28a29b171 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/arg.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/arg.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + // // POSIX spec: // http://pubs.opengroup.org/onlinepubs/009695399/functions/fprintf.html @@ -12,14 +26,13 @@ #include "absl/base/port.h" #include "absl/strings/internal/str_format/float_conversion.h" +#include "absl/strings/numbers.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { namespace { -const char kDigit[2][32] = { "0123456789abcdef", "0123456789ABCDEF" }; - // Reduce *capacity by s.size(), clipped to a 0 minimum. void ReducePadding(string_view s, size_t *capacity) { *capacity = Excess(s.size(), *capacity); @@ -48,125 +61,179 @@ struct IsSigned<absl::int128> : std::true_type {}; template <> struct IsSigned<absl::uint128> : std::false_type {}; -class ConvertedIntInfo { +// Integral digit printer. +// Call one of the PrintAs* routines after construction once. +// Use with_neg_and_zero/without_neg_or_zero/is_negative to access the results. +class IntDigits { public: + // Print the unsigned integer as octal. + // Supports unsigned integral types and uint128. template <typename T> - ConvertedIntInfo(T v, ConversionChar conv) { - using Unsigned = typename MakeUnsigned<T>::type; - auto u = static_cast<Unsigned>(v); - if (IsNeg(v)) { - is_neg_ = true; - u = Unsigned{} - u; - } else { - is_neg_ = false; + void PrintAsOct(T v) { + static_assert(!IsSigned<T>::value, ""); + char *p = storage_ + sizeof(storage_); + do { + *--p = static_cast<char>('0' + (static_cast<size_t>(v) & 7)); + v >>= 3; + } while (v); + start_ = p; + size_ = storage_ + sizeof(storage_) - p; + } + + // Print the signed or unsigned integer as decimal. + // Supports all integral types. + template <typename T> + void PrintAsDec(T v) { + static_assert(std::is_integral<T>::value, ""); + start_ = storage_; + size_ = numbers_internal::FastIntToBuffer(v, storage_) - storage_; + } + + void PrintAsDec(int128 v) { + auto u = static_cast<uint128>(v); + bool add_neg = false; + if (v < 0) { + add_neg = true; + u = uint128{} - u; } - UnsignedToStringRight(u, conv); + PrintAsDec(u, add_neg); } - string_view digits() const { - return {end() - size_, static_cast<size_t>(size_)}; + void PrintAsDec(uint128 v, bool add_neg = false) { + // This function can be sped up if needed. We can call FastIntToBuffer + // twice, or fix FastIntToBuffer to support uint128. + char *p = storage_ + sizeof(storage_); + do { + p -= 2; + numbers_internal::PutTwoDigits(static_cast<size_t>(v % 100), p); + v /= 100; + } while (v); + if (p[0] == '0') { + // We printed one too many hexits. + ++p; + } + if (add_neg) { + *--p = '-'; + } + size_ = storage_ + sizeof(storage_) - p; + start_ = p; } - bool is_neg() const { return is_neg_; } - private: - template <typename T, bool IsSigned> - struct IsNegImpl { - static bool Eval(T v) { return v < 0; } - }; + // Print the unsigned integer as hex using lowercase. + // Supports unsigned integral types and uint128. template <typename T> - struct IsNegImpl<T, false> { - static bool Eval(T) { - return false; + void PrintAsHexLower(T v) { + static_assert(!IsSigned<T>::value, ""); + char *p = storage_ + sizeof(storage_); + + do { + p -= 2; + constexpr const char* table = numbers_internal::kHexTable; + std::memcpy(p, table + 2 * (static_cast<size_t>(v) & 0xFF), 2); + if (sizeof(T) == 1) break; + v >>= 8; + } while (v); + if (p[0] == '0') { + // We printed one too many digits. + ++p; } - }; + start_ = p; + size_ = storage_ + sizeof(storage_) - p; + } + // Print the unsigned integer as hex using uppercase. + // Supports unsigned integral types and uint128. template <typename T> - bool IsNeg(T v) { - return IsNegImpl<T, IsSigned<T>::value>::Eval(v); + void PrintAsHexUpper(T v) { + static_assert(!IsSigned<T>::value, ""); + char *p = storage_ + sizeof(storage_); + + // kHexTable is only lowercase, so do it manually for uppercase. + do { + *--p = "0123456789ABCDEF"[static_cast<size_t>(v) & 15]; + v >>= 4; + } while (v); + start_ = p; + size_ = storage_ + sizeof(storage_) - p; } - template <typename T> - void UnsignedToStringRight(T u, ConversionChar conv) { - char *p = end(); - switch (FormatConversionCharRadix(conv)) { - default: - case 10: - for (; u; u /= 10) - *--p = static_cast<char>('0' + static_cast<size_t>(u % 10)); - break; - case 8: - for (; u; u /= 8) - *--p = static_cast<char>('0' + static_cast<size_t>(u % 8)); - break; - case 16: { - const char *digits = kDigit[FormatConversionCharIsUpper(conv) ? 1 : 0]; - for (; u; u /= 16) *--p = digits[static_cast<size_t>(u % 16)]; - break; - } - } - size_ = static_cast<int>(end() - p); + // The printed value including the '-' sign if available. + // For inputs of value `0`, this will return "0" + string_view with_neg_and_zero() const { return {start_, size_}; } + + // The printed value not including the '-' sign. + // For inputs of value `0`, this will return "". + string_view without_neg_or_zero() const { + static_assert('-' < '0', "The check below verifies both."); + size_t advance = start_[0] <= '0' ? 1 : 0; + return {start_ + advance, size_ - advance}; } - const char *end() const { return storage_ + sizeof(storage_); } - char *end() { return storage_ + sizeof(storage_); } + bool is_negative() const { return start_[0] == '-'; } - bool is_neg_; - int size_; - // Max size: 128 bit value as octal -> 43 digits - char storage_[128 / 3 + 1]; + private: + const char *start_; + size_t size_; + // Max size: 128 bit value as octal -> 43 digits, plus sign char + char storage_[128 / 3 + 1 + 1]; }; // Note: 'o' conversions do not have a base indicator, it's just that // the '#' flag is specified to modify the precision for 'o' conversions. -string_view BaseIndicator(const ConvertedIntInfo &info, - const ConversionSpec conv) { - bool alt = conv.flags().alt; - int radix = FormatConversionCharRadix(conv.conv()); - if (conv.conv() == ConversionChar::p) alt = true; // always show 0x for %p. +string_view BaseIndicator(const IntDigits &as_digits, + const FormatConversionSpecImpl conv) { + // always show 0x for %p. + bool alt = conv.has_alt_flag() || + conv.conversion_char() == FormatConversionCharInternal::p; + bool hex = (conv.conversion_char() == FormatConversionCharInternal::x || + conv.conversion_char() == FormatConversionCharInternal::X || + conv.conversion_char() == FormatConversionCharInternal::p); // From the POSIX description of '#' flag: // "For x or X conversion specifiers, a non-zero result shall have // 0x (or 0X) prefixed to it." - if (alt && radix == 16 && !info.digits().empty()) { - if (FormatConversionCharIsUpper(conv.conv())) return "0X"; - return "0x"; + if (alt && hex && !as_digits.without_neg_or_zero().empty()) { + return conv.conversion_char() == FormatConversionCharInternal::X ? "0X" + : "0x"; } return {}; } -string_view SignColumn(bool neg, const ConversionSpec conv) { - if (FormatConversionCharIsSigned(conv.conv())) { +string_view SignColumn(bool neg, const FormatConversionSpecImpl conv) { + if (conv.conversion_char() == FormatConversionCharInternal::d || + conv.conversion_char() == FormatConversionCharInternal::i) { if (neg) return "-"; - if (conv.flags().show_pos) return "+"; - if (conv.flags().sign_col) return " "; + if (conv.has_show_pos_flag()) return "+"; + if (conv.has_sign_col_flag()) return " "; } return {}; } -bool ConvertCharImpl(unsigned char v, const ConversionSpec conv, +bool ConvertCharImpl(unsigned char v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { size_t fill = 0; if (conv.width() >= 0) fill = conv.width(); ReducePadding(1, &fill); - if (!conv.flags().left) sink->Append(fill, ' '); + if (!conv.has_left_flag()) sink->Append(fill, ' '); sink->Append(1, v); - if (conv.flags().left) sink->Append(fill, ' '); + if (conv.has_left_flag()) sink->Append(fill, ' '); return true; } -bool ConvertIntImplInner(const ConvertedIntInfo &info, - const ConversionSpec conv, FormatSinkImpl *sink) { +bool ConvertIntImplInnerSlow(const IntDigits &as_digits, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { // Print as a sequence of Substrings: // [left_spaces][sign][base_indicator][zeroes][formatted][right_spaces] size_t fill = 0; if (conv.width() >= 0) fill = conv.width(); - string_view formatted = info.digits(); + string_view formatted = as_digits.without_neg_or_zero(); ReducePadding(formatted, &fill); - string_view sign = SignColumn(info.is_neg(), conv); + string_view sign = SignColumn(as_digits.is_negative(), conv); ReducePadding(sign, &fill); - string_view base_indicator = BaseIndicator(info, conv); + string_view base_indicator = BaseIndicator(as_digits, conv); ReducePadding(base_indicator, &fill); int precision = conv.precision(); @@ -174,7 +241,8 @@ bool ConvertIntImplInner(const ConvertedIntInfo &info, if (!precision_specified) precision = 1; - if (conv.flags().alt && conv.conv() == ConversionChar::o) { + if (conv.has_alt_flag() && + conv.conversion_char() == FormatConversionCharInternal::o) { // From POSIX description of the '#' (alt) flag: // "For o conversion, it increases the precision (if necessary) to // force the first digit of the result to be zero." @@ -187,13 +255,13 @@ bool ConvertIntImplInner(const ConvertedIntInfo &info, size_t num_zeroes = Excess(formatted.size(), precision); ReducePadding(num_zeroes, &fill); - size_t num_left_spaces = !conv.flags().left ? fill : 0; - size_t num_right_spaces = conv.flags().left ? fill : 0; + size_t num_left_spaces = !conv.has_left_flag() ? fill : 0; + size_t num_right_spaces = conv.has_left_flag() ? fill : 0; // From POSIX description of the '0' (zero) flag: // "For d, i, o, u, x, and X conversion specifiers, if a precision // is specified, the '0' flag is ignored." - if (!precision_specified && conv.flags().zero) { + if (!precision_specified && conv.has_zero_flag()) { num_zeroes += num_left_spaces; num_left_spaces = 0; } @@ -208,71 +276,97 @@ bool ConvertIntImplInner(const ConvertedIntInfo &info, } template <typename T> -bool ConvertIntImplInner(T v, const ConversionSpec conv, FormatSinkImpl *sink) { - ConvertedIntInfo info(v, conv.conv()); - if (conv.flags().basic && (conv.conv() != ConversionChar::p)) { - if (info.is_neg()) sink->Append(1, '-'); - if (info.digits().empty()) { - sink->Append(1, '0'); - } else { - sink->Append(info.digits()); - } - return true; +bool ConvertIntArg(T v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + using U = typename MakeUnsigned<T>::type; + IntDigits as_digits; + + // This odd casting is due to a bug in -Wswitch behavior in gcc49 which causes + // it to complain about a switch/case type mismatch, even though both are + // FormatConverionChar. Likely this is because at this point + // FormatConversionChar is declared, but not defined. + switch (static_cast<uint8_t>(conv.conversion_char())) { + case static_cast<uint8_t>(FormatConversionCharInternal::c): + return ConvertCharImpl(static_cast<unsigned char>(v), conv, sink); + + case static_cast<uint8_t>(FormatConversionCharInternal::o): + as_digits.PrintAsOct(static_cast<U>(v)); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::x): + as_digits.PrintAsHexLower(static_cast<U>(v)); + break; + case static_cast<uint8_t>(FormatConversionCharInternal::X): + as_digits.PrintAsHexUpper(static_cast<U>(v)); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::u): + as_digits.PrintAsDec(static_cast<U>(v)); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::d): + case static_cast<uint8_t>(FormatConversionCharInternal::i): + as_digits.PrintAsDec(v); + break; + + case static_cast<uint8_t>(FormatConversionCharInternal::a): + case static_cast<uint8_t>(FormatConversionCharInternal::e): + case static_cast<uint8_t>(FormatConversionCharInternal::f): + case static_cast<uint8_t>(FormatConversionCharInternal::g): + case static_cast<uint8_t>(FormatConversionCharInternal::A): + case static_cast<uint8_t>(FormatConversionCharInternal::E): + case static_cast<uint8_t>(FormatConversionCharInternal::F): + case static_cast<uint8_t>(FormatConversionCharInternal::G): + return ConvertFloatImpl(static_cast<double>(v), conv, sink); + + default: + ABSL_INTERNAL_ASSUME(false); } - return ConvertIntImplInner(info, conv, sink); -} -template <typename T> -bool ConvertIntArg(T v, const ConversionSpec conv, FormatSinkImpl *sink) { - if (FormatConversionCharIsFloat(conv.conv())) { - return FormatConvertImpl(static_cast<double>(v), conv, sink).value; - } - if (conv.conv() == ConversionChar::c) - return ConvertCharImpl(static_cast<unsigned char>(v), conv, sink); - if (!FormatConversionCharIsIntegral(conv.conv())) return false; - if (!FormatConversionCharIsSigned(conv.conv()) && IsSigned<T>::value) { - using U = typename MakeUnsigned<T>::type; - return FormatConvertImpl(static_cast<U>(v), conv, sink).value; + if (conv.is_basic()) { + sink->Append(as_digits.with_neg_and_zero()); + return true; } - return ConvertIntImplInner(v, conv, sink); + return ConvertIntImplInnerSlow(as_digits, conv, sink); } template <typename T> -bool ConvertFloatArg(T v, const ConversionSpec conv, FormatSinkImpl *sink) { - return FormatConversionCharIsFloat(conv.conv()) && +bool ConvertFloatArg(T v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + return FormatConversionCharIsFloat(conv.conversion_char()) && ConvertFloatImpl(v, conv, sink); } -inline bool ConvertStringArg(string_view v, const ConversionSpec conv, +inline bool ConvertStringArg(string_view v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { - if (conv.conv() != ConversionChar::s) return false; - if (conv.flags().basic) { + if (conv.is_basic()) { sink->Append(v); return true; } return sink->PutPaddedString(v, conv.width(), conv.precision(), - conv.flags().left); + conv.has_left_flag()); } } // namespace // ==================== Strings ==================== -ConvertResult<Conv::s> FormatConvertImpl(const std::string &v, - const ConversionSpec conv, - FormatSinkImpl *sink) { +StringConvertResult FormatConvertImpl(const std::string &v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { return {ConvertStringArg(v, conv, sink)}; } -ConvertResult<Conv::s> FormatConvertImpl(string_view v, - const ConversionSpec conv, - FormatSinkImpl *sink) { +StringConvertResult FormatConvertImpl(string_view v, + const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { return {ConvertStringArg(v, conv, sink)}; } -ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char *v, - const ConversionSpec conv, - FormatSinkImpl *sink) { - if (conv.conv() == ConversionChar::p) +ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)> +FormatConvertImpl(const char *v, const FormatConversionSpecImpl conv, + FormatSinkImpl *sink) { + if (conv.conversion_char() == FormatConversionCharInternal::p) return {FormatConvertImpl(VoidPtr(v), conv, sink).value}; size_t len; if (v == nullptr) { @@ -287,93 +381,99 @@ ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char *v, } // ==================== Raw pointers ==================== -ConvertResult<Conv::p> FormatConvertImpl(VoidPtr v, const ConversionSpec conv, - FormatSinkImpl *sink) { - if (conv.conv() != ConversionChar::p) return {false}; +ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl( + VoidPtr v, const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { if (!v.value) { sink->Append("(nil)"); return {true}; } - return {ConvertIntImplInner(v.value, conv, sink)}; + IntDigits as_digits; + as_digits.PrintAsHexLower(v.value); + return {ConvertIntImplInnerSlow(as_digits, conv, sink)}; } // ==================== Floats ==================== -FloatingConvertResult FormatConvertImpl(float v, const ConversionSpec conv, +FloatingConvertResult FormatConvertImpl(float v, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertFloatArg(v, conv, sink)}; } -FloatingConvertResult FormatConvertImpl(double v, const ConversionSpec conv, +FloatingConvertResult FormatConvertImpl(double v, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertFloatArg(v, conv, sink)}; } FloatingConvertResult FormatConvertImpl(long double v, - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertFloatArg(v, conv, sink)}; } // ==================== Chars ==================== -IntegralConvertResult FormatConvertImpl(char v, const ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(char v, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(signed char v, - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(unsigned char v, - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } // ==================== Ints ==================== IntegralConvertResult FormatConvertImpl(short v, // NOLINT - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } -IntegralConvertResult FormatConvertImpl(int v, const ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(int v, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } -IntegralConvertResult FormatConvertImpl(unsigned v, const ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(unsigned v, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(long v, // NOLINT - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(long long v, // NOLINT - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(absl::int128 v, - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } IntegralConvertResult FormatConvertImpl(absl::uint128 v, - const ConversionSpec conv, + const FormatConversionSpecImpl conv, FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/arg.h b/third_party/abseil-cpp/absl/strings/internal/str_format/arg.h index 7a93756305..3c91be701f 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/arg.h +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/arg.h @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ @@ -25,16 +39,37 @@ class Cord; class FormatCountCapture; class FormatSink; +template <absl::FormatConversionCharSet C> +struct FormatConvertResult; +class FormatConversionSpec; + namespace str_format_internal { template <typename T, typename = void> struct HasUserDefinedConvert : std::false_type {}; template <typename T> -struct HasUserDefinedConvert< - T, void_t<decltype(AbslFormatConvert( - std::declval<const T&>(), std::declval<ConversionSpec>(), - std::declval<FormatSink*>()))>> : std::true_type {}; +struct HasUserDefinedConvert<T, void_t<decltype(AbslFormatConvert( + std::declval<const T&>(), + std::declval<const FormatConversionSpec&>(), + std::declval<FormatSink*>()))>> + : std::true_type {}; + +void AbslFormatConvert(); // Stops the lexical name lookup +template <typename T> +auto FormatConvertImpl(const T& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) + -> decltype(AbslFormatConvert(v, + std::declval<const FormatConversionSpec&>(), + std::declval<FormatSink*>())) { + using FormatConversionSpecT = + absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatConversionSpec>; + using FormatSinkT = + absl::enable_if_t<sizeof(const T& (*)()) != 0, FormatSink>; + auto fcs = conv.Wrap<FormatConversionSpecT>(); + auto fs = sink->Wrap<FormatSinkT>(); + return AbslFormatConvert(v, fcs, &fs); +} template <typename T> class StreamedWrapper; @@ -43,6 +78,13 @@ class StreamedWrapper; // then convert it, appending to `sink` and return `true`. // Otherwise fail and return `false`. +// AbslFormatConvert(v, conv, sink) is intended to be found by ADL on 'v' +// as an extension mechanism. These FormatConvertImpl functions are the default +// implementations. +// The ADL search is augmented via the 'Sink*' parameter, which also +// serves as a disambiguator to reject possible unintended 'AbslFormatConvert' +// functions in the namespaces associated with 'v'. + // Raw pointers. struct VoidPtr { VoidPtr() = default; @@ -52,27 +94,53 @@ struct VoidPtr { : value(ptr ? reinterpret_cast<uintptr_t>(ptr) : 0) {} uintptr_t value; }; -ConvertResult<Conv::p> FormatConvertImpl(VoidPtr v, ConversionSpec conv, - FormatSinkImpl* sink); + +template <FormatConversionCharSet C> +struct ArgConvertResult { + bool value; +}; + +template <FormatConversionCharSet C> +constexpr FormatConversionCharSet ExtractCharSet(FormatConvertResult<C>) { + return C; +} + +template <FormatConversionCharSet C> +constexpr FormatConversionCharSet ExtractCharSet(ArgConvertResult<C>) { + return C; +} + +using StringConvertResult = + ArgConvertResult<FormatConversionCharSetInternal::s>; +ArgConvertResult<FormatConversionCharSetInternal::p> FormatConvertImpl( + VoidPtr v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); // Strings. -ConvertResult<Conv::s> FormatConvertImpl(const std::string& v, - ConversionSpec conv, - FormatSinkImpl* sink); -ConvertResult<Conv::s> FormatConvertImpl(string_view v, ConversionSpec conv, - FormatSinkImpl* sink); -ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char* v, - ConversionSpec conv, - FormatSinkImpl* sink); -template <class AbslCord, - typename std::enable_if< - std::is_same<AbslCord, absl::Cord>::value>::type* = nullptr> -ConvertResult<Conv::s> FormatConvertImpl(const AbslCord& value, - ConversionSpec conv, - FormatSinkImpl* sink) { - if (conv.conv() != ConversionChar::s) return {false}; - - bool is_left = conv.flags().left; +StringConvertResult FormatConvertImpl(const std::string& v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +StringConvertResult FormatConvertImpl(string_view v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink); +#if defined(ABSL_HAVE_STD_STRING_VIEW) && !defined(ABSL_USES_STD_STRING_VIEW) +inline StringConvertResult FormatConvertImpl(std::string_view v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + return FormatConvertImpl(absl::string_view(v.data(), v.size()), conv, sink); +} +#endif // ABSL_HAVE_STD_STRING_VIEW && !ABSL_USES_STD_STRING_VIEW + +ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)> +FormatConvertImpl(const char* v, const FormatConversionSpecImpl conv, + FormatSinkImpl* sink); + +template <class AbslCord, typename std::enable_if<std::is_same< + AbslCord, absl::Cord>::value>::type* = nullptr> +StringConvertResult FormatConvertImpl(const AbslCord& value, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + bool is_left = conv.has_left_flag(); size_t space_remaining = 0; int width = conv.width(); @@ -105,55 +173,63 @@ ConvertResult<Conv::s> FormatConvertImpl(const AbslCord& value, return {true}; } -using IntegralConvertResult = - ConvertResult<Conv::c | Conv::numeric | Conv::star>; -using FloatingConvertResult = ConvertResult<Conv::floating>; +using IntegralConvertResult = ArgConvertResult<FormatConversionCharSetUnion( + FormatConversionCharSetInternal::c, + FormatConversionCharSetInternal::kNumeric, + FormatConversionCharSetInternal::kStar)>; +using FloatingConvertResult = + ArgConvertResult<FormatConversionCharSetInternal::kFloating>; // Floats. -FloatingConvertResult FormatConvertImpl(float v, ConversionSpec conv, +FloatingConvertResult FormatConvertImpl(float v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); -FloatingConvertResult FormatConvertImpl(double v, ConversionSpec conv, +FloatingConvertResult FormatConvertImpl(double v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); -FloatingConvertResult FormatConvertImpl(long double v, ConversionSpec conv, +FloatingConvertResult FormatConvertImpl(long double v, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); // Chars. -IntegralConvertResult FormatConvertImpl(char v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(char v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); -IntegralConvertResult FormatConvertImpl(signed char v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(signed char v, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); -IntegralConvertResult FormatConvertImpl(unsigned char v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(unsigned char v, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); // Ints. IntegralConvertResult FormatConvertImpl(short v, // NOLINT - ConversionSpec conv, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); IntegralConvertResult FormatConvertImpl(unsigned short v, // NOLINT - ConversionSpec conv, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); -IntegralConvertResult FormatConvertImpl(int v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(int v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); -IntegralConvertResult FormatConvertImpl(unsigned v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(unsigned v, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); IntegralConvertResult FormatConvertImpl(long v, // NOLINT - ConversionSpec conv, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); IntegralConvertResult FormatConvertImpl(unsigned long v, // NOLINT - ConversionSpec conv, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); IntegralConvertResult FormatConvertImpl(long long v, // NOLINT - ConversionSpec conv, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT - ConversionSpec conv, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); -IntegralConvertResult FormatConvertImpl(int128 v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(int128 v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); -IntegralConvertResult FormatConvertImpl(uint128 v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(uint128 v, + FormatConversionSpecImpl conv, FormatSinkImpl* sink); template <typename T, enable_if_t<std::is_same<T, bool>::value, int> = 0> -IntegralConvertResult FormatConvertImpl(T v, ConversionSpec conv, +IntegralConvertResult FormatConvertImpl(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink) { return FormatConvertImpl(static_cast<int>(v), conv, sink); } @@ -164,12 +240,12 @@ template <typename T> typename std::enable_if<std::is_enum<T>::value && !HasUserDefinedConvert<T>::value, IntegralConvertResult>::type -FormatConvertImpl(T v, ConversionSpec conv, FormatSinkImpl* sink); +FormatConvertImpl(T v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); template <typename T> -ConvertResult<Conv::s> FormatConvertImpl(const StreamedWrapper<T>& v, - ConversionSpec conv, - FormatSinkImpl* out) { +StringConvertResult FormatConvertImpl(const StreamedWrapper<T>& v, + FormatConversionSpecImpl conv, + FormatSinkImpl* out) { std::ostringstream oss; oss << v.v_; if (!oss) return {false}; @@ -180,21 +256,24 @@ ConvertResult<Conv::s> FormatConvertImpl(const StreamedWrapper<T>& v, // until after FormatCountCapture is fully defined. struct FormatCountCaptureHelper { template <class T = int> - static ConvertResult<Conv::n> ConvertHelper(const FormatCountCapture& v, - ConversionSpec conv, - FormatSinkImpl* sink) { + static ArgConvertResult<FormatConversionCharSetInternal::n> ConvertHelper( + const FormatCountCapture& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { const absl::enable_if_t<sizeof(T) != 0, FormatCountCapture>& v2 = v; - if (conv.conv() != str_format_internal::ConversionChar::n) return {false}; + if (conv.conversion_char() != + str_format_internal::FormatConversionCharInternal::n) { + return {false}; + } *v2.p_ = static_cast<int>(sink->size()); return {true}; } }; template <class T = int> -ConvertResult<Conv::n> FormatConvertImpl(const FormatCountCapture& v, - ConversionSpec conv, - FormatSinkImpl* sink) { +ArgConvertResult<FormatConversionCharSetInternal::n> FormatConvertImpl( + const FormatCountCapture& v, FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { return FormatCountCaptureHelper::ConvertHelper(v, conv, sink); } @@ -203,13 +282,13 @@ ConvertResult<Conv::n> FormatConvertImpl(const FormatCountCapture& v, struct FormatArgImplFriend { template <typename Arg> static bool ToInt(Arg arg, int* out) { - // A value initialized ConversionSpec has a `none` conv, which tells the - // dispatcher to run the `int` conversion. + // A value initialized FormatConversionSpecImpl has a `none` conv, which + // tells the dispatcher to run the `int` conversion. return arg.dispatcher_(arg.data_, {}, out); } template <typename Arg> - static bool Convert(Arg arg, str_format_internal::ConversionSpec conv, + static bool Convert(Arg arg, FormatConversionSpecImpl conv, FormatSinkImpl* out) { return arg.dispatcher_(arg.data_, conv, out); } @@ -220,6 +299,15 @@ struct FormatArgImplFriend { } }; +template <typename Arg> +constexpr FormatConversionCharSet ArgumentToConv() { + return absl::str_format_internal::ExtractCharSet( + decltype(str_format_internal::FormatConvertImpl( + std::declval<const Arg&>(), + std::declval<const FormatConversionSpecImpl&>(), + std::declval<FormatSinkImpl*>())){}); +} + // A type-erased handle to a format argument. class FormatArgImpl { private: @@ -233,7 +321,7 @@ class FormatArgImpl { char buf[kInlinedSpace]; }; - using Dispatcher = bool (*)(Data, ConversionSpec, void* out); + using Dispatcher = bool (*)(Data, FormatConversionSpecImpl, void* out); template <typename T> struct store_by_value @@ -375,15 +463,20 @@ class FormatArgImpl { } template <typename T> - static bool Dispatch(Data arg, ConversionSpec spec, void* out) { + static bool Dispatch(Data arg, FormatConversionSpecImpl spec, void* out) { // A `none` conv indicates that we want the `int` conversion. - if (ABSL_PREDICT_FALSE(spec.conv() == ConversionChar::none)) { + if (ABSL_PREDICT_FALSE(spec.conversion_char() == + FormatConversionCharInternal::kNone)) { return ToInt<T>(arg, static_cast<int*>(out), std::is_integral<T>(), std::is_enum<T>()); } - + if (ABSL_PREDICT_FALSE(!Contains(ArgumentToConv<T>(), + spec.conversion_char()))) { + return false; + } return str_format_internal::FormatConvertImpl( - Manager<T>::Value(arg), spec, static_cast<FormatSinkImpl*>(out)) + Manager<T>::Value(arg), spec, + static_cast<FormatSinkImpl*>(out)) .value; } @@ -391,8 +484,9 @@ class FormatArgImpl { Dispatcher dispatcher_; }; -#define ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(T, E) \ - E template bool FormatArgImpl::Dispatch<T>(Data, ConversionSpec, void*) +#define ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(T, E) \ + E template bool FormatArgImpl::Dispatch<T>(Data, FormatConversionSpecImpl, \ + void*) #define ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(...) \ ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(str_format_internal::VoidPtr, \ diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/arg_test.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/arg_test.cc index 8d30d8b8ce..1261937c30 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/arg_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/arg_test.cc @@ -6,6 +6,12 @@ // // https://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "absl/strings/internal/str_format/arg.h" #include <ostream> @@ -23,8 +29,17 @@ class FormatArgImplTest : public ::testing::Test { enum Color { kRed, kGreen, kBlue }; static const char *hi() { return "hi"; } + + struct X {}; + + X x_; }; +inline FormatConvertResult<FormatConversionCharSet{}> AbslFormatConvert( + const FormatArgImplTest::X &, const FormatConversionSpec &, FormatSink *) { + return {false}; +} + TEST_F(FormatArgImplTest, ToInt) { int out = 0; EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(1), &out)); @@ -59,6 +74,7 @@ TEST_F(FormatArgImplTest, ToInt) { FormatArgImpl(static_cast<int *>(nullptr)), &out)); EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(hi()), &out)); EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl("hi"), &out)); + EXPECT_FALSE(FormatArgImplFriend::ToInt(FormatArgImpl(x_), &out)); EXPECT_TRUE(FormatArgImplFriend::ToInt(FormatArgImpl(kBlue), &out)); EXPECT_EQ(2, out); } @@ -95,8 +111,9 @@ TEST_F(FormatArgImplTest, OtherPtrDecayToVoidPtr) { TEST_F(FormatArgImplTest, WorksWithCharArraysOfUnknownSize) { std::string s; FormatSinkImpl sink(&s); - ConversionSpec conv; - FormatConversionSpecImplFriend::SetConversionChar(ConversionChar::s, &conv); + FormatConversionSpecImpl conv; + FormatConversionSpecImplFriend::SetConversionChar( + FormatConversionCharInternal::s, &conv); FormatConversionSpecImplFriend::SetFlags(Flags(), &conv); FormatConversionSpecImplFriend::SetWidth(-1, &conv); FormatConversionSpecImplFriend::SetPrecision(-1, &conv); diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/bind.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/bind.cc index 27522fdb4f..c988ba8fd2 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/bind.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/bind.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "absl/strings/internal/str_format/bind.h" #include <cerrno> @@ -44,7 +58,7 @@ inline bool ArgContext::Bind(const UnboundConversion* unbound, if (static_cast<size_t>(arg_position - 1) >= pack_.size()) return false; arg = &pack_[arg_position - 1]; // 1-based - if (!unbound->flags.basic) { + if (unbound->flags != Flags::kBasic) { int width = unbound->width.value(); bool force_left = false; if (unbound->width.is_from_arg()) { @@ -70,9 +84,8 @@ inline bool ArgContext::Bind(const UnboundConversion* unbound, FormatConversionSpecImplFriend::SetPrecision(precision, bound); if (force_left) { - Flags flags = unbound->flags; - flags.left = true; - FormatConversionSpecImplFriend::SetFlags(flags, bound); + FormatConversionSpecImplFriend::SetFlags(unbound->flags | Flags::kLeft, + bound); } else { FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound); } @@ -147,7 +160,7 @@ class SummarizingConverter { << FormatConversionSpecImplFriend::FlagsToString(bound); if (bound.width() >= 0) ss << bound.width(); if (bound.precision() >= 0) ss << "." << bound.precision(); - ss << bound.conv() << "}"; + ss << bound.conversion_char() << "}"; Append(ss.str()); return true; } @@ -221,7 +234,7 @@ int FprintF(std::FILE* output, const UntypedFormatSpecImpl format, errno = sink.error(); return -1; } - if (sink.count() > std::numeric_limits<int>::max()) { + if (sink.count() > static_cast<size_t>(std::numeric_limits<int>::max())) { errno = EFBIG; return -1; } diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/bind.h b/third_party/abseil-cpp/absl/strings/internal/str_format/bind.h index cf41b19748..b26cff6648 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/bind.h +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/bind.h @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ @@ -19,7 +33,7 @@ class UntypedFormatSpec; namespace str_format_internal { -class BoundConversion : public ConversionSpec { +class BoundConversion : public FormatConversionSpecImpl { public: const FormatArgImpl* arg() const { return arg_; } void set_arg(const FormatArgImpl* a) { arg_ = a; } @@ -60,7 +74,7 @@ class UntypedFormatSpecImpl { size_t size_; }; -template <typename T, typename...> +template <typename T, FormatConversionCharSet...> struct MakeDependent { using type = T; }; @@ -68,7 +82,7 @@ struct MakeDependent { // Implicitly convertible from `const char*`, `string_view`, and the // `ExtendedParsedFormat` type. This abstraction allows all format functions to // operate on any without providing too many overloads. -template <typename... Args> +template <FormatConversionCharSet... Args> class FormatSpecTemplate : public MakeDependent<UntypedFormatSpec, Args...>::type { using Base = typename MakeDependent<UntypedFormatSpec, Args...>::type; @@ -76,17 +90,17 @@ class FormatSpecTemplate public: #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER - // Honeypot overload for when the std::string is not constexpr. + // Honeypot overload for when the string is not constexpr. // We use the 'unavailable' attribute to give a better compiler error than // just 'method is deleted'. FormatSpecTemplate(...) // NOLINT - __attribute__((unavailable("Format std::string is not constexpr."))); + __attribute__((unavailable("Format string is not constexpr."))); // Honeypot overload for when the format is constexpr and invalid. // We use the 'unavailable' attribute to give a better compiler error than // just 'method is deleted'. // To avoid checking the format twice, we just check that the format is - // constexpr. If is it valid, then the overload below will kick in. + // constexpr. If it is valid, then the overload below will kick in. // We add the template here to make this overload have lower priority. template <typename = void> FormatSpecTemplate(const char* s) // NOLINT @@ -105,13 +119,11 @@ class FormatSpecTemplate // Good format overload. FormatSpecTemplate(const char* s) // NOLINT - __attribute__((enable_if(ValidFormatImpl<ArgumentToConv<Args>()...>(s), - "bad format trap"))) + __attribute__((enable_if(ValidFormatImpl<Args...>(s), "bad format trap"))) : Base(s) {} FormatSpecTemplate(string_view s) // NOLINT - __attribute__((enable_if(ValidFormatImpl<ArgumentToConv<Args>()...>(s), - "bad format trap"))) + __attribute__((enable_if(ValidFormatImpl<Args...>(s), "bad format trap"))) : Base(s) {} #else // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER @@ -121,19 +133,15 @@ class FormatSpecTemplate #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER - template <Conv... C, typename = typename std::enable_if< - AllOf(sizeof...(C) == sizeof...(Args), - Contains(ArgumentToConv<Args>(), - C)...)>::type> + template < + FormatConversionCharSet... C, + typename = typename std::enable_if<sizeof...(C) == sizeof...(Args)>::type, + typename = typename std::enable_if<AllOf(Contains(Args, + C)...)>::type> FormatSpecTemplate(const ExtendedParsedFormat<C...>& pc) // NOLINT : Base(&pc) {} }; -template <typename... Args> -struct FormatSpecDeductionBarrier { - using type = FormatSpecTemplate<Args...>; -}; - class Streamable { public: Streamable(const UntypedFormatSpecImpl& format, @@ -196,9 +204,9 @@ class StreamedWrapper { private: template <typename S> - friend ConvertResult<Conv::s> FormatConvertImpl(const StreamedWrapper<S>& v, - ConversionSpec conv, - FormatSinkImpl* out); + friend ArgConvertResult<FormatConversionCharSetInternal::s> FormatConvertImpl( + const StreamedWrapper<S>& v, FormatConversionSpecImpl conv, + FormatSinkImpl* out); const T& v_; }; diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/bind_test.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/bind_test.cc index 64790a85fd..1eef9c4326 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/bind_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/bind_test.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "absl/strings/internal/str_format/bind.h" #include <string.h> diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/checker.h b/third_party/abseil-cpp/absl/strings/internal/str_format/checker.h index 8993a79b95..2a2601eccf 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/checker.h +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/checker.h @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ @@ -24,13 +38,6 @@ constexpr bool AllOf(bool b, T... t) { return b && AllOf(t...); } -template <typename Arg> -constexpr Conv ArgumentToConv() { - return decltype(str_format_internal::FormatConvertImpl( - std::declval<const Arg&>(), std::declval<const ConversionSpec&>(), - std::declval<FormatSinkImpl*>()))::kConv; -} - #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER constexpr bool ContainsChar(const char* chars, char c) { @@ -39,14 +46,14 @@ constexpr bool ContainsChar(const char* chars, char c) { // A constexpr compatible list of Convs. struct ConvList { - const Conv* array; + const FormatConversionCharSet* array; int count; // We do the bound check here to avoid having to do it on the callers. - // Returning an empty Conv has the same effect as short circuiting because it - // will never match any conversion. - constexpr Conv operator[](int i) const { - return i < count ? array[i] : Conv{}; + // Returning an empty FormatConversionCharSet has the same effect as + // short circuiting because it will never match any conversion. + constexpr FormatConversionCharSet operator[](int i) const { + return i < count ? array[i] : FormatConversionCharSet{}; } constexpr ConvList without_front() const { @@ -57,7 +64,7 @@ struct ConvList { template <size_t count> struct ConvListT { // Make sure the array has size > 0. - Conv list[count ? count : 1]; + FormatConversionCharSet list[count ? count : 1]; }; constexpr char GetChar(string_view str, size_t index) { @@ -310,7 +317,7 @@ class FormatParser { ConvList args_; }; -template <Conv... C> +template <FormatConversionCharSet... C> constexpr bool ValidFormatImpl(string_view format) { return FormatParser(format, {ConvListT<sizeof...(C)>{{C...}}.list, sizeof...(C)}) diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/checker_test.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/checker_test.cc index ea2a7681a6..7c70f47d68 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/checker_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/checker_test.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include <string> #include "gmock/gmock.h" @@ -9,18 +23,22 @@ ABSL_NAMESPACE_BEGIN namespace str_format_internal { namespace { -std::string ConvToString(Conv conv) { +std::string ConvToString(FormatConversionCharSet conv) { std::string out; -#define CONV_SET_CASE(c) \ - if (Contains(conv, Conv::c)) out += #c; +#define CONV_SET_CASE(c) \ + if (Contains(conv, FormatConversionCharSetInternal::c)) { \ + out += #c; \ + } ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(CONV_SET_CASE, ) #undef CONV_SET_CASE - if (Contains(conv, Conv::star)) out += "*"; + if (Contains(conv, FormatConversionCharSetInternal::kStar)) { + out += "*"; + } return out; } TEST(StrFormatChecker, ArgumentToConv) { - Conv conv = ArgumentToConv<std::string>(); + FormatConversionCharSet conv = ArgumentToConv<std::string>(); EXPECT_EQ(ConvToString(conv), "s"); conv = ArgumentToConv<const char*>(); diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/convert_test.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/convert_test.cc index cbcd7caf46..91e0360901 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/convert_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/convert_test.cc @@ -1,20 +1,46 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include <errno.h> #include <stdarg.h> #include <stdio.h> + #include <cctype> #include <cmath> +#include <limits> #include <string> +#include <thread> // NOLINT #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/base/internal/raw_logging.h" #include "absl/strings/internal/str_format/bind.h" +#include "absl/strings/match.h" +#include "absl/types/optional.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { namespace { +struct NativePrintfTraits { + bool hex_float_has_glibc_rounding; + bool hex_float_prefers_denormal_repr; + bool hex_float_uses_minimal_precision_when_not_specified; + bool hex_float_optimizes_leading_digit_bit_count; +}; + template <typename T, size_t N> size_t ArraySize(T (&)[N]) { return N; @@ -57,7 +83,7 @@ std::string Esc(const T &v) { return oss.str(); } -void StrAppend(std::string *dst, const char *format, va_list ap) { +void StrAppendV(std::string *dst, const char *format, va_list ap) { // First try with a small fixed size buffer static const int kSpaceLength = 1024; char space[kSpaceLength]; @@ -98,15 +124,79 @@ void StrAppend(std::string *dst, const char *format, va_list ap) { delete[] buf; } +void StrAppend(std::string *out, const char *format, ...) { + va_list ap; + va_start(ap, format); + StrAppendV(out, format, ap); + va_end(ap); +} + std::string StrPrint(const char *format, ...) { va_list ap; va_start(ap, format); std::string result; - StrAppend(&result, format, ap); + StrAppendV(&result, format, ap); va_end(ap); return result; } +NativePrintfTraits VerifyNativeImplementationImpl() { + NativePrintfTraits result; + + // >>> hex_float_has_glibc_rounding. To have glibc's rounding behavior we need + // to meet three requirements: + // + // - The threshold for rounding up is 8 (for e.g. MSVC uses 9). + // - If the digits lower than than the 8 are non-zero then we round up. + // - If the digits lower than the 8 are all zero then we round toward even. + // + // The numbers below represent all the cases covering {below,at,above} the + // threshold (8) with both {zero,non-zero} lower bits and both {even,odd} + // preceding digits. + const double d0079 = 65657.0; // 0x1.0079p+16 + const double d0179 = 65913.0; // 0x1.0179p+16 + const double d0080 = 65664.0; // 0x1.0080p+16 + const double d0180 = 65920.0; // 0x1.0180p+16 + const double d0081 = 65665.0; // 0x1.0081p+16 + const double d0181 = 65921.0; // 0x1.0181p+16 + result.hex_float_has_glibc_rounding = + StartsWith(StrPrint("%.2a", d0079), "0x1.00") && + StartsWith(StrPrint("%.2a", d0179), "0x1.01") && + StartsWith(StrPrint("%.2a", d0080), "0x1.00") && + StartsWith(StrPrint("%.2a", d0180), "0x1.02") && + StartsWith(StrPrint("%.2a", d0081), "0x1.01") && + StartsWith(StrPrint("%.2a", d0181), "0x1.02"); + + // >>> hex_float_prefers_denormal_repr. Formatting `denormal` on glibc yields + // "0x0.0000000000001p-1022", whereas on std libs that don't use denormal + // representation it would either be 0x1p-1074 or 0x1.0000000000000-1074. + const double denormal = std::numeric_limits<double>::denorm_min(); + result.hex_float_prefers_denormal_repr = + StartsWith(StrPrint("%a", denormal), "0x0.0000000000001"); + + // >>> hex_float_uses_minimal_precision_when_not_specified. Some (non-glibc) + // libs will format the following as "0x1.0079000000000p+16". + result.hex_float_uses_minimal_precision_when_not_specified = + (StrPrint("%a", d0079) == "0x1.0079p+16"); + + // >>> hex_float_optimizes_leading_digit_bit_count. The number 1.5, when + // formatted by glibc should yield "0x1.8p+0" for `double` and "0xcp-3" for + // `long double`, i.e., number of bits in the leading digit is adapted to the + // number of bits in the mantissa. + const double d_15 = 1.5; + const long double ld_15 = 1.5; + result.hex_float_optimizes_leading_digit_bit_count = + StartsWith(StrPrint("%a", d_15), "0x1.8") && + StartsWith(StrPrint("%La", ld_15), "0xc"); + + return result; +} + +const NativePrintfTraits &VerifyNativeImplementation() { + static NativePrintfTraits native_traits = VerifyNativeImplementationImpl(); + return native_traits; +} + class FormatConvertTest : public ::testing::Test { }; template <typename T> @@ -139,6 +229,9 @@ TEST_F(FormatConvertTest, BasicString) { TestStringConvert(static_cast<const char*>("hello")); TestStringConvert(std::string("hello")); TestStringConvert(string_view("hello")); +#if defined(ABSL_HAVE_STD_STRING_VIEW) + TestStringConvert(std::string_view("hello")); +#endif // ABSL_HAVE_STD_STRING_VIEW } TEST_F(FormatConvertTest, NullString) { @@ -463,17 +556,130 @@ TEST_F(FormatConvertTest, Uint128) { } } -TEST_F(FormatConvertTest, Float) { -#ifdef _MSC_VER - // MSVC has a different rounding policy than us so we can't test our - // implementation against the native one there. - return; -#endif // _MSC_VER +template <typename Floating> +void TestWithMultipleFormatsHelper(const std::vector<Floating> &floats, + const std::set<Floating> &skip_verify) { + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + // Reserve the space to ensure we don't allocate memory in the output itself. + std::string str_format_result; + str_format_result.reserve(1 << 20); + std::string string_printf_result; + string_printf_result.reserve(1 << 20); const char *const kFormats[] = { - "%", "%.3", "%8.5", "%9", "%.60", "%.30", "%03", "%+", - "% ", "%-10", "%#15.3", "%#.0", "%.0", "%1$*2$", "%1$.*2$"}; + "%", "%.3", "%8.5", "%500", "%.5000", "%.60", "%.30", "%03", + "%+", "% ", "%-10", "%#15.3", "%#.0", "%.0", "%1$*2$", "%1$.*2$"}; + + for (const char *fmt : kFormats) { + for (char f : {'f', 'F', // + 'g', 'G', // + 'a', 'A', // + 'e', 'E'}) { + std::string fmt_str = std::string(fmt) + f; + + if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F' && + f != 'a' && f != 'A') { + // This particular test takes way too long with snprintf. + // Disable for the case we are not implementing natively. + continue; + } + + if ((f == 'a' || f == 'A') && + !native_traits.hex_float_has_glibc_rounding) { + continue; + } + for (Floating d : floats) { + if (!native_traits.hex_float_prefers_denormal_repr && + (f == 'a' || f == 'A') && std::fpclassify(d) == FP_SUBNORMAL) { + continue; + } + int i = -10; + FormatArgImpl args[2] = {FormatArgImpl(d), FormatArgImpl(i)}; + UntypedFormatSpecImpl format(fmt_str); + + string_printf_result.clear(); + StrAppend(&string_printf_result, fmt_str.c_str(), d, i); + str_format_result.clear(); + + { + AppendPack(&str_format_result, format, absl::MakeSpan(args)); + } + +#ifdef _MSC_VER + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + continue; +#elif defined(__APPLE__) + // Apple formats NaN differently (+nan) vs. (nan) + if (std::isnan(d)) continue; +#endif + if (string_printf_result != str_format_result && + skip_verify.find(d) == skip_verify.end()) { + // We use ASSERT_EQ here because failures are usually correlated and a + // bug would print way too many failed expectations causing the test + // to time out. + ASSERT_EQ(string_printf_result, str_format_result) + << fmt_str << " " << StrPrint("%.18g", d) << " " + << StrPrint("%a", d) << " " << StrPrint("%.50f", d); + } + } + } + } +} + +TEST_F(FormatConvertTest, Float) { + std::vector<float> floats = {0.0f, + -0.0f, + .9999999f, + 9999999.f, + std::numeric_limits<float>::max(), + -std::numeric_limits<float>::max(), + std::numeric_limits<float>::min(), + -std::numeric_limits<float>::min(), + std::numeric_limits<float>::lowest(), + -std::numeric_limits<float>::lowest(), + std::numeric_limits<float>::epsilon(), + std::numeric_limits<float>::epsilon() + 1.0f, + std::numeric_limits<float>::infinity(), + -std::numeric_limits<float>::infinity(), + std::nanf("")}; + + // Some regression tests. + floats.push_back(0.999999989f); + + if (std::numeric_limits<float>::has_denorm != std::denorm_absent) { + floats.push_back(std::numeric_limits<float>::denorm_min()); + floats.push_back(-std::numeric_limits<float>::denorm_min()); + } + + for (float base : + {1.f, 12.f, 123.f, 1234.f, 12345.f, 123456.f, 1234567.f, 12345678.f, + 123456789.f, 1234567890.f, 12345678901.f, 12345678.f, 12345678.f}) { + for (int exp = -123; exp <= 123; ++exp) { + for (int sign : {1, -1}) { + floats.push_back(sign * std::ldexp(base, exp)); + } + } + } + + for (int exp = -300; exp <= 300; ++exp) { + const float all_ones_mantissa = 0xffffff; + floats.push_back(std::ldexp(all_ones_mantissa, exp)); + } + + // Remove duplicates to speed up the logic below. + std::sort(floats.begin(), floats.end()); + floats.erase(std::unique(floats.begin(), floats.end()), floats.end()); + + TestWithMultipleFormatsHelper(floats, {}); +} + +TEST_F(FormatConvertTest, Double) { + // For values that we know won't match the standard library implementation we + // skip verification, but still run the algorithm to catch asserts/sanitizer + // bugs. + std::set<double> skip_verify; std::vector<double> doubles = {0.0, -0.0, .99999999999999, @@ -487,12 +693,8 @@ TEST_F(FormatConvertTest, Float) { std::numeric_limits<double>::epsilon(), std::numeric_limits<double>::epsilon() + 1, std::numeric_limits<double>::infinity(), - -std::numeric_limits<double>::infinity()}; - -#ifndef __APPLE__ - // Apple formats NaN differently (+nan) vs. (nan) - doubles.push_back(std::nan("")); -#endif + -std::numeric_limits<double>::infinity(), + std::nan("")}; // Some regression tests. doubles.push_back(0.99999999999999989); @@ -512,43 +714,366 @@ TEST_F(FormatConvertTest, Float) { } } - for (const char *fmt : kFormats) { - for (char f : {'f', 'F', // - 'g', 'G', // - 'a', 'A', // - 'e', 'E'}) { - std::string fmt_str = std::string(fmt) + f; - for (double d : doubles) { - int i = -10; - FormatArgImpl args[2] = {FormatArgImpl(d), FormatArgImpl(i)}; - UntypedFormatSpecImpl format(fmt_str); - // We use ASSERT_EQ here because failures are usually correlated and a - // bug would print way too many failed expectations causing the test to - // time out. - ASSERT_EQ(StrPrint(fmt_str.c_str(), d, i), - FormatPack(format, absl::MakeSpan(args))) - << fmt_str << " " << StrPrint("%.18g", d) << " " - << StrPrint("%.999f", d); - } + // Workaround libc bug. + // https://sourceware.org/bugzilla/show_bug.cgi?id=22142 + const bool gcc_bug_22142 = + StrPrint("%f", std::numeric_limits<double>::max()) != + "1797693134862315708145274237317043567980705675258449965989174768031" + "5726078002853876058955863276687817154045895351438246423432132688946" + "4182768467546703537516986049910576551282076245490090389328944075868" + "5084551339423045832369032229481658085593321233482747978262041447231" + "68738177180919299881250404026184124858368.000000"; + + for (int exp = -300; exp <= 300; ++exp) { + const double all_ones_mantissa = 0x1fffffffffffff; + doubles.push_back(std::ldexp(all_ones_mantissa, exp)); + if (gcc_bug_22142) { + skip_verify.insert(doubles.back()); + } + } + + if (gcc_bug_22142) { + using L = std::numeric_limits<double>; + skip_verify.insert(L::max()); + skip_verify.insert(L::min()); // NOLINT + skip_verify.insert(L::denorm_min()); + skip_verify.insert(-L::max()); + skip_verify.insert(-L::min()); // NOLINT + skip_verify.insert(-L::denorm_min()); + } + + // Remove duplicates to speed up the logic below. + std::sort(doubles.begin(), doubles.end()); + doubles.erase(std::unique(doubles.begin(), doubles.end()), doubles.end()); + + TestWithMultipleFormatsHelper(doubles, skip_verify); +} + +TEST_F(FormatConvertTest, DoubleRound) { + std::string s; + const auto format = [&](const char *fmt, double d) -> std::string & { + s.clear(); + FormatArgImpl args[1] = {FormatArgImpl(d)}; + AppendPack(&s, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args)); +#if !defined(_MSC_VER) + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + EXPECT_EQ(StrPrint(fmt, d), s); +#endif // _MSC_VER + + return s; + }; + // All of these values have to be exactly represented. + // Otherwise we might not be testing what we think we are testing. + + // These values can fit in a 64bit "fast" representation. + const double exact_value = 0.00000000000005684341886080801486968994140625; + assert(exact_value == std::pow(2, -44)); + // Round up at a 5xx. + EXPECT_EQ(format("%.13f", exact_value), "0.0000000000001"); + // Round up at a >5 + EXPECT_EQ(format("%.14f", exact_value), "0.00000000000006"); + // Round down at a <5 + EXPECT_EQ(format("%.16f", exact_value), "0.0000000000000568"); + // Nine handling + EXPECT_EQ(format("%.35f", exact_value), + "0.00000000000005684341886080801486969"); + EXPECT_EQ(format("%.36f", exact_value), + "0.000000000000056843418860808014869690"); + // Round down the last nine. + EXPECT_EQ(format("%.37f", exact_value), + "0.0000000000000568434188608080148696899"); + EXPECT_EQ(format("%.10f", 0.000003814697265625), "0.0000038147"); + // Round up the last nine + EXPECT_EQ(format("%.11f", 0.000003814697265625), "0.00000381470"); + EXPECT_EQ(format("%.12f", 0.000003814697265625), "0.000003814697"); + + // Round to even (down) + EXPECT_EQ(format("%.43f", exact_value), + "0.0000000000000568434188608080148696899414062"); + // Exact + EXPECT_EQ(format("%.44f", exact_value), + "0.00000000000005684341886080801486968994140625"); + // Round to even (up), let make the last digits 75 instead of 25 + EXPECT_EQ(format("%.43f", exact_value + std::pow(2, -43)), + "0.0000000000001705302565824240446090698242188"); + // Exact, just to check. + EXPECT_EQ(format("%.44f", exact_value + std::pow(2, -43)), + "0.00000000000017053025658242404460906982421875"); + + // This value has to be small enough that it won't fit in the uint128 + // representation for printing. + const double small_exact_value = + 0.000000000000000000000000000000000000752316384526264005099991383822237233803945956334136013765601092018187046051025390625; // NOLINT + assert(small_exact_value == std::pow(2, -120)); + // Round up at a 5xx. + EXPECT_EQ(format("%.37f", small_exact_value), + "0.0000000000000000000000000000000000008"); + // Round down at a <5 + EXPECT_EQ(format("%.38f", small_exact_value), + "0.00000000000000000000000000000000000075"); + // Round up at a >5 + EXPECT_EQ(format("%.41f", small_exact_value), + "0.00000000000000000000000000000000000075232"); + // Nine handling + EXPECT_EQ(format("%.55f", small_exact_value), + "0.0000000000000000000000000000000000007523163845262640051"); + EXPECT_EQ(format("%.56f", small_exact_value), + "0.00000000000000000000000000000000000075231638452626400510"); + EXPECT_EQ(format("%.57f", small_exact_value), + "0.000000000000000000000000000000000000752316384526264005100"); + EXPECT_EQ(format("%.58f", small_exact_value), + "0.0000000000000000000000000000000000007523163845262640051000"); + // Round down the last nine + EXPECT_EQ(format("%.59f", small_exact_value), + "0.00000000000000000000000000000000000075231638452626400509999"); + // Round up the last nine + EXPECT_EQ(format("%.79f", small_exact_value), + "0.000000000000000000000000000000000000" + "7523163845262640050999913838222372338039460"); + + // Round to even (down) + EXPECT_EQ(format("%.119f", small_exact_value), + "0.000000000000000000000000000000000000" + "75231638452626400509999138382223723380" + "394595633413601376560109201818704605102539062"); + // Exact + EXPECT_EQ(format("%.120f", small_exact_value), + "0.000000000000000000000000000000000000" + "75231638452626400509999138382223723380" + "3945956334136013765601092018187046051025390625"); + // Round to even (up), let make the last digits 75 instead of 25 + EXPECT_EQ(format("%.119f", small_exact_value + std::pow(2, -119)), + "0.000000000000000000000000000000000002" + "25694915357879201529997415146671170141" + "183786900240804129680327605456113815307617188"); + // Exact, just to check. + EXPECT_EQ(format("%.120f", small_exact_value + std::pow(2, -119)), + "0.000000000000000000000000000000000002" + "25694915357879201529997415146671170141" + "1837869002408041296803276054561138153076171875"); +} + +TEST_F(FormatConvertTest, DoubleRoundA) { + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + std::string s; + const auto format = [&](const char *fmt, double d) -> std::string & { + s.clear(); + FormatArgImpl args[1] = {FormatArgImpl(d)}; + AppendPack(&s, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args)); + if (native_traits.hex_float_has_glibc_rounding) { + EXPECT_EQ(StrPrint(fmt, d), s); + } + return s; + }; + + // 0x1.00018000p+100 + const double on_boundary_odd = 1267679614447900152596896153600.0; + EXPECT_EQ(format("%.0a", on_boundary_odd), "0x1p+100"); + EXPECT_EQ(format("%.1a", on_boundary_odd), "0x1.0p+100"); + EXPECT_EQ(format("%.2a", on_boundary_odd), "0x1.00p+100"); + EXPECT_EQ(format("%.3a", on_boundary_odd), "0x1.000p+100"); + EXPECT_EQ(format("%.4a", on_boundary_odd), "0x1.0002p+100"); // round + EXPECT_EQ(format("%.5a", on_boundary_odd), "0x1.00018p+100"); + EXPECT_EQ(format("%.6a", on_boundary_odd), "0x1.000180p+100"); + + // 0x1.00028000p-2 + const double on_boundary_even = 0.250009536743164062500; + EXPECT_EQ(format("%.0a", on_boundary_even), "0x1p-2"); + EXPECT_EQ(format("%.1a", on_boundary_even), "0x1.0p-2"); + EXPECT_EQ(format("%.2a", on_boundary_even), "0x1.00p-2"); + EXPECT_EQ(format("%.3a", on_boundary_even), "0x1.000p-2"); + EXPECT_EQ(format("%.4a", on_boundary_even), "0x1.0002p-2"); // no round + EXPECT_EQ(format("%.5a", on_boundary_even), "0x1.00028p-2"); + EXPECT_EQ(format("%.6a", on_boundary_even), "0x1.000280p-2"); + + // 0x1.00018001p+1 + const double slightly_over = 2.00004577683284878730773925781250; + EXPECT_EQ(format("%.0a", slightly_over), "0x1p+1"); + EXPECT_EQ(format("%.1a", slightly_over), "0x1.0p+1"); + EXPECT_EQ(format("%.2a", slightly_over), "0x1.00p+1"); + EXPECT_EQ(format("%.3a", slightly_over), "0x1.000p+1"); + EXPECT_EQ(format("%.4a", slightly_over), "0x1.0002p+1"); + EXPECT_EQ(format("%.5a", slightly_over), "0x1.00018p+1"); + EXPECT_EQ(format("%.6a", slightly_over), "0x1.000180p+1"); + + // 0x1.00017fffp+0 + const double slightly_under = 1.000022887950763106346130371093750; + EXPECT_EQ(format("%.0a", slightly_under), "0x1p+0"); + EXPECT_EQ(format("%.1a", slightly_under), "0x1.0p+0"); + EXPECT_EQ(format("%.2a", slightly_under), "0x1.00p+0"); + EXPECT_EQ(format("%.3a", slightly_under), "0x1.000p+0"); + EXPECT_EQ(format("%.4a", slightly_under), "0x1.0001p+0"); + EXPECT_EQ(format("%.5a", slightly_under), "0x1.00018p+0"); + EXPECT_EQ(format("%.6a", slightly_under), "0x1.000180p+0"); + EXPECT_EQ(format("%.7a", slightly_under), "0x1.0001800p+0"); + + // 0x1.1b3829ac28058p+3 + const double hex_value = 8.85060580848964661981881363317370414733886718750; + EXPECT_EQ(format("%.0a", hex_value), "0x1p+3"); + EXPECT_EQ(format("%.1a", hex_value), "0x1.2p+3"); + EXPECT_EQ(format("%.2a", hex_value), "0x1.1bp+3"); + EXPECT_EQ(format("%.3a", hex_value), "0x1.1b4p+3"); + EXPECT_EQ(format("%.4a", hex_value), "0x1.1b38p+3"); + EXPECT_EQ(format("%.5a", hex_value), "0x1.1b383p+3"); + EXPECT_EQ(format("%.6a", hex_value), "0x1.1b382ap+3"); + EXPECT_EQ(format("%.7a", hex_value), "0x1.1b3829bp+3"); + EXPECT_EQ(format("%.8a", hex_value), "0x1.1b3829acp+3"); + EXPECT_EQ(format("%.9a", hex_value), "0x1.1b3829ac3p+3"); + EXPECT_EQ(format("%.10a", hex_value), "0x1.1b3829ac28p+3"); + EXPECT_EQ(format("%.11a", hex_value), "0x1.1b3829ac280p+3"); + EXPECT_EQ(format("%.12a", hex_value), "0x1.1b3829ac2806p+3"); + EXPECT_EQ(format("%.13a", hex_value), "0x1.1b3829ac28058p+3"); + EXPECT_EQ(format("%.14a", hex_value), "0x1.1b3829ac280580p+3"); + EXPECT_EQ(format("%.15a", hex_value), "0x1.1b3829ac2805800p+3"); + EXPECT_EQ(format("%.16a", hex_value), "0x1.1b3829ac28058000p+3"); + EXPECT_EQ(format("%.17a", hex_value), "0x1.1b3829ac280580000p+3"); + EXPECT_EQ(format("%.18a", hex_value), "0x1.1b3829ac2805800000p+3"); + EXPECT_EQ(format("%.19a", hex_value), "0x1.1b3829ac28058000000p+3"); + EXPECT_EQ(format("%.20a", hex_value), "0x1.1b3829ac280580000000p+3"); + EXPECT_EQ(format("%.21a", hex_value), "0x1.1b3829ac2805800000000p+3"); + + // 0x1.0818283848586p+3 + const double hex_value2 = 8.2529488658208371987257123691961169242858886718750; + EXPECT_EQ(format("%.0a", hex_value2), "0x1p+3"); + EXPECT_EQ(format("%.1a", hex_value2), "0x1.1p+3"); + EXPECT_EQ(format("%.2a", hex_value2), "0x1.08p+3"); + EXPECT_EQ(format("%.3a", hex_value2), "0x1.082p+3"); + EXPECT_EQ(format("%.4a", hex_value2), "0x1.0818p+3"); + EXPECT_EQ(format("%.5a", hex_value2), "0x1.08183p+3"); + EXPECT_EQ(format("%.6a", hex_value2), "0x1.081828p+3"); + EXPECT_EQ(format("%.7a", hex_value2), "0x1.0818284p+3"); + EXPECT_EQ(format("%.8a", hex_value2), "0x1.08182838p+3"); + EXPECT_EQ(format("%.9a", hex_value2), "0x1.081828385p+3"); + EXPECT_EQ(format("%.10a", hex_value2), "0x1.0818283848p+3"); + EXPECT_EQ(format("%.11a", hex_value2), "0x1.08182838486p+3"); + EXPECT_EQ(format("%.12a", hex_value2), "0x1.081828384858p+3"); + EXPECT_EQ(format("%.13a", hex_value2), "0x1.0818283848586p+3"); + EXPECT_EQ(format("%.14a", hex_value2), "0x1.08182838485860p+3"); + EXPECT_EQ(format("%.15a", hex_value2), "0x1.081828384858600p+3"); + EXPECT_EQ(format("%.16a", hex_value2), "0x1.0818283848586000p+3"); + EXPECT_EQ(format("%.17a", hex_value2), "0x1.08182838485860000p+3"); + EXPECT_EQ(format("%.18a", hex_value2), "0x1.081828384858600000p+3"); + EXPECT_EQ(format("%.19a", hex_value2), "0x1.0818283848586000000p+3"); + EXPECT_EQ(format("%.20a", hex_value2), "0x1.08182838485860000000p+3"); + EXPECT_EQ(format("%.21a", hex_value2), "0x1.081828384858600000000p+3"); +} + +TEST_F(FormatConvertTest, LongDoubleRoundA) { + if (std::numeric_limits<long double>::digits % 4 != 0) { + // This test doesn't really make sense to run on platforms where a long + // double has a different mantissa size (mod 4) than Prod, since then the + // leading digit will be formatted differently. + return; + } + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + std::string s; + const auto format = [&](const char *fmt, long double d) -> std::string & { + s.clear(); + FormatArgImpl args[1] = {FormatArgImpl(d)}; + AppendPack(&s, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args)); + if (native_traits.hex_float_has_glibc_rounding && + native_traits.hex_float_optimizes_leading_digit_bit_count) { + EXPECT_EQ(StrPrint(fmt, d), s); + } + return s; + }; + + // 0x8.8p+4 + const long double on_boundary_even = 136.0; + EXPECT_EQ(format("%.0La", on_boundary_even), "0x8p+4"); + EXPECT_EQ(format("%.1La", on_boundary_even), "0x8.8p+4"); + EXPECT_EQ(format("%.2La", on_boundary_even), "0x8.80p+4"); + EXPECT_EQ(format("%.3La", on_boundary_even), "0x8.800p+4"); + EXPECT_EQ(format("%.4La", on_boundary_even), "0x8.8000p+4"); + EXPECT_EQ(format("%.5La", on_boundary_even), "0x8.80000p+4"); + EXPECT_EQ(format("%.6La", on_boundary_even), "0x8.800000p+4"); + + // 0x9.8p+4 + const long double on_boundary_odd = 152.0; + EXPECT_EQ(format("%.0La", on_boundary_odd), "0xap+4"); + EXPECT_EQ(format("%.1La", on_boundary_odd), "0x9.8p+4"); + EXPECT_EQ(format("%.2La", on_boundary_odd), "0x9.80p+4"); + EXPECT_EQ(format("%.3La", on_boundary_odd), "0x9.800p+4"); + EXPECT_EQ(format("%.4La", on_boundary_odd), "0x9.8000p+4"); + EXPECT_EQ(format("%.5La", on_boundary_odd), "0x9.80000p+4"); + EXPECT_EQ(format("%.6La", on_boundary_odd), "0x9.800000p+4"); + + // 0x8.80001p+24 + const long double slightly_over = 142606352.0; + EXPECT_EQ(format("%.0La", slightly_over), "0x9p+24"); + EXPECT_EQ(format("%.1La", slightly_over), "0x8.8p+24"); + EXPECT_EQ(format("%.2La", slightly_over), "0x8.80p+24"); + EXPECT_EQ(format("%.3La", slightly_over), "0x8.800p+24"); + EXPECT_EQ(format("%.4La", slightly_over), "0x8.8000p+24"); + EXPECT_EQ(format("%.5La", slightly_over), "0x8.80001p+24"); + EXPECT_EQ(format("%.6La", slightly_over), "0x8.800010p+24"); + + // 0x8.7ffffp+24 + const long double slightly_under = 142606320.0; + EXPECT_EQ(format("%.0La", slightly_under), "0x8p+24"); + EXPECT_EQ(format("%.1La", slightly_under), "0x8.8p+24"); + EXPECT_EQ(format("%.2La", slightly_under), "0x8.80p+24"); + EXPECT_EQ(format("%.3La", slightly_under), "0x8.800p+24"); + EXPECT_EQ(format("%.4La", slightly_under), "0x8.8000p+24"); + EXPECT_EQ(format("%.5La", slightly_under), "0x8.7ffffp+24"); + EXPECT_EQ(format("%.6La", slightly_under), "0x8.7ffff0p+24"); + EXPECT_EQ(format("%.7La", slightly_under), "0x8.7ffff00p+24"); + + // 0xc.0828384858688000p+128 + const long double eights = 4094231060438608800781871108094404067328.0; + EXPECT_EQ(format("%.0La", eights), "0xcp+128"); + EXPECT_EQ(format("%.1La", eights), "0xc.1p+128"); + EXPECT_EQ(format("%.2La", eights), "0xc.08p+128"); + EXPECT_EQ(format("%.3La", eights), "0xc.083p+128"); + EXPECT_EQ(format("%.4La", eights), "0xc.0828p+128"); + EXPECT_EQ(format("%.5La", eights), "0xc.08284p+128"); + EXPECT_EQ(format("%.6La", eights), "0xc.082838p+128"); + EXPECT_EQ(format("%.7La", eights), "0xc.0828385p+128"); + EXPECT_EQ(format("%.8La", eights), "0xc.08283848p+128"); + EXPECT_EQ(format("%.9La", eights), "0xc.082838486p+128"); + EXPECT_EQ(format("%.10La", eights), "0xc.0828384858p+128"); + EXPECT_EQ(format("%.11La", eights), "0xc.08283848587p+128"); + EXPECT_EQ(format("%.12La", eights), "0xc.082838485868p+128"); + EXPECT_EQ(format("%.13La", eights), "0xc.0828384858688p+128"); + EXPECT_EQ(format("%.14La", eights), "0xc.08283848586880p+128"); + EXPECT_EQ(format("%.15La", eights), "0xc.082838485868800p+128"); + EXPECT_EQ(format("%.16La", eights), "0xc.0828384858688000p+128"); +} + +// We don't actually store the results. This is just to exercise the rest of the +// machinery. +struct NullSink { + friend void AbslFormatFlush(NullSink *sink, string_view str) {} +}; + +template <typename... T> +bool FormatWithNullSink(absl::string_view fmt, const T &... a) { + NullSink sink; + FormatArgImpl args[] = {FormatArgImpl(a)...}; + return FormatUntyped(&sink, UntypedFormatSpecImpl(fmt), absl::MakeSpan(args)); +} + +TEST_F(FormatConvertTest, ExtremeWidthPrecision) { + for (const char *fmt : {"f"}) { + for (double d : {1e-100, 1.0, 1e100}) { + constexpr int max = std::numeric_limits<int>::max(); + EXPECT_TRUE(FormatWithNullSink(std::string("%.*") + fmt, max, d)); + EXPECT_TRUE(FormatWithNullSink(std::string("%1.*") + fmt, max, d)); + EXPECT_TRUE(FormatWithNullSink(std::string("%*") + fmt, max, d)); + EXPECT_TRUE(FormatWithNullSink(std::string("%*.*") + fmt, max, max, d)); } } } TEST_F(FormatConvertTest, LongDouble) { - const char *const kFormats[] = {"%", "%.3", "%8.5", "%9", + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + const char *const kFormats[] = {"%", "%.3", "%8.5", "%9", "%.5000", "%.60", "%+", "% ", "%-10"}; - // This value is not representable in double, but it is in long double that - // uses the extended format. - // This is to verify that we are not truncating the value mistakenly through a - // double. - long double very_precise = 10000000000000000.25L; - std::vector<long double> doubles = { 0.0, -0.0, - very_precise, - 1 / very_precise, std::numeric_limits<long double>::max(), -std::numeric_limits<long double>::max(), std::numeric_limits<long double>::min(), @@ -556,28 +1081,73 @@ TEST_F(FormatConvertTest, LongDouble) { std::numeric_limits<long double>::infinity(), -std::numeric_limits<long double>::infinity()}; + for (long double base : {1.L, 12.L, 123.L, 1234.L, 12345.L, 123456.L, + 1234567.L, 12345678.L, 123456789.L, 1234567890.L, + 12345678901.L, 123456789012.L, 1234567890123.L, + // This value is not representable in double, but it + // is in long double that uses the extended format. + // This is to verify that we are not truncating the + // value mistakenly through a double. + 10000000000000000.25L}) { + for (int exp : {-1000, -500, 0, 500, 1000}) { + for (int sign : {1, -1}) { + doubles.push_back(sign * std::ldexp(base, exp)); + doubles.push_back(sign / std::ldexp(base, exp)); + } + } + } + + // Regression tests + // + // Using a string literal because not all platforms support hex literals or it + // might be out of range. + doubles.push_back(std::strtold("-0xf.ffffffb5feafffbp-16324L", nullptr)); + for (const char *fmt : kFormats) { for (char f : {'f', 'F', // 'g', 'G', // 'a', 'A', // 'e', 'E'}) { std::string fmt_str = std::string(fmt) + 'L' + f; + + if (fmt == absl::string_view("%.5000") && f != 'f' && f != 'F' && + f != 'a' && f != 'A') { + // This particular test takes way too long with snprintf. + // Disable for the case we are not implementing natively. + continue; + } + + if (f == 'a' || f == 'A') { + if (!native_traits.hex_float_has_glibc_rounding || + !native_traits.hex_float_optimizes_leading_digit_bit_count) { + continue; + } + } + for (auto d : doubles) { FormatArgImpl arg(d); UntypedFormatSpecImpl format(fmt_str); + std::string result = FormatPack(format, {&arg, 1}); + +#ifdef _MSC_VER + // MSVC has a different rounding policy than us so we can't test our + // implementation against the native one there. + continue; +#endif // _MSC_VER + // We use ASSERT_EQ here because failures are usually correlated and a // bug would print way too many failed expectations causing the test to // time out. - ASSERT_EQ(StrPrint(fmt_str.c_str(), d), - FormatPack(format, {&arg, 1})) + ASSERT_EQ(StrPrint(fmt_str.c_str(), d), result) << fmt_str << " " << StrPrint("%.18Lg", d) << " " - << StrPrint("%.999Lf", d); + << StrPrint("%La", d) << " " << StrPrint("%.1080Lf", d); } } } } -TEST_F(FormatConvertTest, IntAsFloat) { +TEST_F(FormatConvertTest, IntAsDouble) { + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); const int kMin = std::numeric_limits<int>::min(); const int kMax = std::numeric_limits<int>::max(); const int ia[] = { @@ -593,14 +1163,17 @@ TEST_F(FormatConvertTest, IntAsFloat) { const char *fmt; }; const double dx = static_cast<double>(fx); - const Expectation kExpect[] = { - { __LINE__, StrPrint("%f", dx), "%f" }, - { __LINE__, StrPrint("%12f", dx), "%12f" }, - { __LINE__, StrPrint("%.12f", dx), "%.12f" }, - { __LINE__, StrPrint("%12a", dx), "%12a" }, - { __LINE__, StrPrint("%.12a", dx), "%.12a" }, + std::vector<Expectation> expect = { + {__LINE__, StrPrint("%f", dx), "%f"}, + {__LINE__, StrPrint("%12f", dx), "%12f"}, + {__LINE__, StrPrint("%.12f", dx), "%.12f"}, + {__LINE__, StrPrint("%.12a", dx), "%.12a"}, }; - for (const Expectation &e : kExpect) { + if (native_traits.hex_float_uses_minimal_precision_when_not_specified) { + Expectation ex = {__LINE__, StrPrint("%12a", dx), "%12a"}; + expect.push_back(ex); + } + for (const Expectation &e : expect) { SCOPED_TRACE(e.line); SCOPED_TRACE(e.fmt); UntypedFormatSpecImpl format(e.fmt); @@ -645,6 +1218,25 @@ TEST_F(FormatConvertTest, ExpectedFailures) { EXPECT_TRUE(FormatFails("%*d", "")); } +// Sanity check to make sure that we are testing what we think we're testing on +// e.g. the x86_64+glibc platform. +TEST_F(FormatConvertTest, GlibcHasCorrectTraits) { +#if !defined(__GLIBC__) || !defined(__x86_64__) + return; +#endif + const NativePrintfTraits &native_traits = VerifyNativeImplementation(); + // If one of the following tests break then it is either because the above PP + // macro guards failed to exclude a new platform (likely) or because something + // has changed in the implemention of glibc sprintf float formatting behavior. + // If the latter, then the code that computes these flags needs to be + // revisited and/or possibly the StrFormat implementation. + EXPECT_TRUE(native_traits.hex_float_has_glibc_rounding); + EXPECT_TRUE(native_traits.hex_float_prefers_denormal_repr); + EXPECT_TRUE( + native_traits.hex_float_uses_minimal_precision_when_not_specified); + EXPECT_TRUE(native_traits.hex_float_optimizes_leading_digit_bit_count); +} + } // namespace } // namespace str_format_internal ABSL_NAMESPACE_END diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/extension.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/extension.cc index 2e5bc2ce0b..484f6ebfc1 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/extension.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/extension.cc @@ -23,26 +23,50 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -std::string Flags::ToString() const { +std::string FlagsToString(Flags v) { std::string s; - s.append(left ? "-" : ""); - s.append(show_pos ? "+" : ""); - s.append(sign_col ? " " : ""); - s.append(alt ? "#" : ""); - s.append(zero ? "0" : ""); + s.append(FlagsContains(v, Flags::kLeft) ? "-" : ""); + s.append(FlagsContains(v, Flags::kShowPos) ? "+" : ""); + s.append(FlagsContains(v, Flags::kSignCol) ? " " : ""); + s.append(FlagsContains(v, Flags::kAlt) ? "#" : ""); + s.append(FlagsContains(v, Flags::kZero) ? "0" : ""); return s; } -bool FormatSinkImpl::PutPaddedString(string_view v, int w, int p, bool l) { +#define ABSL_INTERNAL_X_VAL(id) \ + constexpr absl::FormatConversionChar FormatConversionCharInternal::id; +ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) +#undef ABSL_INTERNAL_X_VAL +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr absl::FormatConversionChar FormatConversionCharInternal::kNone; + +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + constexpr FormatConversionCharSet FormatConversionCharSetInternal::c; +ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) +#undef ABSL_INTERNAL_CHAR_SET_CASE + +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kStar; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kIntegral; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kFloating; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kNumeric; +// NOLINTNEXTLINE(readability-redundant-declaration) +constexpr FormatConversionCharSet FormatConversionCharSetInternal::kPointer; + +bool FormatSinkImpl::PutPaddedString(string_view value, int width, + int precision, bool left) { size_t space_remaining = 0; - if (w >= 0) space_remaining = w; - size_t n = v.size(); - if (p >= 0) n = std::min(n, static_cast<size_t>(p)); - string_view shown(v.data(), n); + if (width >= 0) space_remaining = width; + size_t n = value.size(); + if (precision >= 0) n = std::min(n, static_cast<size_t>(precision)); + string_view shown(value.data(), n); space_remaining = Excess(shown.size(), space_remaining); - if (!l) Append(space_remaining, ' '); + if (!left) Append(space_remaining, ' '); Append(shown); - if (l) Append(space_remaining, ' '); + if (left) Append(space_remaining, ' '); return true; } diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/extension.h b/third_party/abseil-cpp/absl/strings/internal/str_format/extension.h index d1665753d1..55cbb56d0a 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/extension.h +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/extension.h @@ -24,11 +24,16 @@ #include "absl/base/config.h" #include "absl/base/port.h" +#include "absl/meta/type_traits.h" #include "absl/strings/internal/str_format/output.h" #include "absl/strings/string_view.h" namespace absl { ABSL_NAMESPACE_BEGIN + +enum class FormatConversionChar : uint8_t; +enum class FormatConversionCharSet : uint64_t; + namespace str_format_internal { class FormatRawSinkImpl { @@ -102,7 +107,7 @@ class FormatSinkImpl { size_t size() const { return size_; } // Put 'v' to 'sink' with specified width, precision, and left flag. - bool PutPaddedString(string_view v, int w, int p, bool l); + bool PutPaddedString(string_view v, int width, int precision, bool left); template <typename T> T Wrap() { @@ -123,23 +128,37 @@ class FormatSinkImpl { char buf_[1024]; }; -struct Flags { - bool basic : 1; // fastest conversion: no flags, width, or precision - bool left : 1; // "-" - bool show_pos : 1; // "+" - bool sign_col : 1; // " " - bool alt : 1; // "#" - bool zero : 1; // "0" - std::string ToString() const; - friend std::ostream& operator<<(std::ostream& os, const Flags& v) { - return os << v.ToString(); - } +enum class Flags : uint8_t { + kBasic = 0, + kLeft = 1 << 0, + kShowPos = 1 << 1, + kSignCol = 1 << 2, + kAlt = 1 << 3, + kZero = 1 << 4, + // This is not a real flag. It just exists to turn off kBasic when no other + // flags are set. This is for when width/precision are specified. + kNonBasic = 1 << 5, }; +constexpr Flags operator|(Flags a, Flags b) { + return static_cast<Flags>(static_cast<uint8_t>(a) | static_cast<uint8_t>(b)); +} + +constexpr bool FlagsContains(Flags haystack, Flags needle) { + return (static_cast<uint8_t>(haystack) & static_cast<uint8_t>(needle)) == + static_cast<uint8_t>(needle); +} + +std::string FlagsToString(Flags v); + +inline std::ostream& operator<<(std::ostream& os, Flags v) { + return os << FlagsToString(v); +} + // clang-format off #define ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP) \ /* text */ \ - X_VAL(c) X_SEP X_VAL(C) X_SEP X_VAL(s) X_SEP X_VAL(S) X_SEP \ + X_VAL(c) X_SEP X_VAL(s) X_SEP \ /* ints */ \ X_VAL(d) X_SEP X_VAL(i) X_SEP X_VAL(o) X_SEP \ X_VAL(u) X_SEP X_VAL(x) X_SEP X_VAL(X) X_SEP \ @@ -148,14 +167,39 @@ struct Flags { X_VAL(g) X_SEP X_VAL(G) X_SEP X_VAL(a) X_SEP X_VAL(A) X_SEP \ /* misc */ \ X_VAL(n) X_SEP X_VAL(p) +// clang-format on + +// This type should not be referenced, it exists only to provide labels +// internally that match the values declared in FormatConversionChar in +// str_format.h. This is meant to allow internal libraries to use the same +// declared interface type as the public interface +// (absl::StrFormatConversionChar) while keeping the definition in a public +// header. +// Internal libraries should use the form +// `FormatConversionCharInternal::c`, `FormatConversionCharInternal::kNone` for +// comparisons. Use in switch statements is not recommended due to a bug in how +// gcc 4.9 -Wswitch handles declared but undefined enums. +struct FormatConversionCharInternal { + FormatConversionCharInternal() = delete; -enum class FormatConversionChar : uint8_t { - c, C, s, S, // text + private: + // clang-format off + enum class Enum : uint8_t { + c, s, // text d, i, o, u, x, X, // int f, F, e, E, g, G, a, A, // float n, p, // misc - kNone, - none = kNone + kNone + }; + // clang-format on + public: +#define ABSL_INTERNAL_X_VAL(id) \ + static constexpr FormatConversionChar id = \ + static_cast<FormatConversionChar>(Enum::id); + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) +#undef ABSL_INTERNAL_X_VAL + static constexpr FormatConversionChar kNone = + static_cast<FormatConversionChar>(Enum::kNone); }; // clang-format on @@ -163,95 +207,56 @@ inline FormatConversionChar FormatConversionCharFromChar(char c) { switch (c) { #define ABSL_INTERNAL_X_VAL(id) \ case #id[0]: \ - return FormatConversionChar::id; + return FormatConversionCharInternal::id; ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) #undef ABSL_INTERNAL_X_VAL } - return FormatConversionChar::kNone; -} - -inline int FormatConversionCharRadix(FormatConversionChar c) { - switch (c) { - case FormatConversionChar::x: - case FormatConversionChar::X: - case FormatConversionChar::a: - case FormatConversionChar::A: - case FormatConversionChar::p: - return 16; - case FormatConversionChar::o: - return 8; - default: - return 10; - } + return FormatConversionCharInternal::kNone; } inline bool FormatConversionCharIsUpper(FormatConversionChar c) { - switch (c) { - case FormatConversionChar::X: - case FormatConversionChar::F: - case FormatConversionChar::E: - case FormatConversionChar::G: - case FormatConversionChar::A: - return true; - default: - return false; - } -} - -inline bool FormatConversionCharIsSigned(FormatConversionChar c) { - switch (c) { - case FormatConversionChar::d: - case FormatConversionChar::i: - return true; - default: - return false; - } -} - -inline bool FormatConversionCharIsIntegral(FormatConversionChar c) { - switch (c) { - case FormatConversionChar::d: - case FormatConversionChar::i: - case FormatConversionChar::u: - case FormatConversionChar::o: - case FormatConversionChar::x: - case FormatConversionChar::X: - return true; - default: - return false; + if (c == FormatConversionCharInternal::X || + c == FormatConversionCharInternal::F || + c == FormatConversionCharInternal::E || + c == FormatConversionCharInternal::G || + c == FormatConversionCharInternal::A) { + return true; + } else { + return false; } } inline bool FormatConversionCharIsFloat(FormatConversionChar c) { - switch (c) { - case FormatConversionChar::a: - case FormatConversionChar::e: - case FormatConversionChar::f: - case FormatConversionChar::g: - case FormatConversionChar::A: - case FormatConversionChar::E: - case FormatConversionChar::F: - case FormatConversionChar::G: - return true; - default: - return false; + if (c == FormatConversionCharInternal::a || + c == FormatConversionCharInternal::e || + c == FormatConversionCharInternal::f || + c == FormatConversionCharInternal::g || + c == FormatConversionCharInternal::A || + c == FormatConversionCharInternal::E || + c == FormatConversionCharInternal::F || + c == FormatConversionCharInternal::G) { + return true; + } else { + return false; } } inline char FormatConversionCharToChar(FormatConversionChar c) { - switch (c) { -#define ABSL_INTERNAL_X_VAL(e) \ - case FormatConversionChar::e: \ + if (c == FormatConversionCharInternal::kNone) { + return '\0'; + +#define ABSL_INTERNAL_X_VAL(e) \ + } else if (c == FormatConversionCharInternal::e) { \ return #e[0]; #define ABSL_INTERNAL_X_SEP - ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, - ABSL_INTERNAL_X_SEP) - case FormatConversionChar::kNone: - return '\0'; + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, + ABSL_INTERNAL_X_SEP) + } else { + return '\0'; + } + #undef ABSL_INTERNAL_X_VAL #undef ABSL_INTERNAL_X_SEP - } - return '\0'; } // The associated char. @@ -263,20 +268,24 @@ inline std::ostream& operator<<(std::ostream& os, FormatConversionChar v) { struct FormatConversionSpecImplFriend; -class FormatConversionSpec { +class FormatConversionSpecImpl { public: // Width and precison are not specified, no flags are set. - bool is_basic() const { return flags_.basic; } - bool has_left_flag() const { return flags_.left; } - bool has_show_pos_flag() const { return flags_.show_pos; } - bool has_sign_col_flag() const { return flags_.sign_col; } - bool has_alt_flag() const { return flags_.alt; } - bool has_zero_flag() const { return flags_.zero; } + bool is_basic() const { return flags_ == Flags::kBasic; } + bool has_left_flag() const { return FlagsContains(flags_, Flags::kLeft); } + bool has_show_pos_flag() const { + return FlagsContains(flags_, Flags::kShowPos); + } + bool has_sign_col_flag() const { + return FlagsContains(flags_, Flags::kSignCol); + } + bool has_alt_flag() const { return FlagsContains(flags_, Flags::kAlt); } + bool has_zero_flag() const { return FlagsContains(flags_, Flags::kZero); } FormatConversionChar conversion_char() const { // Keep this field first in the struct . It generates better code when // accessing it when ConversionSpec is passed by value in registers. - static_assert(offsetof(FormatConversionSpec, conv_) == 0, ""); + static_assert(offsetof(FormatConversionSpecImpl, conv_) == 0, ""); return conv_; } @@ -287,41 +296,65 @@ class FormatConversionSpec { // negative value. int precision() const { return precision_; } - // Deprecated (use has_x_flag() instead). - Flags flags() const { return flags_; } - // Deprecated - FormatConversionChar conv() const { return conversion_char(); } + template <typename T> + T Wrap() { + return T(*this); + } private: friend struct str_format_internal::FormatConversionSpecImplFriend; - FormatConversionChar conv_ = FormatConversionChar::kNone; + FormatConversionChar conv_ = FormatConversionCharInternal::kNone; Flags flags_; int width_; int precision_; }; struct FormatConversionSpecImplFriend final { - static void SetFlags(Flags f, FormatConversionSpec* conv) { + static void SetFlags(Flags f, FormatConversionSpecImpl* conv) { conv->flags_ = f; } static void SetConversionChar(FormatConversionChar c, - FormatConversionSpec* conv) { + FormatConversionSpecImpl* conv) { conv->conv_ = c; } - static void SetWidth(int w, FormatConversionSpec* conv) { conv->width_ = w; } - static void SetPrecision(int p, FormatConversionSpec* conv) { + static void SetWidth(int w, FormatConversionSpecImpl* conv) { + conv->width_ = w; + } + static void SetPrecision(int p, FormatConversionSpecImpl* conv) { conv->precision_ = p; } - static std::string FlagsToString(const FormatConversionSpec& spec) { - return spec.flags_.ToString(); + static std::string FlagsToString(const FormatConversionSpecImpl& spec) { + return str_format_internal::FlagsToString(spec.flags_); } }; -constexpr uint64_t FormatConversionCharToConvValue(char conv) { +// Type safe OR operator. +// We need this for two reasons: +// 1. operator| on enums makes them decay to integers and the result is an +// integer. We need the result to stay as an enum. +// 2. We use "enum class" which would not work even if we accepted the decay. +constexpr FormatConversionCharSet FormatConversionCharSetUnion( + FormatConversionCharSet a) { + return a; +} + +template <typename... CharSet> +constexpr FormatConversionCharSet FormatConversionCharSetUnion( + FormatConversionCharSet a, CharSet... rest) { + return static_cast<FormatConversionCharSet>( + static_cast<uint64_t>(a) | + static_cast<uint64_t>(FormatConversionCharSetUnion(rest...))); +} + +constexpr uint64_t FormatConversionCharToConvInt(FormatConversionChar c) { + return uint64_t{1} << (1 + static_cast<uint8_t>(c)); +} + +constexpr uint64_t FormatConversionCharToConvInt(char conv) { return -#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ - conv == #c[0] \ - ? (uint64_t{1} << (1 + static_cast<uint8_t>(FormatConversionChar::c))) \ +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + conv == #c[0] \ + ? FormatConversionCharToConvInt(FormatConversionCharInternal::c) \ : ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) #undef ABSL_INTERNAL_CHAR_SET_CASE @@ -330,28 +363,29 @@ constexpr uint64_t FormatConversionCharToConvValue(char conv) { : 0; } -enum class FormatConversionCharSet : uint64_t { -#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ - c = FormatConversionCharToConvValue(#c[0]), +constexpr FormatConversionCharSet FormatConversionCharToConvValue(char conv) { + return static_cast<FormatConversionCharSet>( + FormatConversionCharToConvInt(conv)); +} + +struct FormatConversionCharSetInternal { +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + static constexpr FormatConversionCharSet c = \ + FormatConversionCharToConvValue(#c[0]); ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) #undef ABSL_INTERNAL_CHAR_SET_CASE // Used for width/precision '*' specification. - kStar = FormatConversionCharToConvValue('*'), - // Some predefined values: - kIntegral = d | i | u | o | x | X, - kFloating = a | e | f | g | A | E | F | G, - kNumeric = kIntegral | kFloating, - kString = s, - kPointer = p, - - // The following are deprecated - star = kStar, - integral = kIntegral, - floating = kFloating, - numeric = kNumeric, - string = kString, - pointer = kPointer + static constexpr FormatConversionCharSet kStar = + FormatConversionCharToConvValue('*'); + + static constexpr FormatConversionCharSet kIntegral = + FormatConversionCharSetUnion(d, i, u, o, x, X); + static constexpr FormatConversionCharSet kFloating = + FormatConversionCharSetUnion(a, e, f, g, A, E, F, G); + static constexpr FormatConversionCharSet kNumeric = + FormatConversionCharSetUnion(kIntegral, kFloating); + static constexpr FormatConversionCharSet kPointer = p; }; // Type safe OR operator. @@ -361,18 +395,29 @@ enum class FormatConversionCharSet : uint64_t { // 2. We use "enum class" which would not work even if we accepted the decay. constexpr FormatConversionCharSet operator|(FormatConversionCharSet a, FormatConversionCharSet b) { - return FormatConversionCharSet(static_cast<uint64_t>(a) | - static_cast<uint64_t>(b)); + return FormatConversionCharSetUnion(a, b); } +// Overloaded conversion functions to support absl::ParsedFormat. // Get a conversion with a single character in it. -constexpr FormatConversionCharSet ConversionCharToConv(char c) { - return FormatConversionCharSet(FormatConversionCharToConvValue(c)); +constexpr FormatConversionCharSet ToFormatConversionCharSet(char c) { + return static_cast<FormatConversionCharSet>( + FormatConversionCharToConvValue(c)); } +// Get a conversion with a single character in it. +constexpr FormatConversionCharSet ToFormatConversionCharSet( + FormatConversionCharSet c) { + return c; +} + +template <typename T> +void ToFormatConversionCharSet(T) = delete; + // Checks whether `c` exists in `set`. constexpr bool Contains(FormatConversionCharSet set, char c) { - return (static_cast<uint64_t>(set) & FormatConversionCharToConvValue(c)) != 0; + return (static_cast<uint64_t>(set) & + static_cast<uint64_t>(FormatConversionCharToConvValue(c))) != 0; } // Checks whether all the characters in `c` are contained in `set` @@ -382,31 +427,16 @@ constexpr bool Contains(FormatConversionCharSet set, static_cast<uint64_t>(c); } -// Return type of the AbslFormatConvert() functions. -// The FormatConversionCharSet template parameter is used to inform the -// framework of what conversion characters are supported by that -// AbslFormatConvert routine. -template <FormatConversionCharSet C> -struct FormatConvertResult { - static constexpr FormatConversionCharSet kConv = C; - bool value; -}; - -template <FormatConversionCharSet C> -constexpr FormatConversionCharSet FormatConvertResult<C>::kConv; +// Checks whether all the characters in `c` are contained in `set` +constexpr bool Contains(FormatConversionCharSet set, FormatConversionChar c) { + return (static_cast<uint64_t>(set) & FormatConversionCharToConvInt(c)) != 0; +} // Return capacity - used, clipped to a minimum of 0. inline size_t Excess(size_t used, size_t capacity) { return used < capacity ? capacity - used : 0; } -// Type alias for use during migration. -using ConversionChar = FormatConversionChar; -using ConversionSpec = FormatConversionSpec; -using Conv = FormatConversionCharSet; -template <FormatConversionCharSet C> -using ConvertResult = FormatConvertResult<C>; - } // namespace str_format_internal ABSL_NAMESPACE_END diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/extension_test.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/extension_test.cc index 4e23fefbd5..1c93fdb1c7 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/extension_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/extension_test.cc @@ -19,9 +19,26 @@ #include <random> #include <string> +#include "gtest/gtest.h" #include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" -#include "gtest/gtest.h" +namespace my_namespace { +class UserDefinedType { + public: + UserDefinedType() = default; + + void Append(absl::string_view str) { value_.append(str.data(), str.size()); } + const std::string& Value() const { return value_; } + + friend void AbslFormatFlush(UserDefinedType* x, absl::string_view str) { + x->Append(str); + } + + private: + std::string value_; +}; +} // namespace my_namespace namespace { @@ -63,4 +80,19 @@ TEST(FormatExtensionTest, SinkAppendChars) { EXPECT_EQ(actual, expected); } } + +TEST(FormatExtensionTest, VerifyEnumEquality) { +#define X_VAL(id) \ + EXPECT_EQ(absl::FormatConversionChar::id, \ + absl::str_format_internal::FormatConversionCharInternal::id); + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, ); +#undef X_VAL + +#define X_VAL(id) \ + EXPECT_EQ(absl::FormatConversionCharSet::id, \ + absl::str_format_internal::FormatConversionCharSetInternal::id); + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, ); +#undef X_VAL +} + } // namespace diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/float_conversion.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/float_conversion.cc index d4c647c3ed..b1c4068475 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/float_conversion.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/float_conversion.cc @@ -1,12 +1,38 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "absl/strings/internal/str_format/float_conversion.h" #include <string.h> + #include <algorithm> #include <cassert> #include <cmath> +#include <limits> #include <string> +#include "absl/base/attributes.h" #include "absl/base/config.h" +#include "absl/base/optimization.h" +#include "absl/functional/function_ref.h" +#include "absl/meta/type_traits.h" +#include "absl/numeric/bits.h" +#include "absl/numeric/int128.h" +#include "absl/numeric/internal/representation.h" +#include "absl/strings/numbers.h" +#include "absl/types/optional.h" +#include "absl/types/span.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -14,13 +40,905 @@ namespace str_format_internal { namespace { -char *CopyStringTo(string_view v, char *out) { +using ::absl::numeric_internal::IsDoubleDouble; + +// The code below wants to avoid heap allocations. +// To do so it needs to allocate memory on the stack. +// `StackArray` will allocate memory on the stack in the form of a uint32_t +// array and call the provided callback with said memory. +// It will allocate memory in increments of 512 bytes. We could allocate the +// largest needed unconditionally, but that is more than we need in most of +// cases. This way we use less stack in the common cases. +class StackArray { + using Func = absl::FunctionRef<void(absl::Span<uint32_t>)>; + static constexpr size_t kStep = 512 / sizeof(uint32_t); + // 5 steps is 2560 bytes, which is enough to hold a long double with the + // largest/smallest exponents. + // The operations below will static_assert their particular maximum. + static constexpr size_t kNumSteps = 5; + + // We do not want this function to be inlined. + // Otherwise the caller will allocate the stack space unnecessarily for all + // the variants even though it only calls one. + template <size_t steps> + ABSL_ATTRIBUTE_NOINLINE static void RunWithCapacityImpl(Func f) { + uint32_t values[steps * kStep]{}; + f(absl::MakeSpan(values)); + } + + public: + static constexpr size_t kMaxCapacity = kStep * kNumSteps; + + static void RunWithCapacity(size_t capacity, Func f) { + assert(capacity <= kMaxCapacity); + const size_t step = (capacity + kStep - 1) / kStep; + assert(step <= kNumSteps); + switch (step) { + case 1: + return RunWithCapacityImpl<1>(f); + case 2: + return RunWithCapacityImpl<2>(f); + case 3: + return RunWithCapacityImpl<3>(f); + case 4: + return RunWithCapacityImpl<4>(f); + case 5: + return RunWithCapacityImpl<5>(f); + } + + assert(false && "Invalid capacity"); + } +}; + +// Calculates `10 * (*v) + carry` and stores the result in `*v` and returns +// the carry. +template <typename Int> +inline Int MultiplyBy10WithCarry(Int *v, Int carry) { + using BiggerInt = absl::conditional_t<sizeof(Int) == 4, uint64_t, uint128>; + BiggerInt tmp = 10 * static_cast<BiggerInt>(*v) + carry; + *v = static_cast<Int>(tmp); + return static_cast<Int>(tmp >> (sizeof(Int) * 8)); +} + +// Calculates `(2^64 * carry + *v) / 10`. +// Stores the quotient in `*v` and returns the remainder. +// Requires: `0 <= carry <= 9` +inline uint64_t DivideBy10WithCarry(uint64_t *v, uint64_t carry) { + constexpr uint64_t divisor = 10; + // 2^64 / divisor = chunk_quotient + chunk_remainder / divisor + constexpr uint64_t chunk_quotient = (uint64_t{1} << 63) / (divisor / 2); + constexpr uint64_t chunk_remainder = uint64_t{} - chunk_quotient * divisor; + + const uint64_t mod = *v % divisor; + const uint64_t next_carry = chunk_remainder * carry + mod; + *v = *v / divisor + carry * chunk_quotient + next_carry / divisor; + return next_carry % divisor; +} + +using MaxFloatType = + typename std::conditional<IsDoubleDouble(), double, long double>::type; + +// Generates the decimal representation for an integer of the form `v * 2^exp`, +// where `v` and `exp` are both positive integers. +// It generates the digits from the left (ie the most significant digit first) +// to allow for direct printing into the sink. +// +// Requires `0 <= exp` and `exp <= numeric_limits<MaxFloatType>::max_exponent`. +class BinaryToDecimal { + static constexpr int ChunksNeeded(int exp) { + // We will left shift a uint128 by `exp` bits, so we need `128+exp` total + // bits. Round up to 32. + // See constructor for details about adding `10%` to the value. + return (128 + exp + 31) / 32 * 11 / 10; + } + + public: + // Run the conversion for `v * 2^exp` and call `f(binary_to_decimal)`. + // This function will allocate enough stack space to perform the conversion. + static void RunConversion(uint128 v, int exp, + absl::FunctionRef<void(BinaryToDecimal)> f) { + assert(exp > 0); + assert(exp <= std::numeric_limits<MaxFloatType>::max_exponent); + static_assert( + static_cast<int>(StackArray::kMaxCapacity) >= + ChunksNeeded(std::numeric_limits<MaxFloatType>::max_exponent), + ""); + + StackArray::RunWithCapacity( + ChunksNeeded(exp), + [=](absl::Span<uint32_t> input) { f(BinaryToDecimal(input, v, exp)); }); + } + + int TotalDigits() const { + return static_cast<int>((decimal_end_ - decimal_start_) * kDigitsPerChunk + + CurrentDigits().size()); + } + + // See the current block of digits. + absl::string_view CurrentDigits() const { + return absl::string_view(digits_ + kDigitsPerChunk - size_, size_); + } + + // Advance the current view of digits. + // Returns `false` when no more digits are available. + bool AdvanceDigits() { + if (decimal_start_ >= decimal_end_) return false; + + uint32_t w = data_[decimal_start_++]; + for (size_ = 0; size_ < kDigitsPerChunk; w /= 10) { + digits_[kDigitsPerChunk - ++size_] = w % 10 + '0'; + } + return true; + } + + private: + BinaryToDecimal(absl::Span<uint32_t> data, uint128 v, int exp) : data_(data) { + // We need to print the digits directly into the sink object without + // buffering them all first. To do this we need two things: + // - to know the total number of digits to do padding when necessary + // - to generate the decimal digits from the left. + // + // In order to do this, we do a two pass conversion. + // On the first pass we convert the binary representation of the value into + // a decimal representation in which each uint32_t chunk holds up to 9 + // decimal digits. In the second pass we take each decimal-holding-uint32_t + // value and generate the ascii decimal digits into `digits_`. + // + // The binary and decimal representations actually share the same memory + // region. As we go converting the chunks from binary to decimal we free + // them up and reuse them for the decimal representation. One caveat is that + // the decimal representation is around 7% less efficient in space than the + // binary one. We allocate an extra 10% memory to account for this. See + // ChunksNeeded for this calculation. + int chunk_index = exp / 32; + decimal_start_ = decimal_end_ = ChunksNeeded(exp); + const int offset = exp % 32; + // Left shift v by exp bits. + data_[chunk_index] = static_cast<uint32_t>(v << offset); + for (v >>= (32 - offset); v; v >>= 32) + data_[++chunk_index] = static_cast<uint32_t>(v); + + while (chunk_index >= 0) { + // While we have more than one chunk available, go in steps of 1e9. + // `data_[chunk_index]` holds the highest non-zero binary chunk, so keep + // the variable updated. + uint32_t carry = 0; + for (int i = chunk_index; i >= 0; --i) { + uint64_t tmp = uint64_t{data_[i]} + (uint64_t{carry} << 32); + data_[i] = static_cast<uint32_t>(tmp / uint64_t{1000000000}); + carry = static_cast<uint32_t>(tmp % uint64_t{1000000000}); + } + + // If the highest chunk is now empty, remove it from view. + if (data_[chunk_index] == 0) --chunk_index; + + --decimal_start_; + assert(decimal_start_ != chunk_index); + data_[decimal_start_] = carry; + } + + // Fill the first set of digits. The first chunk might not be complete, so + // handle differently. + for (uint32_t first = data_[decimal_start_++]; first != 0; first /= 10) { + digits_[kDigitsPerChunk - ++size_] = first % 10 + '0'; + } + } + + private: + static constexpr int kDigitsPerChunk = 9; + + int decimal_start_; + int decimal_end_; + + char digits_[kDigitsPerChunk]; + int size_ = 0; + + absl::Span<uint32_t> data_; +}; + +// Converts a value of the form `x * 2^-exp` into a sequence of decimal digits. +// Requires `-exp < 0` and +// `-exp >= limits<MaxFloatType>::min_exponent - limits<MaxFloatType>::digits`. +class FractionalDigitGenerator { + public: + // Run the conversion for `v * 2^exp` and call `f(generator)`. + // This function will allocate enough stack space to perform the conversion. + static void RunConversion( + uint128 v, int exp, absl::FunctionRef<void(FractionalDigitGenerator)> f) { + using Limits = std::numeric_limits<MaxFloatType>; + assert(-exp < 0); + assert(-exp >= Limits::min_exponent - 128); + static_assert(StackArray::kMaxCapacity >= + (Limits::digits + 128 - Limits::min_exponent + 31) / 32, + ""); + StackArray::RunWithCapacity((Limits::digits + exp + 31) / 32, + [=](absl::Span<uint32_t> input) { + f(FractionalDigitGenerator(input, v, exp)); + }); + } + + // Returns true if there are any more non-zero digits left. + bool HasMoreDigits() const { return next_digit_ != 0 || chunk_index_ >= 0; } + + // Returns true if the remainder digits are greater than 5000... + bool IsGreaterThanHalf() const { + return next_digit_ > 5 || (next_digit_ == 5 && chunk_index_ >= 0); + } + // Returns true if the remainder digits are exactly 5000... + bool IsExactlyHalf() const { return next_digit_ == 5 && chunk_index_ < 0; } + + struct Digits { + int digit_before_nine; + int num_nines; + }; + + // Get the next set of digits. + // They are composed by a non-9 digit followed by a runs of zero or more 9s. + Digits GetDigits() { + Digits digits{next_digit_, 0}; + + next_digit_ = GetOneDigit(); + while (next_digit_ == 9) { + ++digits.num_nines; + next_digit_ = GetOneDigit(); + } + + return digits; + } + + private: + // Return the next digit. + int GetOneDigit() { + if (chunk_index_ < 0) return 0; + + uint32_t carry = 0; + for (int i = chunk_index_; i >= 0; --i) { + carry = MultiplyBy10WithCarry(&data_[i], carry); + } + // If the lowest chunk is now empty, remove it from view. + if (data_[chunk_index_] == 0) --chunk_index_; + return carry; + } + + FractionalDigitGenerator(absl::Span<uint32_t> data, uint128 v, int exp) + : chunk_index_(exp / 32), data_(data) { + const int offset = exp % 32; + // Right shift `v` by `exp` bits. + data_[chunk_index_] = static_cast<uint32_t>(v << (32 - offset)); + v >>= offset; + // Make sure we don't overflow the data. We already calculated that + // non-zero bits fit, so we might not have space for leading zero bits. + for (int pos = chunk_index_; v; v >>= 32) + data_[--pos] = static_cast<uint32_t>(v); + + // Fill next_digit_, as GetDigits expects it to be populated always. + next_digit_ = GetOneDigit(); + } + + int next_digit_; + int chunk_index_; + absl::Span<uint32_t> data_; +}; + +// Count the number of leading zero bits. +int LeadingZeros(uint64_t v) { return countl_zero(v); } +int LeadingZeros(uint128 v) { + auto high = static_cast<uint64_t>(v >> 64); + auto low = static_cast<uint64_t>(v); + return high != 0 ? countl_zero(high) : 64 + countl_zero(low); +} + +// Round up the text digits starting at `p`. +// The buffer must have an extra digit that is known to not need rounding. +// This is done below by having an extra '0' digit on the left. +void RoundUp(char *p) { + while (*p == '9' || *p == '.') { + if (*p == '9') *p = '0'; + --p; + } + ++*p; +} + +// Check the previous digit and round up or down to follow the round-to-even +// policy. +void RoundToEven(char *p) { + if (*p == '.') --p; + if (*p % 2 == 1) RoundUp(p); +} + +// Simple integral decimal digit printing for values that fit in 64-bits. +// Returns the pointer to the last written digit. +char *PrintIntegralDigitsFromRightFast(uint64_t v, char *p) { + do { + *--p = DivideBy10WithCarry(&v, 0) + '0'; + } while (v != 0); + return p; +} + +// Simple integral decimal digit printing for values that fit in 128-bits. +// Returns the pointer to the last written digit. +char *PrintIntegralDigitsFromRightFast(uint128 v, char *p) { + auto high = static_cast<uint64_t>(v >> 64); + auto low = static_cast<uint64_t>(v); + + while (high != 0) { + uint64_t carry = DivideBy10WithCarry(&high, 0); + carry = DivideBy10WithCarry(&low, carry); + *--p = carry + '0'; + } + return PrintIntegralDigitsFromRightFast(low, p); +} + +// Simple fractional decimal digit printing for values that fir in 64-bits after +// shifting. +// Performs rounding if necessary to fit within `precision`. +// Returns the pointer to one after the last character written. +char *PrintFractionalDigitsFast(uint64_t v, char *start, int exp, + int precision) { + char *p = start; + v <<= (64 - exp); + while (precision > 0) { + if (!v) return p; + *p++ = MultiplyBy10WithCarry(&v, uint64_t{0}) + '0'; + --precision; + } + + // We need to round. + if (v < 0x8000000000000000) { + // We round down, so nothing to do. + } else if (v > 0x8000000000000000) { + // We round up. + RoundUp(p - 1); + } else { + RoundToEven(p - 1); + } + + assert(precision == 0); + // Precision can only be zero here. + return p; +} + +// Simple fractional decimal digit printing for values that fir in 128-bits +// after shifting. +// Performs rounding if necessary to fit within `precision`. +// Returns the pointer to one after the last character written. +char *PrintFractionalDigitsFast(uint128 v, char *start, int exp, + int precision) { + char *p = start; + v <<= (128 - exp); + auto high = static_cast<uint64_t>(v >> 64); + auto low = static_cast<uint64_t>(v); + + // While we have digits to print and `low` is not empty, do the long + // multiplication. + while (precision > 0 && low != 0) { + uint64_t carry = MultiplyBy10WithCarry(&low, uint64_t{0}); + carry = MultiplyBy10WithCarry(&high, carry); + + *p++ = carry + '0'; + --precision; + } + + // Now `low` is empty, so use a faster approach for the rest of the digits. + // This block is pretty much the same as the main loop for the 64-bit case + // above. + while (precision > 0) { + if (!high) return p; + *p++ = MultiplyBy10WithCarry(&high, uint64_t{0}) + '0'; + --precision; + } + + // We need to round. + if (high < 0x8000000000000000) { + // We round down, so nothing to do. + } else if (high > 0x8000000000000000 || low != 0) { + // We round up. + RoundUp(p - 1); + } else { + RoundToEven(p - 1); + } + + assert(precision == 0); + // Precision can only be zero here. + return p; +} + +struct FormatState { + char sign_char; + int precision; + const FormatConversionSpecImpl &conv; + FormatSinkImpl *sink; + + // In `alt` mode (flag #) we keep the `.` even if there are no fractional + // digits. In non-alt mode, we strip it. + bool ShouldPrintDot() const { return precision != 0 || conv.has_alt_flag(); } +}; + +struct Padding { + int left_spaces; + int zeros; + int right_spaces; +}; + +Padding ExtraWidthToPadding(size_t total_size, const FormatState &state) { + if (state.conv.width() < 0 || + static_cast<size_t>(state.conv.width()) <= total_size) { + return {0, 0, 0}; + } + int missing_chars = state.conv.width() - total_size; + if (state.conv.has_left_flag()) { + return {0, 0, missing_chars}; + } else if (state.conv.has_zero_flag()) { + return {0, missing_chars, 0}; + } else { + return {missing_chars, 0, 0}; + } +} + +void FinalPrint(const FormatState &state, absl::string_view data, + int padding_offset, int trailing_zeros, + absl::string_view data_postfix) { + if (state.conv.width() < 0) { + // No width specified. Fast-path. + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(data); + state.sink->Append(trailing_zeros, '0'); + state.sink->Append(data_postfix); + return; + } + + auto padding = ExtraWidthToPadding((state.sign_char != '\0' ? 1 : 0) + + data.size() + data_postfix.size() + + static_cast<size_t>(trailing_zeros), + state); + + state.sink->Append(padding.left_spaces, ' '); + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + // Padding in general needs to be inserted somewhere in the middle of `data`. + state.sink->Append(data.substr(0, padding_offset)); + state.sink->Append(padding.zeros, '0'); + state.sink->Append(data.substr(padding_offset)); + state.sink->Append(trailing_zeros, '0'); + state.sink->Append(data_postfix); + state.sink->Append(padding.right_spaces, ' '); +} + +// Fastpath %f formatter for when the shifted value fits in a simple integral +// type. +// Prints `v*2^exp` with the options from `state`. +template <typename Int> +void FormatFFast(Int v, int exp, const FormatState &state) { + constexpr int input_bits = sizeof(Int) * 8; + + static constexpr size_t integral_size = + /* in case we need to round up an extra digit */ 1 + + /* decimal digits for uint128 */ 40 + 1; + char buffer[integral_size + /* . */ 1 + /* max digits uint128 */ 128]; + buffer[integral_size] = '.'; + char *const integral_digits_end = buffer + integral_size; + char *integral_digits_start; + char *const fractional_digits_start = buffer + integral_size + 1; + char *fractional_digits_end = fractional_digits_start; + + if (exp >= 0) { + const int total_bits = input_bits - LeadingZeros(v) + exp; + integral_digits_start = + total_bits <= 64 + ? PrintIntegralDigitsFromRightFast(static_cast<uint64_t>(v) << exp, + integral_digits_end) + : PrintIntegralDigitsFromRightFast(static_cast<uint128>(v) << exp, + integral_digits_end); + } else { + exp = -exp; + + integral_digits_start = PrintIntegralDigitsFromRightFast( + exp < input_bits ? v >> exp : 0, integral_digits_end); + // PrintFractionalDigits may pull a carried 1 all the way up through the + // integral portion. + integral_digits_start[-1] = '0'; + + fractional_digits_end = + exp <= 64 ? PrintFractionalDigitsFast(v, fractional_digits_start, exp, + state.precision) + : PrintFractionalDigitsFast(static_cast<uint128>(v), + fractional_digits_start, exp, + state.precision); + // There was a carry, so include the first digit too. + if (integral_digits_start[-1] != '0') --integral_digits_start; + } + + size_t size = fractional_digits_end - integral_digits_start; + + // In `alt` mode (flag #) we keep the `.` even if there are no fractional + // digits. In non-alt mode, we strip it. + if (!state.ShouldPrintDot()) --size; + FinalPrint(state, absl::string_view(integral_digits_start, size), + /*padding_offset=*/0, + static_cast<int>(state.precision - (fractional_digits_end - + fractional_digits_start)), + /*data_postfix=*/""); +} + +// Slow %f formatter for when the shifted value does not fit in a uint128, and +// `exp > 0`. +// Prints `v*2^exp` with the options from `state`. +// This one is guaranteed to not have fractional digits, so we don't have to +// worry about anything after the `.`. +void FormatFPositiveExpSlow(uint128 v, int exp, const FormatState &state) { + BinaryToDecimal::RunConversion(v, exp, [&](BinaryToDecimal btd) { + const size_t total_digits = + btd.TotalDigits() + + (state.ShouldPrintDot() ? static_cast<size_t>(state.precision) + 1 : 0); + + const auto padding = ExtraWidthToPadding( + total_digits + (state.sign_char != '\0' ? 1 : 0), state); + + state.sink->Append(padding.left_spaces, ' '); + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(padding.zeros, '0'); + + do { + state.sink->Append(btd.CurrentDigits()); + } while (btd.AdvanceDigits()); + + if (state.ShouldPrintDot()) state.sink->Append(1, '.'); + state.sink->Append(state.precision, '0'); + state.sink->Append(padding.right_spaces, ' '); + }); +} + +// Slow %f formatter for when the shifted value does not fit in a uint128, and +// `exp < 0`. +// Prints `v*2^exp` with the options from `state`. +// This one is guaranteed to be < 1.0, so we don't have to worry about integral +// digits. +void FormatFNegativeExpSlow(uint128 v, int exp, const FormatState &state) { + const size_t total_digits = + /* 0 */ 1 + + (state.ShouldPrintDot() ? static_cast<size_t>(state.precision) + 1 : 0); + auto padding = + ExtraWidthToPadding(total_digits + (state.sign_char ? 1 : 0), state); + padding.zeros += 1; + state.sink->Append(padding.left_spaces, ' '); + if (state.sign_char != '\0') state.sink->Append(1, state.sign_char); + state.sink->Append(padding.zeros, '0'); + + if (state.ShouldPrintDot()) state.sink->Append(1, '.'); + + // Print digits + int digits_to_go = state.precision; + + FractionalDigitGenerator::RunConversion( + v, exp, [&](FractionalDigitGenerator digit_gen) { + // There are no digits to print here. + if (state.precision == 0) return; + + // We go one digit at a time, while keeping track of runs of nines. + // The runs of nines are used to perform rounding when necessary. + + while (digits_to_go > 0 && digit_gen.HasMoreDigits()) { + auto digits = digit_gen.GetDigits(); + + // Now we have a digit and a run of nines. + // See if we can print them all. + if (digits.num_nines + 1 < digits_to_go) { + // We don't have to round yet, so print them. + state.sink->Append(1, digits.digit_before_nine + '0'); + state.sink->Append(digits.num_nines, '9'); + digits_to_go -= digits.num_nines + 1; + + } else { + // We can't print all the nines, see where we have to truncate. + + bool round_up = false; + if (digits.num_nines + 1 > digits_to_go) { + // We round up at a nine. No need to print them. + round_up = true; + } else { + // We can fit all the nines, but truncate just after it. + if (digit_gen.IsGreaterThanHalf()) { + round_up = true; + } else if (digit_gen.IsExactlyHalf()) { + // Round to even + round_up = + digits.num_nines != 0 || digits.digit_before_nine % 2 == 1; + } + } + + if (round_up) { + state.sink->Append(1, digits.digit_before_nine + '1'); + --digits_to_go; + // The rest will be zeros. + } else { + state.sink->Append(1, digits.digit_before_nine + '0'); + state.sink->Append(digits_to_go - 1, '9'); + digits_to_go = 0; + } + return; + } + } + }); + + state.sink->Append(digits_to_go, '0'); + state.sink->Append(padding.right_spaces, ' '); +} + +template <typename Int> +void FormatF(Int mantissa, int exp, const FormatState &state) { + if (exp >= 0) { + const int total_bits = sizeof(Int) * 8 - LeadingZeros(mantissa) + exp; + + // Fallback to the slow stack-based approach if we can't do it in a 64 or + // 128 bit state. + if (ABSL_PREDICT_FALSE(total_bits > 128)) { + return FormatFPositiveExpSlow(mantissa, exp, state); + } + } else { + // Fallback to the slow stack-based approach if we can't do it in a 64 or + // 128 bit state. + if (ABSL_PREDICT_FALSE(exp < -128)) { + return FormatFNegativeExpSlow(mantissa, -exp, state); + } + } + return FormatFFast(mantissa, exp, state); +} + +// Grab the group of four bits (nibble) from `n`. E.g., nibble 1 corresponds to +// bits 4-7. +template <typename Int> +uint8_t GetNibble(Int n, int nibble_index) { + constexpr Int mask_low_nibble = Int{0xf}; + int shift = nibble_index * 4; + n &= mask_low_nibble << shift; + return static_cast<uint8_t>((n >> shift) & 0xf); +} + +// Add one to the given nibble, applying carry to higher nibbles. Returns true +// if overflow, false otherwise. +template <typename Int> +bool IncrementNibble(int nibble_index, Int *n) { + constexpr int kShift = sizeof(Int) * 8 - 1; + constexpr int kNumNibbles = sizeof(Int) * 8 / 4; + Int before = *n >> kShift; + // Here we essentially want to take the number 1 and move it into the requsted + // nibble, then add it to *n to effectively increment the nibble. However, + // ASan will complain if we try to shift the 1 beyond the limits of the Int, + // i.e., if the nibble_index is out of range. So therefore we check for this + // and if we are out of range we just add 0 which leaves *n unchanged, which + // seems like the reasonable thing to do in that case. + *n += ((nibble_index >= kNumNibbles) ? 0 : (Int{1} << (nibble_index * 4))); + Int after = *n >> kShift; + return (before && !after) || (nibble_index >= kNumNibbles); +} + +// Return a mask with 1's in the given nibble and all lower nibbles. +template <typename Int> +Int MaskUpToNibbleInclusive(int nibble_index) { + constexpr int kNumNibbles = sizeof(Int) * 8 / 4; + static const Int ones = ~Int{0}; + return ones >> std::max(0, 4 * (kNumNibbles - nibble_index - 1)); +} + +// Return a mask with 1's below the given nibble. +template <typename Int> +Int MaskUpToNibbleExclusive(int nibble_index) { + return nibble_index <= 0 ? 0 : MaskUpToNibbleInclusive<Int>(nibble_index - 1); +} + +template <typename Int> +Int MoveToNibble(uint8_t nibble, int nibble_index) { + return Int{nibble} << (4 * nibble_index); +} + +// Given mantissa size, find optimal # of mantissa bits to put in initial digit. +// +// In the hex representation we keep a single hex digit to the left of the dot. +// However, the question as to how many bits of the mantissa should be put into +// that hex digit in theory is arbitrary, but in practice it is optimal to +// choose based on the size of the mantissa. E.g., for a `double`, there are 53 +// mantissa bits, so that means that we should put 1 bit to the left of the dot, +// thereby leaving 52 bits to the right, which is evenly divisible by four and +// thus all fractional digits represent actual precision. For a `long double`, +// on the other hand, there are 64 bits of mantissa, thus we can use all four +// bits for the initial hex digit and still have a number left over (60) that is +// a multiple of four. Once again, the goal is to have all fractional digits +// represent real precision. +template <typename Float> +constexpr int HexFloatLeadingDigitSizeInBits() { + return std::numeric_limits<Float>::digits % 4 > 0 + ? std::numeric_limits<Float>::digits % 4 + : 4; +} + +// This function captures the rounding behavior of glibc for hex float +// representations. E.g. when rounding 0x1.ab800000 to a precision of .2 +// ("%.2a") glibc will round up because it rounds toward the even number (since +// 0xb is an odd number, it will round up to 0xc). However, when rounding at a +// point that is not followed by 800000..., it disregards the parity and rounds +// up if > 8 and rounds down if < 8. +template <typename Int> +bool HexFloatNeedsRoundUp(Int mantissa, int final_nibble_displayed, + uint8_t leading) { + // If the last nibble (hex digit) to be displayed is the lowest on in the + // mantissa then that means that we don't have any further nibbles to inform + // rounding, so don't round. + if (final_nibble_displayed <= 0) { + return false; + } + int rounding_nibble_idx = final_nibble_displayed - 1; + constexpr int kTotalNibbles = sizeof(Int) * 8 / 4; + assert(final_nibble_displayed <= kTotalNibbles); + Int mantissa_up_to_rounding_nibble_inclusive = + mantissa & MaskUpToNibbleInclusive<Int>(rounding_nibble_idx); + Int eight = MoveToNibble<Int>(8, rounding_nibble_idx); + if (mantissa_up_to_rounding_nibble_inclusive != eight) { + return mantissa_up_to_rounding_nibble_inclusive > eight; + } + // Nibble in question == 8. + uint8_t round_if_odd = (final_nibble_displayed == kTotalNibbles) + ? leading + : GetNibble(mantissa, final_nibble_displayed); + return round_if_odd % 2 == 1; +} + +// Stores values associated with a Float type needed by the FormatA +// implementation in order to avoid templatizing that function by the Float +// type. +struct HexFloatTypeParams { + template <typename Float> + explicit HexFloatTypeParams(Float) + : min_exponent(std::numeric_limits<Float>::min_exponent - 1), + leading_digit_size_bits(HexFloatLeadingDigitSizeInBits<Float>()) { + assert(leading_digit_size_bits >= 1 && leading_digit_size_bits <= 4); + } + + int min_exponent; + int leading_digit_size_bits; +}; + +// Hex Float Rounding. First check if we need to round; if so, then we do that +// by manipulating (incrementing) the mantissa, that way we can later print the +// mantissa digits by iterating through them in the same way regardless of +// whether a rounding happened. +template <typename Int> +void FormatARound(bool precision_specified, const FormatState &state, + uint8_t *leading, Int *mantissa, int *exp) { + constexpr int kTotalNibbles = sizeof(Int) * 8 / 4; + // Index of the last nibble that we could display given precision. + int final_nibble_displayed = + precision_specified ? std::max(0, (kTotalNibbles - state.precision)) : 0; + if (HexFloatNeedsRoundUp(*mantissa, final_nibble_displayed, *leading)) { + // Need to round up. + bool overflow = IncrementNibble(final_nibble_displayed, mantissa); + *leading += (overflow ? 1 : 0); + if (ABSL_PREDICT_FALSE(*leading > 15)) { + // We have overflowed the leading digit. This would mean that we would + // need two hex digits to the left of the dot, which is not allowed. So + // adjust the mantissa and exponent so that the result is always 1.0eXXX. + *leading = 1; + *mantissa = 0; + *exp += 4; + } + } + // Now that we have handled a possible round-up we can go ahead and zero out + // all the nibbles of the mantissa that we won't need. + if (precision_specified) { + *mantissa &= ~MaskUpToNibbleExclusive<Int>(final_nibble_displayed); + } +} + +template <typename Int> +void FormatANormalize(const HexFloatTypeParams float_traits, uint8_t *leading, + Int *mantissa, int *exp) { + constexpr int kIntBits = sizeof(Int) * 8; + static const Int kHighIntBit = Int{1} << (kIntBits - 1); + const int kLeadDigitBitsCount = float_traits.leading_digit_size_bits; + // Normalize mantissa so that highest bit set is in MSB position, unless we + // get interrupted by the exponent threshold. + while (*mantissa && !(*mantissa & kHighIntBit)) { + if (ABSL_PREDICT_FALSE(*exp - 1 < float_traits.min_exponent)) { + *mantissa >>= (float_traits.min_exponent - *exp); + *exp = float_traits.min_exponent; + return; + } + *mantissa <<= 1; + --*exp; + } + // Extract bits for leading digit then shift them away leaving the + // fractional part. + *leading = + static_cast<uint8_t>(*mantissa >> (kIntBits - kLeadDigitBitsCount)); + *exp -= (*mantissa != 0) ? kLeadDigitBitsCount : *exp; + *mantissa <<= kLeadDigitBitsCount; +} + +template <typename Int> +void FormatA(const HexFloatTypeParams float_traits, Int mantissa, int exp, + bool uppercase, const FormatState &state) { + // Int properties. + constexpr int kIntBits = sizeof(Int) * 8; + constexpr int kTotalNibbles = sizeof(Int) * 8 / 4; + // Did the user specify a precision explicitly? + const bool precision_specified = state.conv.precision() >= 0; + + // ========== Normalize/Denormalize ========== + exp += kIntBits; // make all digits fractional digits. + // This holds the (up to four) bits of leading digit, i.e., the '1' in the + // number 0x1.e6fp+2. It's always > 0 unless number is zero or denormal. + uint8_t leading = 0; + FormatANormalize(float_traits, &leading, &mantissa, &exp); + + // =============== Rounding ================== + // Check if we need to round; if so, then we do that by manipulating + // (incrementing) the mantissa before beginning to print characters. + FormatARound(precision_specified, state, &leading, &mantissa, &exp); + + // ============= Format Result =============== + // This buffer holds the "0x1.ab1de3" portion of "0x1.ab1de3pe+2". Compute the + // size with long double which is the largest of the floats. + constexpr size_t kBufSizeForHexFloatRepr = + 2 // 0x + + std::numeric_limits<MaxFloatType>::digits / 4 // number of hex digits + + 1 // round up + + 1; // "." (dot) + char digits_buffer[kBufSizeForHexFloatRepr]; + char *digits_iter = digits_buffer; + const char *const digits = + static_cast<const char *>("0123456789ABCDEF0123456789abcdef") + + (uppercase ? 0 : 16); + + // =============== Hex Prefix ================ + *digits_iter++ = '0'; + *digits_iter++ = uppercase ? 'X' : 'x'; + + // ========== Non-Fractional Digit =========== + *digits_iter++ = digits[leading]; + + // ================== Dot ==================== + // There are three reasons we might need a dot. Keep in mind that, at this + // point, the mantissa holds only the fractional part. + if ((precision_specified && state.precision > 0) || + (!precision_specified && mantissa > 0) || state.conv.has_alt_flag()) { + *digits_iter++ = '.'; + } + + // ============ Fractional Digits ============ + int digits_emitted = 0; + while (mantissa > 0) { + *digits_iter++ = digits[GetNibble(mantissa, kTotalNibbles - 1)]; + mantissa <<= 4; + ++digits_emitted; + } + int trailing_zeros = + precision_specified ? state.precision - digits_emitted : 0; + assert(trailing_zeros >= 0); + auto digits_result = string_view(digits_buffer, digits_iter - digits_buffer); + + // =============== Exponent ================== + constexpr size_t kBufSizeForExpDecRepr = + numbers_internal::kFastToBufferSize // requred for FastIntToBuffer + + 1 // 'p' or 'P' + + 1; // '+' or '-' + char exp_buffer[kBufSizeForExpDecRepr]; + exp_buffer[0] = uppercase ? 'P' : 'p'; + exp_buffer[1] = exp >= 0 ? '+' : '-'; + numbers_internal::FastIntToBuffer(exp < 0 ? -exp : exp, exp_buffer + 2); + + // ============ Assemble Result ============== + FinalPrint(state, // + digits_result, // 0xN.NNN... + 2, // offset in `data` to start padding if needed. + trailing_zeros, // num remaining mantissa padding zeros + exp_buffer); // exponent +} + +char *CopyStringTo(absl::string_view v, char *out) { std::memcpy(out, v.data(), v.size()); return out + v.size(); } template <typename Float> -bool FallbackToSnprintf(const Float v, const ConversionSpec &conv, +bool FallbackToSnprintf(const Float v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink) { int w = conv.width() >= 0 ? conv.width() : 0; int p = conv.precision() >= 0 ? conv.precision() : -1; @@ -33,17 +951,17 @@ bool FallbackToSnprintf(const Float v, const ConversionSpec &conv, if (std::is_same<long double, Float>()) { *fp++ = 'L'; } - *fp++ = FormatConversionCharToChar(conv.conv()); + *fp++ = FormatConversionCharToChar(conv.conversion_char()); *fp = 0; assert(fp < fmt + sizeof(fmt)); } std::string space(512, '\0'); - string_view result; + absl::string_view result; while (true) { int n = snprintf(&space[0], space.size(), fmt, w, p, v); if (n < 0) return false; if (static_cast<size_t>(n) < space.size()) { - result = string_view(space.data(), n); + result = absl::string_view(space.data(), n); break; } space.resize(n + 1); @@ -96,21 +1014,24 @@ enum class FormatStyle { Fixed, Precision }; // Otherwise, return false. template <typename Float> bool ConvertNonNumericFloats(char sign_char, Float v, - const ConversionSpec &conv, FormatSinkImpl *sink) { + const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { char text[4], *ptr = text; - if (sign_char) *ptr++ = sign_char; + if (sign_char != '\0') *ptr++ = sign_char; if (std::isnan(v)) { - ptr = std::copy_n(FormatConversionCharIsUpper(conv.conv()) ? "NAN" : "nan", - 3, ptr); + ptr = std::copy_n( + FormatConversionCharIsUpper(conv.conversion_char()) ? "NAN" : "nan", 3, + ptr); } else if (std::isinf(v)) { - ptr = std::copy_n(FormatConversionCharIsUpper(conv.conv()) ? "INF" : "inf", - 3, ptr); + ptr = std::copy_n( + FormatConversionCharIsUpper(conv.conversion_char()) ? "INF" : "inf", 3, + ptr); } else { return false; } return sink->PutPaddedString(string_view(text, ptr - text), conv.width(), -1, - conv.flags().left); + conv.has_left_flag()); } // Round up the last digit of the value. @@ -170,7 +1091,12 @@ constexpr bool CanFitMantissa() { template <typename Float> struct Decomposed { - Float mantissa; + using MantissaType = + absl::conditional_t<std::is_same<long double, Float>::value, uint128, + uint64_t>; + static_assert(std::numeric_limits<Float>::digits <= sizeof(MantissaType) * 8, + ""); + MantissaType mantissa; int exponent; }; @@ -181,7 +1107,8 @@ Decomposed<Float> Decompose(Float v) { Float m = std::frexp(v, &exp); m = std::ldexp(m, std::numeric_limits<Float>::digits); exp -= std::numeric_limits<Float>::digits; - return {m, exp}; + + return {static_cast<typename Decomposed<Float>::MantissaType>(m), exp}; } // Print 'digits' as decimal. @@ -350,31 +1277,32 @@ bool FloatToBuffer(Decomposed<Float> decomposed, int precision, Buffer *out, return false; } -void WriteBufferToSink(char sign_char, string_view str, - const ConversionSpec &conv, FormatSinkImpl *sink) { +void WriteBufferToSink(char sign_char, absl::string_view str, + const FormatConversionSpecImpl &conv, + FormatSinkImpl *sink) { int left_spaces = 0, zeros = 0, right_spaces = 0; int missing_chars = conv.width() >= 0 ? std::max(conv.width() - static_cast<int>(str.size()) - static_cast<int>(sign_char != 0), 0) : 0; - if (conv.flags().left) { + if (conv.has_left_flag()) { right_spaces = missing_chars; - } else if (conv.flags().zero) { + } else if (conv.has_zero_flag()) { zeros = missing_chars; } else { left_spaces = missing_chars; } sink->Append(left_spaces, ' '); - if (sign_char) sink->Append(1, sign_char); + if (sign_char != '\0') sink->Append(1, sign_char); sink->Append(zeros, '0'); sink->Append(str); sink->Append(right_spaces, ' '); } template <typename Float> -bool FloatToSink(const Float v, const ConversionSpec &conv, +bool FloatToSink(const Float v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink) { // Print the sign or the sign column. Float abs_v = v; @@ -382,9 +1310,9 @@ bool FloatToSink(const Float v, const ConversionSpec &conv, if (std::signbit(abs_v)) { sign_char = '-'; abs_v = -abs_v; - } else if (conv.flags().show_pos) { + } else if (conv.has_show_pos_flag()) { sign_char = '+'; - } else if (conv.flags().sign_col) { + } else if (conv.has_sign_col_flag()) { sign_char = ' '; } @@ -401,89 +1329,91 @@ bool FloatToSink(const Float v, const ConversionSpec &conv, Buffer buffer; - switch (conv.conv()) { - case ConversionChar::f: - case ConversionChar::F: - if (!FloatToBuffer<FormatStyle::Fixed>(decomposed, precision, &buffer, - nullptr)) { - return FallbackToSnprintf(v, conv, sink); - } - if (!conv.flags().alt && buffer.back() == '.') buffer.pop_back(); - break; - - case ConversionChar::e: - case ConversionChar::E: - if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, - &exp)) { - return FallbackToSnprintf(v, conv, sink); - } - if (!conv.flags().alt && buffer.back() == '.') buffer.pop_back(); - PrintExponent(exp, FormatConversionCharIsUpper(conv.conv()) ? 'E' : 'e', - &buffer); - break; - - case ConversionChar::g: - case ConversionChar::G: - precision = std::max(0, precision - 1); - if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, - &exp)) { - return FallbackToSnprintf(v, conv, sink); - } - if (precision + 1 > exp && exp >= -4) { - if (exp < 0) { - // Have 1.23456, needs 0.00123456 - // Move the first digit - buffer.begin[1] = *buffer.begin; - // Add some zeros - for (; exp < -1; ++exp) *buffer.begin-- = '0'; - *buffer.begin-- = '.'; - *buffer.begin = '0'; - } else if (exp > 0) { - // Have 1.23456, needs 1234.56 - // Move the '.' exp positions to the right. - std::rotate(buffer.begin + 1, buffer.begin + 2, - buffer.begin + exp + 2); - } - exp = 0; - } - if (!conv.flags().alt) { - while (buffer.back() == '0') buffer.pop_back(); - if (buffer.back() == '.') buffer.pop_back(); - } - if (exp) { - PrintExponent(exp, FormatConversionCharIsUpper(conv.conv()) ? 'E' : 'e', - &buffer); - } - break; + FormatConversionChar c = conv.conversion_char(); - case ConversionChar::a: - case ConversionChar::A: + if (c == FormatConversionCharInternal::f || + c == FormatConversionCharInternal::F) { + FormatF(decomposed.mantissa, decomposed.exponent, + {sign_char, precision, conv, sink}); + return true; + } else if (c == FormatConversionCharInternal::e || + c == FormatConversionCharInternal::E) { + if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, + &exp)) { return FallbackToSnprintf(v, conv, sink); - - default: - return false; + } + if (!conv.has_alt_flag() && buffer.back() == '.') buffer.pop_back(); + PrintExponent( + exp, FormatConversionCharIsUpper(conv.conversion_char()) ? 'E' : 'e', + &buffer); + } else if (c == FormatConversionCharInternal::g || + c == FormatConversionCharInternal::G) { + precision = std::max(0, precision - 1); + if (!FloatToBuffer<FormatStyle::Precision>(decomposed, precision, &buffer, + &exp)) { + return FallbackToSnprintf(v, conv, sink); + } + if (precision + 1 > exp && exp >= -4) { + if (exp < 0) { + // Have 1.23456, needs 0.00123456 + // Move the first digit + buffer.begin[1] = *buffer.begin; + // Add some zeros + for (; exp < -1; ++exp) *buffer.begin-- = '0'; + *buffer.begin-- = '.'; + *buffer.begin = '0'; + } else if (exp > 0) { + // Have 1.23456, needs 1234.56 + // Move the '.' exp positions to the right. + std::rotate(buffer.begin + 1, buffer.begin + 2, buffer.begin + exp + 2); + } + exp = 0; + } + if (!conv.has_alt_flag()) { + while (buffer.back() == '0') buffer.pop_back(); + if (buffer.back() == '.') buffer.pop_back(); + } + if (exp) { + PrintExponent( + exp, FormatConversionCharIsUpper(conv.conversion_char()) ? 'E' : 'e', + &buffer); + } + } else if (c == FormatConversionCharInternal::a || + c == FormatConversionCharInternal::A) { + bool uppercase = (c == FormatConversionCharInternal::A); + FormatA(HexFloatTypeParams(Float{}), decomposed.mantissa, + decomposed.exponent, uppercase, {sign_char, precision, conv, sink}); + return true; + } else { + return false; } WriteBufferToSink(sign_char, - string_view(buffer.begin, buffer.end - buffer.begin), conv, - sink); + absl::string_view(buffer.begin, buffer.end - buffer.begin), + conv, sink); return true; } } // namespace -bool ConvertFloatImpl(long double v, const ConversionSpec &conv, +bool ConvertFloatImpl(long double v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink) { + if (IsDoubleDouble()) { + // This is the `double-double` representation of `long double`. We do not + // handle it natively. Fallback to snprintf. + return FallbackToSnprintf(v, conv, sink); + } + return FloatToSink(v, conv, sink); } -bool ConvertFloatImpl(float v, const ConversionSpec &conv, +bool ConvertFloatImpl(float v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink) { - return FloatToSink(v, conv, sink); + return FloatToSink(static_cast<double>(v), conv, sink); } -bool ConvertFloatImpl(double v, const ConversionSpec &conv, +bool ConvertFloatImpl(double v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink) { return FloatToSink(v, conv, sink); } diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/float_conversion.h b/third_party/abseil-cpp/absl/strings/internal/str_format/float_conversion.h index 49a6a63630..71100e7142 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/float_conversion.h +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/float_conversion.h @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ @@ -7,13 +21,13 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -bool ConvertFloatImpl(float v, const ConversionSpec &conv, +bool ConvertFloatImpl(float v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink); -bool ConvertFloatImpl(double v, const ConversionSpec &conv, +bool ConvertFloatImpl(double v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink); -bool ConvertFloatImpl(long double v, const ConversionSpec &conv, +bool ConvertFloatImpl(long double v, const FormatConversionSpecImpl &conv, FormatSinkImpl *sink); } // namespace str_format_internal diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/output.h b/third_party/abseil-cpp/absl/strings/internal/str_format/output.h index 28b288b7dd..8030dae00f 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/output.h +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/output.h @@ -30,9 +30,6 @@ namespace absl { ABSL_NAMESPACE_BEGIN - -class Cord; - namespace str_format_internal { // RawSink implementation that writes into a char* buffer. @@ -77,12 +74,6 @@ inline void AbslFormatFlush(std::ostream* out, string_view s) { out->write(s.data(), s.size()); } -template <class AbslCord, typename = typename std::enable_if< - std::is_same<AbslCord, absl::Cord>::value>::type> -inline void AbslFormatFlush(AbslCord* out, string_view s) { - out->Append(s); -} - inline void AbslFormatFlush(FILERawSink* sink, string_view v) { sink->Write(v); } @@ -91,10 +82,11 @@ inline void AbslFormatFlush(BufferRawSink* sink, string_view v) { sink->Write(v); } +// This is a SFINAE to get a better compiler error message when the type +// is not supported. template <typename T> -auto InvokeFlush(T* out, string_view s) - -> decltype(str_format_internal::AbslFormatFlush(out, s)) { - str_format_internal::AbslFormatFlush(out, s); +auto InvokeFlush(T* out, string_view s) -> decltype(AbslFormatFlush(out, s)) { + AbslFormatFlush(out, s); } } // namespace str_format_internal diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/output_test.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/output_test.cc index e54e6f70a5..ce2e91a0bb 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/output_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/output_test.cc @@ -19,6 +19,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/strings/cord.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -37,6 +38,12 @@ TEST(InvokeFlush, Stream) { EXPECT_EQ(str.str(), "ABCDEF"); } +TEST(InvokeFlush, Cord) { + absl::Cord str("ABC"); + str_format_internal::InvokeFlush(&str, "DEF"); + EXPECT_EQ(str, "ABCDEF"); +} + TEST(BufferRawSink, Limits) { char buf[16]; { @@ -70,4 +77,3 @@ TEST(BufferRawSink, Limits) { } // namespace ABSL_NAMESPACE_END } // namespace absl - diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/parser.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/parser.cc index aab68db94b..2c9c07dacc 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/parser.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/parser.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "absl/strings/internal/str_format/parser.h" #include <assert.h> @@ -17,63 +31,70 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -using CC = ConversionChar; +using CC = FormatConversionCharInternal; using LM = LengthMod; +// Abbreviations to fit in the table below. +constexpr auto f_sign = Flags::kSignCol; +constexpr auto f_alt = Flags::kAlt; +constexpr auto f_pos = Flags::kShowPos; +constexpr auto f_left = Flags::kLeft; +constexpr auto f_zero = Flags::kZero; + ABSL_CONST_INIT const ConvTag kTags[256] = { - {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 - {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f - {}, {}, {}, {}, {}, {}, {}, {}, // 10-17 - {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f - {}, {}, {}, {}, {}, {}, {}, {}, // 20-27 - {}, {}, {}, {}, {}, {}, {}, {}, // 28-2f - {}, {}, {}, {}, {}, {}, {}, {}, // 30-37 - {}, {}, {}, {}, {}, {}, {}, {}, // 38-3f - {}, CC::A, {}, CC::C, {}, CC::E, CC::F, CC::G, // @ABCDEFG - {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO - {}, {}, {}, CC::S, {}, {}, {}, {}, // PQRSTUVW - CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ - {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg - LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno - CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw - CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}! - {}, {}, {}, {}, {}, {}, {}, {}, // 80-87 - {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f - {}, {}, {}, {}, {}, {}, {}, {}, // 90-97 - {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f - {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7 - {}, {}, {}, {}, {}, {}, {}, {}, // a8-af - {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7 - {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf - {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7 - {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf - {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7 - {}, {}, {}, {}, {}, {}, {}, {}, // d8-df - {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7 - {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef - {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7 - {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff + {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 + {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f + {}, {}, {}, {}, {}, {}, {}, {}, // 10-17 + {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f + f_sign, {}, {}, f_alt, {}, {}, {}, {}, // !"#$%&' + {}, {}, {}, f_pos, {}, f_left, {}, {}, // ()*+,-./ + f_zero, {}, {}, {}, {}, {}, {}, {}, // 01234567 + {}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>? + {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG + {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO + {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW + CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ + {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg + LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno + CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw + CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}! + {}, {}, {}, {}, {}, {}, {}, {}, // 80-87 + {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f + {}, {}, {}, {}, {}, {}, {}, {}, // 90-97 + {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f + {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7 + {}, {}, {}, {}, {}, {}, {}, {}, // a8-af + {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7 + {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf + {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7 + {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf + {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7 + {}, {}, {}, {}, {}, {}, {}, {}, // d8-df + {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7 + {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef + {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7 + {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff }; namespace { bool CheckFastPathSetting(const UnboundConversion& conv) { - bool should_be_basic = !conv.flags.left && // - !conv.flags.show_pos && // - !conv.flags.sign_col && // - !conv.flags.alt && // - !conv.flags.zero && // - (conv.width.value() == -1) && - (conv.precision.value() == -1); - if (should_be_basic != conv.flags.basic) { + bool width_precision_needed = + conv.width.value() >= 0 || conv.precision.value() >= 0; + if (width_precision_needed && conv.flags == Flags::kBasic) { fprintf(stderr, "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d " "width=%d precision=%d\n", - conv.flags.basic, conv.flags.left, conv.flags.show_pos, - conv.flags.sign_col, conv.flags.alt, conv.flags.zero, - conv.width.value(), conv.precision.value()); + conv.flags == Flags::kBasic ? 1 : 0, + FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0, + FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0, + FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0, + FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0, + FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(), + conv.precision.value()); + return false; } - return should_be_basic == conv.flags.basic; + return true; } template <bool is_positional> @@ -117,40 +138,21 @@ const char *ConsumeConversion(const char *pos, const char *const end, ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); // We should start with the basic flag on. - assert(conv->flags.basic); + assert(conv->flags == Flags::kBasic); // Any non alpha character makes this conversion not basic. // This includes flags (-+ #0), width (1-9, *) or precision (.). // All conversion characters and length modifiers are alpha characters. if (c < 'A') { - conv->flags.basic = false; - - for (; c <= '0';) { - // FIXME: We might be able to speed this up reusing the lookup table from - // above. It might require changing Flags to be a plain integer where we - // can |= a value. - switch (c) { - case '-': - conv->flags.left = true; - break; - case '+': - conv->flags.show_pos = true; - break; - case ' ': - conv->flags.sign_col = true; - break; - case '#': - conv->flags.alt = true; - break; - case '0': - conv->flags.zero = true; - break; - default: - goto flags_done; + while (c <= '0') { + auto tag = GetTagForChar(c); + if (tag.is_flags()) { + conv->flags = conv->flags | tag.as_flags(); + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + break; } - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); } -flags_done: if (c <= '9') { if (c >= '0') { @@ -159,12 +161,12 @@ flags_done: if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr; // Positional conversion. *next_arg = -1; - conv->flags = Flags(); - conv->flags.basic = true; return ConsumeConversion<true>(original_pos, end, conv, next_arg); } + conv->flags = conv->flags | Flags::kNonBasic; conv->width.set_value(maybe_width); } else if (c == '*') { + conv->flags = conv->flags | Flags::kNonBasic; ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); if (is_positional) { if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; @@ -178,6 +180,7 @@ flags_done: } if (c == '.') { + conv->flags = conv->flags | Flags::kNonBasic; ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); if (std::isdigit(c)) { conv->precision.set_value(parse_digits()); @@ -296,15 +299,17 @@ struct ParsedFormatBase::ParsedFormatConsumer { char* data_pos; }; -ParsedFormatBase::ParsedFormatBase(string_view format, bool allow_ignored, - std::initializer_list<Conv> convs) +ParsedFormatBase::ParsedFormatBase( + string_view format, bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs) : data_(format.empty() ? nullptr : new char[format.size()]) { has_error_ = !ParseFormatString(format, ParsedFormatConsumer(this)) || !MatchesConversions(allow_ignored, convs); } bool ParsedFormatBase::MatchesConversions( - bool allow_ignored, std::initializer_list<Conv> convs) const { + bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs) const { std::unordered_set<int> used; auto add_if_valid_conv = [&](int pos, char c) { if (static_cast<size_t>(pos) > convs.size() || diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/parser.h b/third_party/abseil-cpp/absl/strings/internal/str_format/parser.h index 45c90d1df0..ad8646edff 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/parser.h +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/parser.h @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ @@ -27,10 +41,7 @@ std::string LengthModToString(LengthMod v); // The analyzed properties of a single specified conversion. struct UnboundConversion { - UnboundConversion() - : flags() /* This is required to zero all the fields of flags. */ { - flags.basic = true; - } + UnboundConversion() {} class InputValue { public: @@ -65,9 +76,9 @@ struct UnboundConversion { InputValue width; InputValue precision; - Flags flags; + Flags flags = Flags::kBasic; LengthMod length_mod = LengthMod::none; - ConversionChar conv = FormatConversionChar::kNone; + FormatConversionChar conv = FormatConversionCharInternal::kNone; }; // Consume conversion spec prefix (not including '%') of [p, end) if valid. @@ -79,32 +90,43 @@ const char* ConsumeUnboundConversion(const char* p, const char* end, UnboundConversion* conv, int* next_arg); // Helper tag class for the table below. -// It allows fast `char -> ConversionChar/LengthMod` checking and +// It allows fast `char -> ConversionChar/LengthMod/Flags` checking and // conversions. class ConvTag { public: - constexpr ConvTag(ConversionChar conversion_char) // NOLINT - : tag_(static_cast<int8_t>(conversion_char)) {} - // We invert the length modifiers to make them negative so that we can easily - // test for them. + constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT + : tag_(static_cast<uint8_t>(conversion_char)) {} constexpr ConvTag(LengthMod length_mod) // NOLINT - : tag_(~static_cast<std::int8_t>(length_mod)) {} - // Everything else is -128, which is negative to make is_conv() simpler. - constexpr ConvTag() : tag_(-128) {} + : tag_(0x80 | static_cast<uint8_t>(length_mod)) {} + constexpr ConvTag(Flags flags) // NOLINT + : tag_(0xc0 | static_cast<uint8_t>(flags)) {} + constexpr ConvTag() : tag_(0xFF) {} + + bool is_conv() const { return (tag_ & 0x80) == 0; } + bool is_length() const { return (tag_ & 0xC0) == 0x80; } + bool is_flags() const { return (tag_ & 0xE0) == 0xC0; } - bool is_conv() const { return tag_ >= 0; } - bool is_length() const { return tag_ < 0 && tag_ != -128; } - ConversionChar as_conv() const { + FormatConversionChar as_conv() const { assert(is_conv()); - return static_cast<ConversionChar>(tag_); + assert(!is_length()); + assert(!is_flags()); + return static_cast<FormatConversionChar>(tag_); } LengthMod as_length() const { + assert(!is_conv()); assert(is_length()); - return static_cast<LengthMod>(~tag_); + assert(!is_flags()); + return static_cast<LengthMod>(tag_ & 0x3F); + } + Flags as_flags() const { + assert(!is_conv()); + assert(!is_length()); + assert(is_flags()); + return static_cast<Flags>(tag_ & 0x1F); } private: - std::int8_t tag_; + uint8_t tag_; }; extern const ConvTag kTags[256]; @@ -143,7 +165,7 @@ bool ParseFormatString(string_view src, Consumer consumer) { auto tag = GetTagForChar(percent[1]); if (tag.is_conv()) { if (ABSL_PREDICT_FALSE(next_arg < 0)) { - // This indicates an error in the format std::string. + // This indicates an error in the format string. // The only way to get `next_arg < 0` here is to have a positional // argument first which sets next_arg to -1 and then a non-positional // argument. @@ -186,8 +208,9 @@ constexpr bool EnsureConstexpr(string_view s) { class ParsedFormatBase { public: - explicit ParsedFormatBase(string_view format, bool allow_ignored, - std::initializer_list<Conv> convs); + explicit ParsedFormatBase( + string_view format, bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs); ParsedFormatBase(const ParsedFormatBase& other) { *this = other; } @@ -234,8 +257,9 @@ class ParsedFormatBase { private: // Returns whether the conversions match and if !allow_ignored it verifies // that all conversions are used by the format. - bool MatchesConversions(bool allow_ignored, - std::initializer_list<Conv> convs) const; + bool MatchesConversions( + bool allow_ignored, + std::initializer_list<FormatConversionCharSet> convs) const; struct ParsedFormatConsumer; @@ -280,14 +304,14 @@ class ParsedFormatBase { // This is the only API that allows the user to pass a runtime specified format // string. These factory functions will return NULL if the format does not match // the conversions requested by the user. -template <str_format_internal::Conv... C> +template <FormatConversionCharSet... C> class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase { public: explicit ExtendedParsedFormat(string_view format) #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER __attribute__(( enable_if(str_format_internal::EnsureConstexpr(format), - "Format std::string is not constexpr."), + "Format string is not constexpr."), enable_if(str_format_internal::ValidFormatImpl<C...>(format), "Format specified does not match the template arguments."))) #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER diff --git a/third_party/abseil-cpp/absl/strings/internal/str_format/parser_test.cc b/third_party/abseil-cpp/absl/strings/internal/str_format/parser_test.cc index 1b1ee030f1..fe0d296360 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_format/parser_test.cc +++ b/third_party/abseil-cpp/absl/strings/internal/str_format/parser_test.cc @@ -1,3 +1,17 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "absl/strings/internal/str_format/parser.h" #include <string.h> @@ -41,23 +55,23 @@ TEST(LengthModTest, Names) { TEST(ConversionCharTest, Names) { struct Expectation { - ConversionChar id; + FormatConversionChar id; char name; }; // clang-format off const Expectation kExpect[] = { -#define X(c) {ConversionChar::c, #c[0]} - X(c), X(C), X(s), X(S), // text +#define X(c) {FormatConversionCharInternal::c, #c[0]} + X(c), X(s), // text X(d), X(i), X(o), X(u), X(x), X(X), // int X(f), X(F), X(e), X(E), X(g), X(G), X(a), X(A), // float X(n), X(p), // misc #undef X - {ConversionChar::none, '\0'}, + {FormatConversionCharInternal::kNone, '\0'}, }; // clang-format on for (auto e : kExpect) { SCOPED_TRACE(e.name); - ConversionChar v = e.id; + FormatConversionChar v = e.id; EXPECT_EQ(e.name, FormatConversionCharToChar(v)); } } @@ -256,15 +270,22 @@ TEST_F(ConsumeUnboundConversionTest, Flags) { for (int k = 0; k < kNumFlags; ++k) if ((i >> k) & 1) fmt += kAllFlags[k]; // flag order shouldn't matter - if (rev == 1) { std::reverse(fmt.begin(), fmt.end()); } + if (rev == 1) { + std::reverse(fmt.begin(), fmt.end()); + } fmt += 'd'; SCOPED_TRACE(fmt); EXPECT_TRUE(Run(fmt.c_str())); - EXPECT_EQ(fmt.find('-') == std::string::npos, !o.flags.left); - EXPECT_EQ(fmt.find('+') == std::string::npos, !o.flags.show_pos); - EXPECT_EQ(fmt.find(' ') == std::string::npos, !o.flags.sign_col); - EXPECT_EQ(fmt.find('#') == std::string::npos, !o.flags.alt); - EXPECT_EQ(fmt.find('0') == std::string::npos, !o.flags.zero); + EXPECT_EQ(fmt.find('-') == std::string::npos, + !FlagsContains(o.flags, Flags::kLeft)); + EXPECT_EQ(fmt.find('+') == std::string::npos, + !FlagsContains(o.flags, Flags::kShowPos)); + EXPECT_EQ(fmt.find(' ') == std::string::npos, + !FlagsContains(o.flags, Flags::kSignCol)); + EXPECT_EQ(fmt.find('#') == std::string::npos, + !FlagsContains(o.flags, Flags::kAlt)); + EXPECT_EQ(fmt.find('0') == std::string::npos, + !FlagsContains(o.flags, Flags::kZero)); } } } @@ -274,14 +295,14 @@ TEST_F(ConsumeUnboundConversionTest, BasicFlag) { for (const char* fmt : {"d", "llx", "G", "1$X"}) { SCOPED_TRACE(fmt); EXPECT_TRUE(Run(fmt)); - EXPECT_TRUE(o.flags.basic); + EXPECT_EQ(o.flags, Flags::kBasic); } // Flag is off for (const char* fmt : {"3d", ".llx", "-G", "1$#X"}) { SCOPED_TRACE(fmt); EXPECT_TRUE(Run(fmt)); - EXPECT_FALSE(o.flags.basic); + EXPECT_NE(o.flags, Flags::kBasic); } } @@ -349,7 +370,8 @@ TEST_F(ParsedFormatTest, ValueSemantics) { ParsedFormatBase p2 = p1; // copy construct (empty) EXPECT_EQ(SummarizeParsedFormat(p1), SummarizeParsedFormat(p2)); - p1 = ParsedFormatBase("hello%s", true, {Conv::s}); // move assign + p1 = ParsedFormatBase("hello%s", true, + {FormatConversionCharSetInternal::s}); // move assign EXPECT_EQ("[hello]{s:1$s}", SummarizeParsedFormat(p1)); ParsedFormatBase p3 = p1; // copy construct (nonempty) @@ -367,7 +389,7 @@ TEST_F(ParsedFormatTest, ValueSemantics) { struct ExpectParse { const char* in; - std::initializer_list<Conv> conv_set; + std::initializer_list<FormatConversionCharSet> conv_set; const char* out; }; @@ -377,9 +399,9 @@ TEST_F(ParsedFormatTest, Parsing) { const ExpectParse kExpect[] = { {"", {}, ""}, {"ab", {}, "[ab]"}, - {"a%d", {Conv::d}, "[a]{d:1$d}"}, - {"a%+d", {Conv::d}, "[a]{+d:1$d}"}, - {"a% d", {Conv::d}, "[a]{ d:1$d}"}, + {"a%d", {FormatConversionCharSetInternal::d}, "[a]{d:1$d}"}, + {"a%+d", {FormatConversionCharSetInternal::d}, "[a]{+d:1$d}"}, + {"a% d", {FormatConversionCharSetInternal::d}, "[a]{ d:1$d}"}, {"a%b %d", {}, "[a]!"}, // stop after error }; for (const auto& e : kExpect) { @@ -391,13 +413,13 @@ TEST_F(ParsedFormatTest, Parsing) { TEST_F(ParsedFormatTest, ParsingFlagOrder) { const ExpectParse kExpect[] = { - {"a%+ 0d", {Conv::d}, "[a]{+ 0d:1$d}"}, - {"a%+0 d", {Conv::d}, "[a]{+0 d:1$d}"}, - {"a%0+ d", {Conv::d}, "[a]{0+ d:1$d}"}, - {"a% +0d", {Conv::d}, "[a]{ +0d:1$d}"}, - {"a%0 +d", {Conv::d}, "[a]{0 +d:1$d}"}, - {"a% 0+d", {Conv::d}, "[a]{ 0+d:1$d}"}, - {"a%+ 0+d", {Conv::d}, "[a]{+ 0+d:1$d}"}, + {"a%+ 0d", {FormatConversionCharSetInternal::d}, "[a]{+ 0d:1$d}"}, + {"a%+0 d", {FormatConversionCharSetInternal::d}, "[a]{+0 d:1$d}"}, + {"a%0+ d", {FormatConversionCharSetInternal::d}, "[a]{0+ d:1$d}"}, + {"a% +0d", {FormatConversionCharSetInternal::d}, "[a]{ +0d:1$d}"}, + {"a%0 +d", {FormatConversionCharSetInternal::d}, "[a]{0 +d:1$d}"}, + {"a% 0+d", {FormatConversionCharSetInternal::d}, "[a]{ 0+d:1$d}"}, + {"a%+ 0+d", {FormatConversionCharSetInternal::d}, "[a]{+ 0+d:1$d}"}, }; for (const auto& e : kExpect) { SCOPED_TRACE(e.in); diff --git a/third_party/abseil-cpp/absl/strings/internal/str_split_internal.h b/third_party/abseil-cpp/absl/strings/internal/str_split_internal.h index b54f6ebe09..e766421617 100644 --- a/third_party/abseil-cpp/absl/strings/internal/str_split_internal.h +++ b/third_party/abseil-cpp/absl/strings/internal/str_split_internal.h @@ -32,7 +32,7 @@ #include <array> #include <initializer_list> #include <iterator> -#include <map> +#include <tuple> #include <type_traits> #include <utility> #include <vector> @@ -51,9 +51,9 @@ ABSL_NAMESPACE_BEGIN namespace strings_internal { // This class is implicitly constructible from everything that absl::string_view -// is implicitly constructible from. If it's constructed from a temporary -// string, the data is moved into a data member so its lifetime matches that of -// the ConvertibleToStringView instance. +// is implicitly constructible from, except for rvalue strings. This means it +// can be used as a function parameter in places where passing a temporary +// string might cause memory lifetime issues. class ConvertibleToStringView { public: ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit) @@ -64,42 +64,13 @@ class ConvertibleToStringView { ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit) : value_(s) {} - // Matches rvalue strings and moves their data to a member. -ConvertibleToStringView(std::string&& s) // NOLINT(runtime/explicit) - : copy_(std::move(s)), value_(copy_) {} - - ConvertibleToStringView(const ConvertibleToStringView& other) - : copy_(other.copy_), - value_(other.IsSelfReferential() ? copy_ : other.value_) {} - - ConvertibleToStringView(ConvertibleToStringView&& other) { - StealMembers(std::move(other)); - } - - ConvertibleToStringView& operator=(ConvertibleToStringView other) { - StealMembers(std::move(other)); - return *this; - } + // Disable conversion from rvalue strings. + ConvertibleToStringView(std::string&& s) = delete; + ConvertibleToStringView(const std::string&& s) = delete; absl::string_view value() const { return value_; } private: - // Returns true if ctsp's value refers to its internal copy_ member. - bool IsSelfReferential() const { return value_.data() == copy_.data(); } - - void StealMembers(ConvertibleToStringView&& other) { - if (other.IsSelfReferential()) { - copy_ = std::move(other.copy_); - value_ = copy_; - other.value_ = other.copy_; - } else { - value_ = other.value_; - } - } - - // Holds the data moved from temporary std::string arguments. Declared first - // so that 'value' can refer to 'copy_'. - std::string copy_; absl::string_view value_; }; @@ -211,6 +182,13 @@ template <typename T> struct HasConstIterator<T, absl::void_t<typename T::const_iterator>> : std::true_type {}; +// HasEmplace<T>::value is true iff there exists a method T::emplace(). +template <typename T, typename = void> +struct HasEmplace : std::false_type {}; +template <typename T> +struct HasEmplace<T, absl::void_t<decltype(std::declval<T>().emplace())>> + : std::true_type {}; + // IsInitializerList<T>::value is true iff T is an std::initializer_list. More // details below in Splitter<> where this is used. std::false_type IsInitializerListDispatch(...); // default: No @@ -273,7 +251,11 @@ struct SplitterIsConvertibleTo // the split strings: only strings for which the predicate returns true will be // kept. A Predicate object is any unary functor that takes an absl::string_view // and returns bool. -template <typename Delimiter, typename Predicate> +// +// The StringType parameter can be either string_view or string, depending on +// whether the Splitter refers to a string stored elsewhere, or if the string +// resides inside the Splitter itself. +template <typename Delimiter, typename Predicate, typename StringType> class Splitter { public: using DelimiterType = Delimiter; @@ -281,12 +263,12 @@ class Splitter { using const_iterator = strings_internal::SplitIterator<Splitter>; using value_type = typename std::iterator_traits<const_iterator>::value_type; - Splitter(ConvertibleToStringView input_text, Delimiter d, Predicate p) + Splitter(StringType input_text, Delimiter d, Predicate p) : text_(std::move(input_text)), delimiter_(std::move(d)), predicate_(std::move(p)) {} - absl::string_view text() const { return text_.value(); } + absl::string_view text() const { return text_; } const Delimiter& delimiter() const { return delimiter_; } const Predicate& predicate() const { return predicate_; } @@ -336,7 +318,7 @@ class Splitter { Container operator()(const Splitter& splitter) const { Container c; auto it = std::inserter(c, c.end()); - for (const auto sp : splitter) { + for (const auto& sp : splitter) { *it++ = ValueType(sp); } return c; @@ -397,53 +379,46 @@ class Splitter { // value. template <typename Container, typename First, typename Second> struct ConvertToContainer<Container, std::pair<const First, Second>, true> { + using iterator = typename Container::iterator; + Container operator()(const Splitter& splitter) const { Container m; - typename Container::iterator it; + iterator it; bool insert = true; - for (const auto sp : splitter) { + for (const absl::string_view sv : splitter) { if (insert) { - it = Inserter<Container>::Insert(&m, First(sp), Second()); + it = InsertOrEmplace(&m, sv); } else { - it->second = Second(sp); + it->second = Second(sv); } insert = !insert; } return m; } - // Inserts the key and value into the given map, returning an iterator to - // the inserted item. Specialized for std::map and std::multimap to use - // emplace() and adapt emplace()'s return value. - template <typename Map> - struct Inserter { - using M = Map; - template <typename... Args> - static typename M::iterator Insert(M* m, Args&&... args) { - return m->insert(std::make_pair(std::forward<Args>(args)...)).first; - } - }; - - template <typename... Ts> - struct Inserter<std::map<Ts...>> { - using M = std::map<Ts...>; - template <typename... Args> - static typename M::iterator Insert(M* m, Args&&... args) { - return m->emplace(std::make_pair(std::forward<Args>(args)...)).first; - } - }; - - template <typename... Ts> - struct Inserter<std::multimap<Ts...>> { - using M = std::multimap<Ts...>; - template <typename... Args> - static typename M::iterator Insert(M* m, Args&&... args) { - return m->emplace(std::make_pair(std::forward<Args>(args)...)); - } - }; + // Inserts the key and an empty value into the map, returning an iterator to + // the inserted item. We use emplace() if available, otherwise insert(). + template <typename M> + static absl::enable_if_t<HasEmplace<M>::value, iterator> InsertOrEmplace( + M* m, absl::string_view key) { + // Use piecewise_construct to support old versions of gcc in which pair + // constructor can't otherwise construct string from string_view. + return ToIter(m->emplace(std::piecewise_construct, std::make_tuple(key), + std::tuple<>())); + } + template <typename M> + static absl::enable_if_t<!HasEmplace<M>::value, iterator> InsertOrEmplace( + M* m, absl::string_view key) { + return ToIter(m->insert(std::make_pair(First(key), Second("")))); + } + + static iterator ToIter(std::pair<iterator, bool> pair) { + return pair.first; + } + static iterator ToIter(iterator iter) { return iter; } }; - ConvertibleToStringView text_; + StringType text_; Delimiter delimiter_; Predicate predicate_; }; diff --git a/third_party/abseil-cpp/absl/strings/internal/string_constant.h b/third_party/abseil-cpp/absl/strings/internal/string_constant.h new file mode 100644 index 0000000000..a11336b7f0 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/string_constant.h @@ -0,0 +1,64 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_STRING_CONSTANT_H_ +#define ABSL_STRINGS_INTERNAL_STRING_CONSTANT_H_ + +#include "absl/meta/type_traits.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +// StringConstant<T> represents a compile time string constant. +// It can be accessed via its `absl::string_view value` static member. +// It is guaranteed that the `string_view` returned has constant `.data()`, +// constant `.size()` and constant `value[i]` for all `0 <= i < .size()` +// +// The `T` is an opaque type. It is guaranteed that different string constants +// will have different values of `T`. This allows users to associate the string +// constant with other static state at compile time. +// +// Instances should be made using the `MakeStringConstant()` factory function +// below. +template <typename T> +struct StringConstant { + static constexpr absl::string_view value = T{}(); + constexpr absl::string_view operator()() const { return value; } + + // Check to be sure `view` points to constant data. + // Otherwise, it can't be constant evaluated. + static_assert(value.empty() || 2 * value[0] != 1, + "The input string_view must point to constant data."); +}; + +template <typename T> +constexpr absl::string_view StringConstant<T>::value; // NOLINT + +// Factory function for `StringConstant` instances. +// It supports callables that have a constexpr default constructor and a +// constexpr operator(). +// It must return an `absl::string_view` or `const char*` pointing to constant +// data. This is validated at compile time. +template <typename T> +constexpr StringConstant<T> MakeStringConstant(T) { + return {}; +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_STRING_CONSTANT_H_ diff --git a/third_party/abseil-cpp/absl/strings/internal/string_constant_test.cc b/third_party/abseil-cpp/absl/strings/internal/string_constant_test.cc new file mode 100644 index 0000000000..392833cf15 --- /dev/null +++ b/third_party/abseil-cpp/absl/strings/internal/string_constant_test.cc @@ -0,0 +1,60 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/string_constant.h" + +#include "absl/meta/type_traits.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace { + +using absl::strings_internal::MakeStringConstant; + +struct Callable { + constexpr absl::string_view operator()() const { + return absl::string_view("Callable", 8); + } +}; + +TEST(StringConstant, Traits) { + constexpr auto str = MakeStringConstant(Callable{}); + using T = decltype(str); + + EXPECT_TRUE(std::is_empty<T>::value); + EXPECT_TRUE(std::is_trivial<T>::value); + EXPECT_TRUE(absl::is_trivially_default_constructible<T>::value); + EXPECT_TRUE(absl::is_trivially_copy_constructible<T>::value); + EXPECT_TRUE(absl::is_trivially_move_constructible<T>::value); + EXPECT_TRUE(absl::is_trivially_destructible<T>::value); +} + +TEST(StringConstant, MakeFromCallable) { + constexpr auto str = MakeStringConstant(Callable{}); + using T = decltype(str); + EXPECT_EQ(Callable{}(), T::value); + EXPECT_EQ(Callable{}(), str()); +} + +TEST(StringConstant, MakeFromStringConstant) { + // We want to make sure the StringConstant itself is a valid input to the + // factory function. + constexpr auto str = MakeStringConstant(Callable{}); + constexpr auto str2 = MakeStringConstant(str); + using T = decltype(str2); + EXPECT_EQ(Callable{}(), T::value); + EXPECT_EQ(Callable{}(), str2()); +} + +} // namespace diff --git a/third_party/abseil-cpp/absl/strings/match.cc b/third_party/abseil-cpp/absl/strings/match.cc index 8127cb0c5e..2d67250970 100644 --- a/third_party/abseil-cpp/absl/strings/match.cc +++ b/third_party/abseil-cpp/absl/strings/match.cc @@ -19,19 +19,22 @@ namespace absl { ABSL_NAMESPACE_BEGIN -bool EqualsIgnoreCase(absl::string_view piece1, absl::string_view piece2) { +bool EqualsIgnoreCase(absl::string_view piece1, + absl::string_view piece2) noexcept { return (piece1.size() == piece2.size() && 0 == absl::strings_internal::memcasecmp(piece1.data(), piece2.data(), piece1.size())); // memcasecmp uses absl::ascii_tolower(). } -bool StartsWithIgnoreCase(absl::string_view text, absl::string_view prefix) { +bool StartsWithIgnoreCase(absl::string_view text, + absl::string_view prefix) noexcept { return (text.size() >= prefix.size()) && EqualsIgnoreCase(text.substr(0, prefix.size()), prefix); } -bool EndsWithIgnoreCase(absl::string_view text, absl::string_view suffix) { +bool EndsWithIgnoreCase(absl::string_view text, + absl::string_view suffix) noexcept { return (text.size() >= suffix.size()) && EqualsIgnoreCase(text.substr(text.size() - suffix.size()), suffix); } diff --git a/third_party/abseil-cpp/absl/strings/match.h b/third_party/abseil-cpp/absl/strings/match.h index 90fca98ad2..038cbb3fa8 100644 --- a/third_party/abseil-cpp/absl/strings/match.h +++ b/third_party/abseil-cpp/absl/strings/match.h @@ -43,14 +43,20 @@ ABSL_NAMESPACE_BEGIN // StrContains() // // Returns whether a given string `haystack` contains the substring `needle`. -inline bool StrContains(absl::string_view haystack, absl::string_view needle) { +inline bool StrContains(absl::string_view haystack, + absl::string_view needle) noexcept { return haystack.find(needle, 0) != haystack.npos; } +inline bool StrContains(absl::string_view haystack, char needle) noexcept { + return haystack.find(needle) != haystack.npos; +} + // StartsWith() // // Returns whether a given string `text` begins with `prefix`. -inline bool StartsWith(absl::string_view text, absl::string_view prefix) { +inline bool StartsWith(absl::string_view text, + absl::string_view prefix) noexcept { return prefix.empty() || (text.size() >= prefix.size() && memcmp(text.data(), prefix.data(), prefix.size()) == 0); @@ -59,7 +65,8 @@ inline bool StartsWith(absl::string_view text, absl::string_view prefix) { // EndsWith() // // Returns whether a given string `text` ends with `suffix`. -inline bool EndsWith(absl::string_view text, absl::string_view suffix) { +inline bool EndsWith(absl::string_view text, + absl::string_view suffix) noexcept { return suffix.empty() || (text.size() >= suffix.size() && memcmp(text.data() + (text.size() - suffix.size()), suffix.data(), @@ -70,19 +77,22 @@ inline bool EndsWith(absl::string_view text, absl::string_view suffix) { // // Returns whether given ASCII strings `piece1` and `piece2` are equal, ignoring // case in the comparison. -bool EqualsIgnoreCase(absl::string_view piece1, absl::string_view piece2); +bool EqualsIgnoreCase(absl::string_view piece1, + absl::string_view piece2) noexcept; // StartsWithIgnoreCase() // // Returns whether a given ASCII string `text` starts with `prefix`, // ignoring case in the comparison. -bool StartsWithIgnoreCase(absl::string_view text, absl::string_view prefix); +bool StartsWithIgnoreCase(absl::string_view text, + absl::string_view prefix) noexcept; // EndsWithIgnoreCase() // // Returns whether a given ASCII string `text` ends with `suffix`, ignoring // case in the comparison. -bool EndsWithIgnoreCase(absl::string_view text, absl::string_view suffix); +bool EndsWithIgnoreCase(absl::string_view text, + absl::string_view suffix) noexcept; ABSL_NAMESPACE_END } // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/match_test.cc b/third_party/abseil-cpp/absl/strings/match_test.cc index 4c313dda14..5841bc1b48 100644 --- a/third_party/abseil-cpp/absl/strings/match_test.cc +++ b/third_party/abseil-cpp/absl/strings/match_test.cc @@ -66,6 +66,23 @@ TEST(MatchTest, Contains) { EXPECT_FALSE(absl::StrContains("", "a")); } +TEST(MatchTest, ContainsChar) { + absl::string_view a("abcdefg"); + absl::string_view b("abcd"); + EXPECT_TRUE(absl::StrContains(a, 'a')); + EXPECT_TRUE(absl::StrContains(a, 'b')); + EXPECT_TRUE(absl::StrContains(a, 'e')); + EXPECT_FALSE(absl::StrContains(a, 'h')); + + EXPECT_TRUE(absl::StrContains(b, 'a')); + EXPECT_TRUE(absl::StrContains(b, 'b')); + EXPECT_FALSE(absl::StrContains(b, 'e')); + EXPECT_FALSE(absl::StrContains(b, 'h')); + + EXPECT_FALSE(absl::StrContains("", 'a')); + EXPECT_FALSE(absl::StrContains("", 'a')); +} + TEST(MatchTest, ContainsNull) { const std::string s = "foo"; const char* cs = "foo"; diff --git a/third_party/abseil-cpp/absl/strings/numbers.cc b/third_party/abseil-cpp/absl/strings/numbers.cc index 68c26dd6f8..cbd84c918b 100644 --- a/third_party/abseil-cpp/absl/strings/numbers.cc +++ b/third_party/abseil-cpp/absl/strings/numbers.cc @@ -31,8 +31,8 @@ #include <utility> #include "absl/base/attributes.h" -#include "absl/base/internal/bits.h" #include "absl/base/internal/raw_logging.h" +#include "absl/numeric/bits.h" #include "absl/strings/ascii.h" #include "absl/strings/charconv.h" #include "absl/strings/escaping.h" @@ -46,8 +46,13 @@ ABSL_NAMESPACE_BEGIN bool SimpleAtof(absl::string_view str, float* out) { *out = 0.0; str = StripAsciiWhitespace(str); + // std::from_chars doesn't accept an initial +, but SimpleAtof does, so if one + // is present, skip it, while avoiding accepting "+-0" as valid. if (!str.empty() && str[0] == '+') { str.remove_prefix(1); + if (!str.empty() && str[0] == '-') { + return false; + } } auto result = absl::from_chars(str.data(), str.data() + str.size(), *out); if (result.ec == std::errc::invalid_argument) { @@ -72,8 +77,13 @@ bool SimpleAtof(absl::string_view str, float* out) { bool SimpleAtod(absl::string_view str, double* out) { *out = 0.0; str = StripAsciiWhitespace(str); + // std::from_chars doesn't accept an initial +, but SimpleAtod does, so if one + // is present, skip it, while avoiding accepting "+-0" as valid. if (!str.empty() && str[0] == '+') { str.remove_prefix(1); + if (!str.empty() && str[0] == '-') { + return false; + } } auto result = absl::from_chars(str.data(), str.data() + str.size(), *out); if (result.ec == std::errc::invalid_argument) { @@ -303,7 +313,7 @@ static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num, uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95); if (bits128_up == 0) return {bits64_127, bits0_63}; - int shift = 64 - base_internal::CountLeadingZeros64(bits128_up); + auto shift = static_cast<unsigned>(bit_width(bits128_up)); uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift)); uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift)); return {hi, lo}; @@ -334,7 +344,7 @@ static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) { 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5}; result = Mul32(result, powers_of_five[expfive & 15]); - int shift = base_internal::CountLeadingZeros64(result.first); + int shift = countl_zero(result.first); if (shift != 0) { result.first = (result.first << shift) + (result.second >> (64 - shift)); result.second = (result.second << shift); @@ -495,7 +505,7 @@ size_t numbers_internal::SixDigitsToBuffer(double d, char* const buffer) { *out++ = '-'; d = -d; } - if (std::isinf(d)) { + if (d > std::numeric_limits<double>::max()) { strcpy(out, "inf"); // NOLINT(runtime/printf) return out + 3 - buffer; } @@ -736,9 +746,18 @@ struct LookupTables { X / 35, X / 36, \ } +// This kVmaxOverBase is generated with +// for (int base = 2; base < 37; ++base) { +// absl::uint128 max = std::numeric_limits<absl::uint128>::max(); +// auto result = max / base; +// std::cout << " MakeUint128(" << absl::Uint128High64(result) << "u, " +// << absl::Uint128Low64(result) << "u),\n"; +// } +// See https://godbolt.org/z/aneYsb +// // uint128& operator/=(uint128) is not constexpr, so hardcode the resulting // array to avoid a static initializer. -template <> +template<> const uint128 LookupTables<uint128>::kVmaxOverBase[] = { 0, 0, @@ -779,6 +798,111 @@ const uint128 LookupTables<uint128>::kVmaxOverBase[] = { MakeUint128(512409557603043100u, 8198552921648689607u), }; +// This kVmaxOverBase generated with +// for (int base = 2; base < 37; ++base) { +// absl::int128 max = std::numeric_limits<absl::int128>::max(); +// auto result = max / base; +// std::cout << "\tMakeInt128(" << absl::Int128High64(result) << ", " +// << absl::Int128Low64(result) << "u),\n"; +// } +// See https://godbolt.org/z/7djYWz +// +// int128& operator/=(int128) is not constexpr, so hardcode the resulting array +// to avoid a static initializer. +template<> +const int128 LookupTables<int128>::kVmaxOverBase[] = { + 0, + 0, + MakeInt128(4611686018427387903, 18446744073709551615u), + MakeInt128(3074457345618258602, 12297829382473034410u), + MakeInt128(2305843009213693951, 18446744073709551615u), + MakeInt128(1844674407370955161, 11068046444225730969u), + MakeInt128(1537228672809129301, 6148914691236517205u), + MakeInt128(1317624576693539401, 2635249153387078802u), + MakeInt128(1152921504606846975, 18446744073709551615u), + MakeInt128(1024819115206086200, 16397105843297379214u), + MakeInt128(922337203685477580, 14757395258967641292u), + MakeInt128(838488366986797800, 13415813871788764811u), + MakeInt128(768614336404564650, 12297829382473034410u), + MakeInt128(709490156681136600, 11351842506898185609u), + MakeInt128(658812288346769700, 10540996613548315209u), + MakeInt128(614891469123651720, 9838263505978427528u), + MakeInt128(576460752303423487, 18446744073709551615u), + MakeInt128(542551296285575047, 9765923333140350855u), + MakeInt128(512409557603043100, 8198552921648689607u), + MakeInt128(485440633518672410, 17475862806672206794u), + MakeInt128(461168601842738790, 7378697629483820646u), + MakeInt128(439208192231179800, 7027331075698876806u), + MakeInt128(419244183493398900, 6707906935894382405u), + MakeInt128(401016175515425035, 2406097053092550210u), + MakeInt128(384307168202282325, 6148914691236517205u), + MakeInt128(368934881474191032, 5902958103587056517u), + MakeInt128(354745078340568300, 5675921253449092804u), + MakeInt128(341606371735362066, 17763531330238827482u), + MakeInt128(329406144173384850, 5270498306774157604u), + MakeInt128(318047311615681924, 7633135478776366185u), + MakeInt128(307445734561825860, 4919131752989213764u), + MakeInt128(297528130221121800, 4760450083537948804u), + MakeInt128(288230376151711743, 18446744073709551615u), + MakeInt128(279496122328932600, 4471937957262921603u), + MakeInt128(271275648142787523, 14106333703424951235u), + MakeInt128(263524915338707880, 4216398645419326083u), + MakeInt128(256204778801521550, 4099276460824344803u), +}; + +// This kVminOverBase generated with +// for (int base = 2; base < 37; ++base) { +// absl::int128 min = std::numeric_limits<absl::int128>::min(); +// auto result = min / base; +// std::cout << "\tMakeInt128(" << absl::Int128High64(result) << ", " +// << absl::Int128Low64(result) << "u),\n"; +// } +// +// See https://godbolt.org/z/7djYWz +// +// int128& operator/=(int128) is not constexpr, so hardcode the resulting array +// to avoid a static initializer. +template<> +const int128 LookupTables<int128>::kVminOverBase[] = { + 0, + 0, + MakeInt128(-4611686018427387904, 0u), + MakeInt128(-3074457345618258603, 6148914691236517206u), + MakeInt128(-2305843009213693952, 0u), + MakeInt128(-1844674407370955162, 7378697629483820647u), + MakeInt128(-1537228672809129302, 12297829382473034411u), + MakeInt128(-1317624576693539402, 15811494920322472814u), + MakeInt128(-1152921504606846976, 0u), + MakeInt128(-1024819115206086201, 2049638230412172402u), + MakeInt128(-922337203685477581, 3689348814741910324u), + MakeInt128(-838488366986797801, 5030930201920786805u), + MakeInt128(-768614336404564651, 6148914691236517206u), + MakeInt128(-709490156681136601, 7094901566811366007u), + MakeInt128(-658812288346769701, 7905747460161236407u), + MakeInt128(-614891469123651721, 8608480567731124088u), + MakeInt128(-576460752303423488, 0u), + MakeInt128(-542551296285575048, 8680820740569200761u), + MakeInt128(-512409557603043101, 10248191152060862009u), + MakeInt128(-485440633518672411, 970881267037344822u), + MakeInt128(-461168601842738791, 11068046444225730970u), + MakeInt128(-439208192231179801, 11419412998010674810u), + MakeInt128(-419244183493398901, 11738837137815169211u), + MakeInt128(-401016175515425036, 16040647020617001406u), + MakeInt128(-384307168202282326, 12297829382473034411u), + MakeInt128(-368934881474191033, 12543785970122495099u), + MakeInt128(-354745078340568301, 12770822820260458812u), + MakeInt128(-341606371735362067, 683212743470724134u), + MakeInt128(-329406144173384851, 13176245766935394012u), + MakeInt128(-318047311615681925, 10813608594933185431u), + MakeInt128(-307445734561825861, 13527612320720337852u), + MakeInt128(-297528130221121801, 13686293990171602812u), + MakeInt128(-288230376151711744, 0u), + MakeInt128(-279496122328932601, 13974806116446630013u), + MakeInt128(-271275648142787524, 4340410370284600381u), + MakeInt128(-263524915338707881, 14230345428290225533u), + MakeInt128(-256204778801521551, 14347467612885206813u), +}; + template <typename IntType> const IntType LookupTables<IntType>::kVmaxOverBase[] = X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max()); @@ -948,6 +1072,10 @@ bool safe_strto64_base(absl::string_view text, int64_t* value, int base) { return safe_int_internal<int64_t>(text, value, base); } +bool safe_strto128_base(absl::string_view text, int128* value, int base) { + return safe_int_internal<absl::int128>(text, value, base); +} + bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base) { return safe_uint_internal<uint32_t>(text, value, base); } diff --git a/third_party/abseil-cpp/absl/strings/numbers.h b/third_party/abseil-cpp/absl/strings/numbers.h index d872cca5dc..4ae07c2d60 100644 --- a/third_party/abseil-cpp/absl/strings/numbers.h +++ b/third_party/abseil-cpp/absl/strings/numbers.h @@ -1,4 +1,3 @@ -// // Copyright 2017 The Abseil Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -37,7 +36,6 @@ #include <type_traits> #include "absl/base/config.h" -#include "absl/base/internal/bits.h" #ifdef __SSE4_2__ // TODO(jorg): Remove this when we figure out the right way // to swap bytes on SSE 4.2 that works with the compilers @@ -48,6 +46,7 @@ #endif #include "absl/base/macros.h" #include "absl/base/port.h" +#include "absl/numeric/bits.h" #include "absl/numeric/int128.h" #include "absl/strings/string_view.h" @@ -97,6 +96,25 @@ ABSL_MUST_USE_RESULT bool SimpleAtod(absl::string_view str, double* out); // unspecified state. ABSL_MUST_USE_RESULT bool SimpleAtob(absl::string_view str, bool* out); +// SimpleHexAtoi() +// +// Converts a hexadecimal string (optionally followed or preceded by ASCII +// whitespace) to an integer, returning `true` if successful. Only valid base-16 +// hexadecimal integers whose value falls within the range of the integer type +// (optionally preceded by a `+` or `-`) can be converted. A valid hexadecimal +// value may include both upper and lowercase character symbols, and may +// optionally include a leading "0x" (or "0X") number prefix, which is ignored +// by this function. If any errors are encountered, this function returns +// `false`, leaving `out` in an unspecified state. +template <typename int_type> +ABSL_MUST_USE_RESULT bool SimpleHexAtoi(absl::string_view str, int_type* out); + +// Overloads of SimpleHexAtoi() for 128 bit integers. +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str, + absl::int128* out); +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str, + absl::uint128* out); + ABSL_NAMESPACE_END } // namespace absl @@ -125,8 +143,11 @@ inline void PutTwoDigits(size_t i, char* buf) { } // safe_strto?() functions for implementing SimpleAtoi() + bool safe_strto32_base(absl::string_view text, int32_t* value, int base); bool safe_strto64_base(absl::string_view text, int64_t* value, int base); +bool safe_strto128_base(absl::string_view text, absl::int128* value, + int base); bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base); bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base); bool safe_strtou128_base(absl::string_view text, absl::uint128* value, @@ -238,28 +259,41 @@ inline size_t FastHexToBufferZeroPad16(uint64_t val, char* out) { } #endif // | 0x1 so that even 0 has 1 digit. - return 16 - absl::base_internal::CountLeadingZeros64(val | 0x1) / 4; + return 16 - countl_zero(val | 0x1) / 4; } } // namespace numbers_internal -// SimpleAtoi() -// -// Converts a string to an integer, using `safe_strto?()` functions for actual -// parsing, returning `true` if successful. The `safe_strto?()` functions apply -// strict checking; the string must be a base-10 integer, optionally followed or -// preceded by ASCII whitespace, with a value in the range of the corresponding -// integer type. template <typename int_type> ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view str, int_type* out) { return numbers_internal::safe_strtoi_base(str, out, 10); } ABSL_MUST_USE_RESULT inline bool SimpleAtoi(absl::string_view str, + absl::int128* out) { + return numbers_internal::safe_strto128_base(str, out, 10); +} + +ABSL_MUST_USE_RESULT inline bool SimpleAtoi(absl::string_view str, absl::uint128* out) { return numbers_internal::safe_strtou128_base(str, out, 10); } +template <typename int_type> +ABSL_MUST_USE_RESULT bool SimpleHexAtoi(absl::string_view str, int_type* out) { + return numbers_internal::safe_strtoi_base(str, out, 16); +} + +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str, + absl::int128* out) { + return numbers_internal::safe_strto128_base(str, out, 16); +} + +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str, + absl::uint128* out) { + return numbers_internal::safe_strtou128_base(str, out, 16); +} + ABSL_NAMESPACE_END } // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/numbers_test.cc b/third_party/abseil-cpp/absl/strings/numbers_test.cc index 68229b15a4..498c210d3b 100644 --- a/third_party/abseil-cpp/absl/strings/numbers_test.cc +++ b/third_party/abseil-cpp/absl/strings/numbers_test.cc @@ -40,11 +40,14 @@ #include "absl/random/distributions.h" #include "absl/random/random.h" #include "absl/strings/internal/numbers_test_common.h" +#include "absl/strings/internal/ostringstream.h" #include "absl/strings/internal/pow10_helper.h" #include "absl/strings/str_cat.h" namespace { +using absl::SimpleAtoi; +using absl::SimpleHexAtoi; using absl::numbers_internal::kSixDigitsToBufferSize; using absl::numbers_internal::safe_strto32_base; using absl::numbers_internal::safe_strto64_base; @@ -54,7 +57,6 @@ using absl::numbers_internal::SixDigitsToBuffer; using absl::strings_internal::Itoa; using absl::strings_internal::strtouint32_test_cases; using absl::strings_internal::strtouint64_test_cases; -using absl::SimpleAtoi; using testing::Eq; using testing::MatchesRegex; @@ -250,7 +252,7 @@ TEST(Numbers, TestFastPrints) { template <typename int_type, typename in_val_type> void VerifySimpleAtoiGood(in_val_type in_value, int_type exp_value) { std::string s; - // uint128 can be streamed but not StrCat'd + // (u)int128 can be streamed but not StrCat'd. absl::strings_internal::OStringStream(&s) << in_value; int_type x = static_cast<int_type>(~exp_value); EXPECT_TRUE(SimpleAtoi(s, &x)) @@ -263,7 +265,9 @@ void VerifySimpleAtoiGood(in_val_type in_value, int_type exp_value) { template <typename int_type, typename in_val_type> void VerifySimpleAtoiBad(in_val_type in_value) { - std::string s = absl::StrCat(in_value); + std::string s; + // (u)int128 can be streamed but not StrCat'd. + absl::strings_internal::OStringStream(&s) << in_value; int_type x; EXPECT_FALSE(SimpleAtoi(s, &x)); EXPECT_FALSE(SimpleAtoi(s.c_str(), &x)); @@ -346,18 +350,71 @@ TEST(NumbersTest, Atoi) { std::numeric_limits<absl::uint128>::max(), std::numeric_limits<absl::uint128>::max()); + // SimpleAtoi(absl::string_view, absl::int128) + VerifySimpleAtoiGood<absl::int128>(0, 0); + VerifySimpleAtoiGood<absl::int128>(42, 42); + VerifySimpleAtoiGood<absl::int128>(-42, -42); + + VerifySimpleAtoiGood<absl::int128>(std::numeric_limits<int32_t>::min(), + std::numeric_limits<int32_t>::min()); + VerifySimpleAtoiGood<absl::int128>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleAtoiGood<absl::int128>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleAtoiGood<absl::int128>(std::numeric_limits<int64_t>::min(), + std::numeric_limits<int64_t>::min()); + VerifySimpleAtoiGood<absl::int128>(std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max()); + VerifySimpleAtoiGood<absl::int128>(std::numeric_limits<uint64_t>::max(), + std::numeric_limits<uint64_t>::max()); + VerifySimpleAtoiGood<absl::int128>( + std::numeric_limits<absl::int128>::min(), + std::numeric_limits<absl::int128>::min()); + VerifySimpleAtoiGood<absl::int128>( + std::numeric_limits<absl::int128>::max(), + std::numeric_limits<absl::int128>::max()); + VerifySimpleAtoiBad<absl::int128>(std::numeric_limits<absl::uint128>::max()); + // Some other types VerifySimpleAtoiGood<int>(-42, -42); VerifySimpleAtoiGood<int32_t>(-42, -42); VerifySimpleAtoiGood<uint32_t>(42, 42); VerifySimpleAtoiGood<unsigned int>(42, 42); VerifySimpleAtoiGood<int64_t>(-42, -42); - VerifySimpleAtoiGood<long>(-42, -42); // NOLINT(runtime/int) + VerifySimpleAtoiGood<long>(-42, -42); // NOLINT: runtime-int VerifySimpleAtoiGood<uint64_t>(42, 42); VerifySimpleAtoiGood<size_t>(42, 42); VerifySimpleAtoiGood<std::string::size_type>(42, 42); } +TEST(NumbersTest, Atod) { + double d; + EXPECT_TRUE(absl::SimpleAtod("nan", &d)); + EXPECT_TRUE(std::isnan(d)); +} + +TEST(NumbersTest, Prefixes) { + double d; + EXPECT_FALSE(absl::SimpleAtod("++1", &d)); + EXPECT_FALSE(absl::SimpleAtod("+-1", &d)); + EXPECT_FALSE(absl::SimpleAtod("-+1", &d)); + EXPECT_FALSE(absl::SimpleAtod("--1", &d)); + EXPECT_TRUE(absl::SimpleAtod("-1", &d)); + EXPECT_EQ(d, -1.); + EXPECT_TRUE(absl::SimpleAtod("+1", &d)); + EXPECT_EQ(d, +1.); + + float f; + EXPECT_FALSE(absl::SimpleAtof("++1", &f)); + EXPECT_FALSE(absl::SimpleAtof("+-1", &f)); + EXPECT_FALSE(absl::SimpleAtof("-+1", &f)); + EXPECT_FALSE(absl::SimpleAtof("--1", &f)); + EXPECT_TRUE(absl::SimpleAtof("-1", &f)); + EXPECT_EQ(f, -1.f); + EXPECT_TRUE(absl::SimpleAtof("+1", &f)); + EXPECT_EQ(f, +1.f); +} + TEST(NumbersTest, Atoenum) { enum E01 { E01_zero = 0, @@ -412,6 +469,148 @@ TEST(NumbersTest, Atoenum) { VerifySimpleAtoiGood<E_biguint>(E_biguint_max32, E_biguint_max32); } +template <typename int_type, typename in_val_type> +void VerifySimpleHexAtoiGood(in_val_type in_value, int_type exp_value) { + std::string s; + // uint128 can be streamed but not StrCat'd + absl::strings_internal::OStringStream strm(&s); + if (in_value >= 0) { + strm << std::hex << in_value; + } else { + // Inefficient for small integers, but works with all integral types. + strm << "-" << std::hex << -absl::uint128(in_value); + } + int_type x = static_cast<int_type>(~exp_value); + EXPECT_TRUE(SimpleHexAtoi(s, &x)) + << "in_value=" << std::hex << in_value << " s=" << s << " x=" << x; + EXPECT_EQ(exp_value, x); + x = static_cast<int_type>(~exp_value); + EXPECT_TRUE(SimpleHexAtoi( + s.c_str(), &x)); // NOLINT: readability-redundant-string-conversions + EXPECT_EQ(exp_value, x); +} + +template <typename int_type, typename in_val_type> +void VerifySimpleHexAtoiBad(in_val_type in_value) { + std::string s; + // uint128 can be streamed but not StrCat'd + absl::strings_internal::OStringStream strm(&s); + if (in_value >= 0) { + strm << std::hex << in_value; + } else { + // Inefficient for small integers, but works with all integral types. + strm << "-" << std::hex << -absl::uint128(in_value); + } + int_type x; + EXPECT_FALSE(SimpleHexAtoi(s, &x)); + EXPECT_FALSE(SimpleHexAtoi( + s.c_str(), &x)); // NOLINT: readability-redundant-string-conversions +} + +TEST(NumbersTest, HexAtoi) { + // SimpleHexAtoi(absl::string_view, int32_t) + VerifySimpleHexAtoiGood<int32_t>(0, 0); + VerifySimpleHexAtoiGood<int32_t>(0x42, 0x42); + VerifySimpleHexAtoiGood<int32_t>(-0x42, -0x42); + + VerifySimpleHexAtoiGood<int32_t>(std::numeric_limits<int32_t>::min(), + std::numeric_limits<int32_t>::min()); + VerifySimpleHexAtoiGood<int32_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + + // SimpleHexAtoi(absl::string_view, uint32_t) + VerifySimpleHexAtoiGood<uint32_t>(0, 0); + VerifySimpleHexAtoiGood<uint32_t>(0x42, 0x42); + VerifySimpleHexAtoiBad<uint32_t>(-0x42); + + VerifySimpleHexAtoiBad<uint32_t>(std::numeric_limits<int32_t>::min()); + VerifySimpleHexAtoiGood<uint32_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleHexAtoiGood<uint32_t>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleHexAtoiBad<uint32_t>(std::numeric_limits<int64_t>::min()); + VerifySimpleHexAtoiBad<uint32_t>(std::numeric_limits<int64_t>::max()); + VerifySimpleHexAtoiBad<uint32_t>(std::numeric_limits<uint64_t>::max()); + + // SimpleHexAtoi(absl::string_view, int64_t) + VerifySimpleHexAtoiGood<int64_t>(0, 0); + VerifySimpleHexAtoiGood<int64_t>(0x42, 0x42); + VerifySimpleHexAtoiGood<int64_t>(-0x42, -0x42); + + VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<int32_t>::min(), + std::numeric_limits<int32_t>::min()); + VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<int64_t>::min(), + std::numeric_limits<int64_t>::min()); + VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max()); + VerifySimpleHexAtoiBad<int64_t>(std::numeric_limits<uint64_t>::max()); + + // SimpleHexAtoi(absl::string_view, uint64_t) + VerifySimpleHexAtoiGood<uint64_t>(0, 0); + VerifySimpleHexAtoiGood<uint64_t>(0x42, 0x42); + VerifySimpleHexAtoiBad<uint64_t>(-0x42); + + VerifySimpleHexAtoiBad<uint64_t>(std::numeric_limits<int32_t>::min()); + VerifySimpleHexAtoiGood<uint64_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleHexAtoiGood<uint64_t>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleHexAtoiBad<uint64_t>(std::numeric_limits<int64_t>::min()); + VerifySimpleHexAtoiGood<uint64_t>(std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max()); + VerifySimpleHexAtoiGood<uint64_t>(std::numeric_limits<uint64_t>::max(), + std::numeric_limits<uint64_t>::max()); + + // SimpleHexAtoi(absl::string_view, absl::uint128) + VerifySimpleHexAtoiGood<absl::uint128>(0, 0); + VerifySimpleHexAtoiGood<absl::uint128>(0x42, 0x42); + VerifySimpleHexAtoiBad<absl::uint128>(-0x42); + + VerifySimpleHexAtoiBad<absl::uint128>(std::numeric_limits<int32_t>::min()); + VerifySimpleHexAtoiGood<absl::uint128>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleHexAtoiGood<absl::uint128>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleHexAtoiBad<absl::uint128>(std::numeric_limits<int64_t>::min()); + VerifySimpleHexAtoiGood<absl::uint128>(std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max()); + VerifySimpleHexAtoiGood<absl::uint128>(std::numeric_limits<uint64_t>::max(), + std::numeric_limits<uint64_t>::max()); + VerifySimpleHexAtoiGood<absl::uint128>( + std::numeric_limits<absl::uint128>::max(), + std::numeric_limits<absl::uint128>::max()); + + // Some other types + VerifySimpleHexAtoiGood<int>(-0x42, -0x42); + VerifySimpleHexAtoiGood<int32_t>(-0x42, -0x42); + VerifySimpleHexAtoiGood<uint32_t>(0x42, 0x42); + VerifySimpleHexAtoiGood<unsigned int>(0x42, 0x42); + VerifySimpleHexAtoiGood<int64_t>(-0x42, -0x42); + VerifySimpleHexAtoiGood<long>(-0x42, -0x42); // NOLINT: runtime-int + VerifySimpleHexAtoiGood<uint64_t>(0x42, 0x42); + VerifySimpleHexAtoiGood<size_t>(0x42, 0x42); + VerifySimpleHexAtoiGood<std::string::size_type>(0x42, 0x42); + + // Number prefix + int32_t value; + EXPECT_TRUE(safe_strto32_base("0x34234324", &value, 16)); + EXPECT_EQ(0x34234324, value); + + EXPECT_TRUE(safe_strto32_base("0X34234324", &value, 16)); + EXPECT_EQ(0x34234324, value); + + // ASCII whitespace + EXPECT_TRUE(safe_strto32_base(" \t\n 34234324", &value, 16)); + EXPECT_EQ(0x34234324, value); + + EXPECT_TRUE(safe_strto32_base("34234324 \t\n ", &value, 16)); + EXPECT_EQ(0x34234324, value); +} + TEST(stringtest, safe_strto32_base) { int32_t value; EXPECT_TRUE(safe_strto32_base("0x34234324", &value, 16)); @@ -481,7 +680,7 @@ TEST(stringtest, safe_strto32_base) { EXPECT_TRUE(safe_strto32_base(std::string("0x1234"), &value, 16)); EXPECT_EQ(0x1234, value); - // Base-10 std::string version. + // Base-10 string version. EXPECT_TRUE(safe_strto32_base("1234", &value, 10)); EXPECT_EQ(1234, value); } @@ -622,7 +821,7 @@ TEST(stringtest, safe_strto64_base) { EXPECT_TRUE(safe_strto64_base(std::string("0x1234"), &value, 16)); EXPECT_EQ(0x1234, value); - // Base-10 std::string version. + // Base-10 string version. EXPECT_TRUE(safe_strto64_base("1234", &value, 10)); EXPECT_EQ(1234, value); } @@ -718,6 +917,51 @@ TEST(stringtest, safe_strtou128_random) { EXPECT_FALSE(parse_func(s, &parsed_value, base)); } } +TEST(stringtest, safe_strto128_random) { + // random number generators don't work for int128, and + // int128 can be streamed but not StrCat'd, so this code must be custom + // implemented for int128, but is generally the same as what's above. + // test_random_integer_parse_base<absl::int128>( + // &absl::numbers_internal::safe_strto128_base); + using RandomEngine = std::minstd_rand0; + using IntType = absl::int128; + constexpr auto parse_func = &absl::numbers_internal::safe_strto128_base; + + std::random_device rd; + RandomEngine rng(rd()); + std::uniform_int_distribution<int64_t> random_int64( + std::numeric_limits<int64_t>::min()); + std::uniform_int_distribution<uint64_t> random_uint64( + std::numeric_limits<uint64_t>::min()); + std::uniform_int_distribution<int> random_base(2, 35); + + for (size_t i = 0; i < kNumRandomTests; ++i) { + int64_t high = random_int64(rng); + uint64_t low = random_uint64(rng); + IntType value = absl::MakeInt128(high, low); + + int base = random_base(rng); + std::string str_value; + EXPECT_TRUE(Itoa<IntType>(value, base, &str_value)); + IntType parsed_value; + + // Test successful parse + EXPECT_TRUE(parse_func(str_value, &parsed_value, base)); + EXPECT_EQ(parsed_value, value); + + // Test overflow + std::string s; + absl::strings_internal::OStringStream(&s) + << std::numeric_limits<IntType>::max() << value; + EXPECT_FALSE(parse_func(s, &parsed_value, base)); + + // Test underflow + s.clear(); + absl::strings_internal::OStringStream(&s) + << std::numeric_limits<IntType>::min() << value; + EXPECT_FALSE(parse_func(s, &parsed_value, base)); + } +} TEST(stringtest, safe_strtou32_base) { for (int i = 0; strtouint32_test_cases()[i].str != nullptr; ++i) { diff --git a/third_party/abseil-cpp/absl/strings/str_cat.cc b/third_party/abseil-cpp/absl/strings/str_cat.cc index d9afe2f385..f4a77493a4 100644 --- a/third_party/abseil-cpp/absl/strings/str_cat.cc +++ b/third_party/abseil-cpp/absl/strings/str_cat.cc @@ -141,12 +141,12 @@ namespace strings_internal { std::string CatPieces(std::initializer_list<absl::string_view> pieces) { std::string result; size_t total_size = 0; - for (const absl::string_view piece : pieces) total_size += piece.size(); + for (const absl::string_view& piece : pieces) total_size += piece.size(); strings_internal::STLStringResizeUninitialized(&result, total_size); char* const begin = &result[0]; char* out = begin; - for (const absl::string_view piece : pieces) { + for (const absl::string_view& piece : pieces) { const size_t this_size = piece.size(); if (this_size != 0) { memcpy(out, piece.data(), this_size); @@ -170,15 +170,15 @@ void AppendPieces(std::string* dest, std::initializer_list<absl::string_view> pieces) { size_t old_size = dest->size(); size_t total_size = old_size; - for (const absl::string_view piece : pieces) { + for (const absl::string_view& piece : pieces) { ASSERT_NO_OVERLAP(*dest, piece); total_size += piece.size(); } - strings_internal::STLStringResizeUninitialized(dest, total_size); + strings_internal::STLStringResizeUninitializedAmortized(dest, total_size); char* const begin = &(*dest)[0]; char* out = begin + old_size; - for (const absl::string_view piece : pieces) { + for (const absl::string_view& piece : pieces) { const size_t this_size = piece.size(); if (this_size != 0) { memcpy(out, piece.data(), this_size); @@ -199,7 +199,7 @@ void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b) { ASSERT_NO_OVERLAP(*dest, a); ASSERT_NO_OVERLAP(*dest, b); std::string::size_type old_size = dest->size(); - strings_internal::STLStringResizeUninitialized( + strings_internal::STLStringResizeUninitializedAmortized( dest, old_size + a.size() + b.size()); char* const begin = &(*dest)[0]; char* out = begin + old_size; @@ -214,7 +214,7 @@ void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b, ASSERT_NO_OVERLAP(*dest, b); ASSERT_NO_OVERLAP(*dest, c); std::string::size_type old_size = dest->size(); - strings_internal::STLStringResizeUninitialized( + strings_internal::STLStringResizeUninitializedAmortized( dest, old_size + a.size() + b.size() + c.size()); char* const begin = &(*dest)[0]; char* out = begin + old_size; @@ -231,7 +231,7 @@ void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b, ASSERT_NO_OVERLAP(*dest, c); ASSERT_NO_OVERLAP(*dest, d); std::string::size_type old_size = dest->size(); - strings_internal::STLStringResizeUninitialized( + strings_internal::STLStringResizeUninitializedAmortized( dest, old_size + a.size() + b.size() + c.size() + d.size()); char* const begin = &(*dest)[0]; char* out = begin + old_size; diff --git a/third_party/abseil-cpp/absl/strings/str_cat.h b/third_party/abseil-cpp/absl/strings/str_cat.h index 292fa23597..a8a85c7322 100644 --- a/third_party/abseil-cpp/absl/strings/str_cat.h +++ b/third_party/abseil-cpp/absl/strings/str_cat.h @@ -253,7 +253,7 @@ class AlphaNum { const std::basic_string<char, std::char_traits<char>, Allocator>& str) : piece_(str) {} - // Use std::string literals ":" instead of character literals ':'. + // Use string literals ":" instead of character literals ':'. AlphaNum(char c) = delete; // NOLINT(runtime/explicit) AlphaNum(const AlphaNum&) = delete; diff --git a/third_party/abseil-cpp/absl/strings/str_cat_benchmark.cc b/third_party/abseil-cpp/absl/strings/str_cat_benchmark.cc index 14c63b3fc9..02c4dbe6d8 100644 --- a/third_party/abseil-cpp/absl/strings/str_cat_benchmark.cc +++ b/third_party/abseil-cpp/absl/strings/str_cat_benchmark.cc @@ -23,7 +23,7 @@ namespace { const char kStringOne[] = "Once Upon A Time, "; -const char kStringTwo[] = "There was a std::string benchmark"; +const char kStringTwo[] = "There was a string benchmark"; // We want to include negative numbers in the benchmark, so this function // is used to count 0, 1, -1, 2, -2, 3, -3, ... @@ -137,4 +137,51 @@ void BM_DoubleToString_By_SixDigits(benchmark::State& state) { } BENCHMARK(BM_DoubleToString_By_SixDigits); +template <typename... Chunks> +void BM_StrAppendImpl(benchmark::State& state, size_t total_bytes, + Chunks... chunks) { + for (auto s : state) { + std::string result; + while (result.size() < total_bytes) { + absl::StrAppend(&result, chunks...); + benchmark::DoNotOptimize(result); + } + } +} + +void BM_StrAppend(benchmark::State& state) { + const int total_bytes = state.range(0); + const int chunks_at_a_time = state.range(1); + const absl::string_view kChunk = "0123456789"; + + switch (chunks_at_a_time) { + case 1: + return BM_StrAppendImpl(state, total_bytes, kChunk); + case 2: + return BM_StrAppendImpl(state, total_bytes, kChunk, kChunk); + case 4: + return BM_StrAppendImpl(state, total_bytes, kChunk, kChunk, kChunk, + kChunk); + case 8: + return BM_StrAppendImpl(state, total_bytes, kChunk, kChunk, kChunk, + kChunk, kChunk, kChunk, kChunk, kChunk); + default: + std::abort(); + } +} + +template <typename B> +void StrAppendConfig(B* benchmark) { + for (int bytes : {10, 100, 1000, 10000}) { + for (int chunks : {1, 2, 4, 8}) { + // Only add the ones that divide properly. Otherwise we are over counting. + if (bytes % (10 * chunks) == 0) { + benchmark->Args({bytes, chunks}); + } + } + } +} + +BENCHMARK(BM_StrAppend)->Apply(StrAppendConfig); + } // namespace diff --git a/third_party/abseil-cpp/absl/strings/str_cat_test.cc b/third_party/abseil-cpp/absl/strings/str_cat_test.cc index be39880be7..f3770dc076 100644 --- a/third_party/abseil-cpp/absl/strings/str_cat_test.cc +++ b/third_party/abseil-cpp/absl/strings/str_cat_test.cc @@ -162,7 +162,7 @@ TEST(StrCat, Basics) { EXPECT_EQ(result, "12345678910, 10987654321!"); std::string one = - "1"; // Actually, it's the size of this std::string that we want; a + "1"; // Actually, it's the size of this string that we want; a // 64-bit build distinguishes between size_t and uint64_t, // even though they're both unsigned 64-bit values. result = absl::StrCat("And a ", one.size(), " and a ", @@ -375,7 +375,7 @@ TEST(StrAppend, Basics) { EXPECT_EQ(result.substr(old_size), "12345678910, 10987654321!"); std::string one = - "1"; // Actually, it's the size of this std::string that we want; a + "1"; // Actually, it's the size of this string that we want; a // 64-bit build distinguishes between size_t and uint64_t, // even though they're both unsigned 64-bit values. old_size = result.size(); @@ -463,7 +463,7 @@ TEST(StrAppend, CornerCases) { } TEST(StrAppend, CornerCasesNonEmptyAppend) { - for (std::string result : {"hello", "a std::string too long to fit in the SSO"}) { + for (std::string result : {"hello", "a string too long to fit in the SSO"}) { const std::string expected = result; absl::StrAppend(&result, ""); EXPECT_EQ(result, expected); diff --git a/third_party/abseil-cpp/absl/strings/str_format.h b/third_party/abseil-cpp/absl/strings/str_format.h index 2f9b4b2786..4b05c70c23 100644 --- a/third_party/abseil-cpp/absl/strings/str_format.h +++ b/third_party/abseil-cpp/absl/strings/str_format.h @@ -19,7 +19,7 @@ // // The `str_format` library is a typesafe replacement for the family of // `printf()` string formatting routines within the `<cstdio>` standard library -// header. Like the `printf` family, the `str_format` uses a "format string" to +// header. Like the `printf` family, `str_format` uses a "format string" to // perform argument substitutions based on types. See the `FormatSpec` section // below for format string documentation. // @@ -57,8 +57,7 @@ // arbitrary sink types: // // * A generic `Format()` function to write outputs to arbitrary sink types, -// which must implement a `RawSinkFormat` interface. (See -// `str_format_sink.h` for more information.) +// which must implement a `FormatRawSink` interface. // // * A `FormatUntyped()` function that is similar to `Format()` except it is // loosely typed. `FormatUntyped()` is not a template and does not perform @@ -66,8 +65,7 @@ // boolean from a runtime check. // // In addition, the `str_format` library provides extension points for -// augmenting formatting to new types. These extensions are fully documented -// within the `str_format_extension.h` header file. +// augmenting formatting to new types. See "StrFormat Extensions" below. #ifndef ABSL_STRINGS_STR_FORMAT_H_ #define ABSL_STRINGS_STR_FORMAT_H_ @@ -255,8 +253,8 @@ class FormatCountCapture { // argument, etc. template <typename... Args> -using FormatSpec = - typename str_format_internal::FormatSpecDeductionBarrier<Args...>::type; +using FormatSpec = str_format_internal::FormatSpecTemplate< + str_format_internal::ArgumentToConv<Args>()...>; // ParsedFormat // @@ -283,9 +281,36 @@ using FormatSpec = // } else { // ... error case ... // } + +#if defined(__cpp_nontype_template_parameter_auto) +// If C++17 is available, an 'extended' format is also allowed that can specify +// multiple conversion characters per format argument, using a combination of +// `absl::FormatConversionCharSet` enum values (logically a set union) +// via the `|` operator. (Single character-based arguments are still accepted, +// but cannot be combined). Some common conversions also have predefined enum +// values, such as `absl::FormatConversionCharSet::kIntegral`. +// +// Example: +// // Extended format supports multiple conversion characters per argument, +// // specified via a combination of `FormatConversionCharSet` enums. +// using MyFormat = absl::ParsedFormat<absl::FormatConversionCharSet::d | +// absl::FormatConversionCharSet::x>; +// MyFormat GetFormat(bool use_hex) { +// if (use_hex) return MyFormat("foo %x bar"); +// return MyFormat("foo %d bar"); +// } +// // `format` can be used with any value that supports 'd' and 'x', +// // like `int`. +// auto format = GetFormat(use_hex); +// value = StringF(format, i); +template <auto... Conv> +using ParsedFormat = absl::str_format_internal::ExtendedParsedFormat< + absl::str_format_internal::ToFormatConversionCharSet(Conv)...>; +#else template <char... Conv> using ParsedFormat = str_format_internal::ExtendedParsedFormat< - str_format_internal::ConversionCharToConv(Conv)...>; + absl::str_format_internal::ToFormatConversionCharSet(Conv)...>; +#endif // defined(__cpp_nontype_template_parameter_auto) // StrFormat() // @@ -432,6 +457,16 @@ int SNPrintF(char* output, std::size_t size, const FormatSpec<Args...>& format, // // FormatRawSink is a type erased wrapper around arbitrary sink objects // specifically used as an argument to `Format()`. +// +// All the object has to do define an overload of `AbslFormatFlush()` for the +// sink, usually by adding a ADL-based free function in the same namespace as +// the sink: +// +// void AbslFormatFlush(MySink* dest, absl::string_view part); +// +// where `dest` is the pointer passed to `absl::Format()`. The function should +// append `part` to `dest`. +// // FormatRawSink does not own the passed sink object. The passed object must // outlive the FormatRawSink. class FormatRawSink { @@ -455,12 +490,13 @@ class FormatRawSink { // `absl::FormatRawSink` interface), using a format string and zero or more // additional arguments. // -// By default, `std::string` and `std::ostream` are supported as destination -// objects. If a `std::string` is used the formatted string is appended to it. +// By default, `std::string`, `std::ostream`, and `absl::Cord` are supported as +// destination objects. If a `std::string` is used the formatted string is +// appended to it. // -// `absl::Format()` is a generic version of `absl::StrFormat(), for custom -// sinks. The format string, like format strings for `StrFormat()`, is checked -// at compile-time. +// `absl::Format()` is a generic version of `absl::StrAppendFormat()`, for +// custom sinks. The format string, like format strings for `StrFormat()`, is +// checked at compile-time. // // On failure, this function returns `false` and the state of the sink is // unspecified. @@ -500,8 +536,7 @@ using FormatArg = str_format_internal::FormatArgImpl; // The arguments are provided in an `absl::Span<const absl::FormatArg>`. // Each `absl::FormatArg` object binds to a single argument and keeps a // reference to it. The values used to create the `FormatArg` objects must -// outlive this function call. (See `str_format_arg.h` for information on -// the `FormatArg` class.)_ +// outlive this function call. // // Example: // @@ -531,6 +566,246 @@ ABSL_MUST_USE_RESULT inline bool FormatUntyped( str_format_internal::UntypedFormatSpecImpl::Extract(format), args); } +//------------------------------------------------------------------------------ +// StrFormat Extensions +//------------------------------------------------------------------------------ +// +// AbslFormatConvert() +// +// The StrFormat library provides a customization API for formatting +// user-defined types using absl::StrFormat(). The API relies on detecting an +// overload in the user-defined type's namespace of a free (non-member) +// `AbslFormatConvert()` function, usually as a friend definition with the +// following signature: +// +// absl::FormatConvertResult<...> AbslFormatConvert( +// const X& value, +// const absl::FormatConversionSpec& spec, +// absl::FormatSink *sink); +// +// An `AbslFormatConvert()` overload for a type should only be declared in the +// same file and namespace as said type. +// +// The abstractions within this definition include: +// +// * An `absl::FormatConversionSpec` to specify the fields to pull from a +// user-defined type's format string +// * An `absl::FormatSink` to hold the converted string data during the +// conversion process. +// * An `absl::FormatConvertResult` to hold the status of the returned +// formatting operation +// +// The return type encodes all the conversion characters that your +// AbslFormatConvert() routine accepts. The return value should be {true}. +// A return value of {false} will result in `StrFormat()` returning +// an empty string. This result will be propagated to the result of +// `FormatUntyped`. +// +// Example: +// +// struct Point { +// // To add formatting support to `Point`, we simply need to add a free +// // (non-member) function `AbslFormatConvert()`. This method interprets +// // `spec` to print in the request format. The allowed conversion characters +// // can be restricted via the type of the result, in this example +// // string and integral formatting are allowed (but not, for instance +// // floating point characters like "%f"). You can add such a free function +// // using a friend declaration within the body of the class: +// friend absl::FormatConvertResult<absl::FormatConversionCharSet::kString | +// absl::FormatConversionCharSet::kIntegral> +// AbslFormatConvert(const Point& p, const absl::FormatConversionSpec& spec, +// absl::FormatSink* s) { +// if (spec.conversion_char() == absl::FormatConversionChar::s) { +// s->Append(absl::StrCat("x=", p.x, " y=", p.y)); +// } else { +// s->Append(absl::StrCat(p.x, ",", p.y)); +// } +// return {true}; +// } +// +// int x; +// int y; +// }; + +// clang-format off + +// FormatConversionChar +// +// Specifies the formatting character provided in the format string +// passed to `StrFormat()`. +enum class FormatConversionChar : uint8_t { + c, s, // text + d, i, o, u, x, X, // int + f, F, e, E, g, G, a, A, // float + n, p // misc +}; +// clang-format on + +// FormatConversionSpec +// +// Specifies modifications to the conversion of the format string, through use +// of one or more format flags in the source format string. +class FormatConversionSpec { + public: + // FormatConversionSpec::is_basic() + // + // Indicates that width and precision are not specified, and no additional + // flags are set for this conversion character in the format string. + bool is_basic() const { return impl_.is_basic(); } + + // FormatConversionSpec::has_left_flag() + // + // Indicates whether the result should be left justified for this conversion + // character in the format string. This flag is set through use of a '-' + // character in the format string. E.g. "%-s" + bool has_left_flag() const { return impl_.has_left_flag(); } + + // FormatConversionSpec::has_show_pos_flag() + // + // Indicates whether a sign column is prepended to the result for this + // conversion character in the format string, even if the result is positive. + // This flag is set through use of a '+' character in the format string. + // E.g. "%+d" + bool has_show_pos_flag() const { return impl_.has_show_pos_flag(); } + + // FormatConversionSpec::has_sign_col_flag() + // + // Indicates whether a mandatory sign column is added to the result for this + // conversion character. This flag is set through use of a space character + // (' ') in the format string. E.g. "% i" + bool has_sign_col_flag() const { return impl_.has_sign_col_flag(); } + + // FormatConversionSpec::has_alt_flag() + // + // Indicates whether an "alternate" format is applied to the result for this + // conversion character. Alternative forms depend on the type of conversion + // character, and unallowed alternatives are undefined. This flag is set + // through use of a '#' character in the format string. E.g. "%#h" + bool has_alt_flag() const { return impl_.has_alt_flag(); } + + // FormatConversionSpec::has_zero_flag() + // + // Indicates whether zeroes should be prepended to the result for this + // conversion character instead of spaces. This flag is set through use of the + // '0' character in the format string. E.g. "%0f" + bool has_zero_flag() const { return impl_.has_zero_flag(); } + + // FormatConversionSpec::conversion_char() + // + // Returns the underlying conversion character. + FormatConversionChar conversion_char() const { + return impl_.conversion_char(); + } + + // FormatConversionSpec::width() + // + // Returns the specified width (indicated through use of a non-zero integer + // value or '*' character) of the conversion character. If width is + // unspecified, it returns a negative value. + int width() const { return impl_.width(); } + + // FormatConversionSpec::precision() + // + // Returns the specified precision (through use of the '.' character followed + // by a non-zero integer value or '*' character) of the conversion character. + // If precision is unspecified, it returns a negative value. + int precision() const { return impl_.precision(); } + + private: + explicit FormatConversionSpec( + str_format_internal::FormatConversionSpecImpl impl) + : impl_(impl) {} + + friend str_format_internal::FormatConversionSpecImpl; + + absl::str_format_internal::FormatConversionSpecImpl impl_; +}; + +// Type safe OR operator for FormatConversionCharSet to allow accepting multiple +// conversion chars in custom format converters. +constexpr FormatConversionCharSet operator|(FormatConversionCharSet a, + FormatConversionCharSet b) { + return static_cast<FormatConversionCharSet>(static_cast<uint64_t>(a) | + static_cast<uint64_t>(b)); +} + +// FormatConversionCharSet +// +// Specifies the _accepted_ conversion types as a template parameter to +// FormatConvertResult for custom implementations of `AbslFormatConvert`. +// Note the helper predefined alias definitions (kIntegral, etc.) below. +enum class FormatConversionCharSet : uint64_t { + // text + c = str_format_internal::FormatConversionCharToConvInt('c'), + s = str_format_internal::FormatConversionCharToConvInt('s'), + // integer + d = str_format_internal::FormatConversionCharToConvInt('d'), + i = str_format_internal::FormatConversionCharToConvInt('i'), + o = str_format_internal::FormatConversionCharToConvInt('o'), + u = str_format_internal::FormatConversionCharToConvInt('u'), + x = str_format_internal::FormatConversionCharToConvInt('x'), + X = str_format_internal::FormatConversionCharToConvInt('X'), + // Float + f = str_format_internal::FormatConversionCharToConvInt('f'), + F = str_format_internal::FormatConversionCharToConvInt('F'), + e = str_format_internal::FormatConversionCharToConvInt('e'), + E = str_format_internal::FormatConversionCharToConvInt('E'), + g = str_format_internal::FormatConversionCharToConvInt('g'), + G = str_format_internal::FormatConversionCharToConvInt('G'), + a = str_format_internal::FormatConversionCharToConvInt('a'), + A = str_format_internal::FormatConversionCharToConvInt('A'), + // misc + n = str_format_internal::FormatConversionCharToConvInt('n'), + p = str_format_internal::FormatConversionCharToConvInt('p'), + + // Used for width/precision '*' specification. + kStar = static_cast<uint64_t>( + absl::str_format_internal::FormatConversionCharSetInternal::kStar), + // Some predefined values: + kIntegral = d | i | u | o | x | X, + kFloating = a | e | f | g | A | E | F | G, + kNumeric = kIntegral | kFloating, + kString = s, + kPointer = p, +}; + +// FormatSink +// +// An abstraction to which conversions write their string data. +// +class FormatSink { + public: + // Appends `count` copies of `ch`. + void Append(size_t count, char ch) { sink_->Append(count, ch); } + + void Append(string_view v) { sink_->Append(v); } + + // Appends the first `precision` bytes of `v`. If this is less than + // `width`, spaces will be appended first (if `left` is false), or + // after (if `left` is true) to ensure the total amount appended is + // at least `width`. + bool PutPaddedString(string_view v, int width, int precision, bool left) { + return sink_->PutPaddedString(v, width, precision, left); + } + + private: + friend str_format_internal::FormatSinkImpl; + explicit FormatSink(str_format_internal::FormatSinkImpl* s) : sink_(s) {} + str_format_internal::FormatSinkImpl* sink_; +}; + +// FormatConvertResult +// +// Indicates whether a call to AbslFormatConvert() was successful. +// This return type informs the StrFormat extension framework (through +// ADL but using the return type) of what conversion characters are supported. +// It is strongly discouraged to return {false}, as this will result in an +// empty string in StrFormat. +template <FormatConversionCharSet C> +struct FormatConvertResult { + bool value; +}; + ABSL_NAMESPACE_END } // namespace absl diff --git a/third_party/abseil-cpp/absl/strings/str_format_test.cc b/third_party/abseil-cpp/absl/strings/str_format_test.cc index acbdbf4a23..c60027ad29 100644 --- a/third_party/abseil-cpp/absl/strings/str_format_test.cc +++ b/third_party/abseil-cpp/absl/strings/str_format_test.cc @@ -1,3 +1,18 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/str_format.h" #include <cstdarg> #include <cstdint> @@ -6,7 +21,8 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/strings/str_format.h" +#include "absl/strings/cord.h" +#include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" namespace absl { @@ -242,7 +258,7 @@ class TempFile { std::FILE* file() const { return file_; } - // Read the file into a std::string. + // Read the file into a string. std::string ReadFile() { std::fseek(file_, 0, SEEK_END); int size = std::ftell(file_); @@ -345,11 +361,12 @@ TEST(StrFormat, BehavesAsDocumented) { EXPECT_EQ(StrFormat("%c", int{'a'}), "a"); EXPECT_EQ(StrFormat("%c", long{'a'}), "a"); // NOLINT EXPECT_EQ(StrFormat("%c", uint64_t{'a'}), "a"); - // "s" - std::string Eg: "C" -> "C", std::string("C++") -> "C++" + // "s" - string Eg: "C" -> "C", std::string("C++") -> "C++" // Formats std::string, char*, string_view, and Cord. EXPECT_EQ(StrFormat("%s", "C"), "C"); EXPECT_EQ(StrFormat("%s", std::string("C++")), "C++"); EXPECT_EQ(StrFormat("%s", string_view("view")), "view"); + EXPECT_EQ(StrFormat("%s", absl::Cord("cord")), "cord"); // Integral Conversion // These format integral types: char, int, long, uint64_t, etc. EXPECT_EQ(StrFormat("%d", char{10}), "10"); @@ -450,7 +467,7 @@ struct SummarizeConsumer { if (conv.precision.is_from_arg()) { *out += "." + std::to_string(conv.precision.get_from_arg()) + "$*"; } - *out += FormatConversionCharToChar(conv.conv); + *out += str_format_internal::FormatConversionCharToChar(conv.conv); *out += "}"; return true; } @@ -532,76 +549,152 @@ TEST_F(ParsedFormatTest, SimpleUncheckedIncorrect) { EXPECT_FALSE((ParsedFormat<'s', 'd', 'g'>::New(format))); } -using str_format_internal::Conv; +#if defined(__cpp_nontype_template_parameter_auto) + +template <auto T> +std::true_type IsValidParsedFormatArgTest(ParsedFormat<T>*); + +template <auto T> +std::false_type IsValidParsedFormatArgTest(...); + +template <auto T> +using IsValidParsedFormatArg = decltype(IsValidParsedFormatArgTest<T>(nullptr)); + +TEST_F(ParsedFormatTest, OnlyValidTypesAllowed) { + ASSERT_TRUE(IsValidParsedFormatArg<'c'>::value); + + ASSERT_TRUE(IsValidParsedFormatArg<FormatConversionCharSet::d>::value); + + ASSERT_TRUE(IsValidParsedFormatArg<absl::FormatConversionCharSet::d | + absl::FormatConversionCharSet::x>::value); + ASSERT_TRUE( + IsValidParsedFormatArg<absl::FormatConversionCharSet::kIntegral>::value); + + // This is an easy mistake to make, however, this will reduce to an integer + // which has no meaning, so we need to ensure it doesn't compile. + ASSERT_FALSE(IsValidParsedFormatArg<'x' | 'd'>::value); + + // For now, we disallow construction based on ConversionChar (rather than + // CharSet) + ASSERT_FALSE(IsValidParsedFormatArg<absl::FormatConversionChar::d>::value); +} + +TEST_F(ParsedFormatTest, ExtendedTyping) { + EXPECT_FALSE(ParsedFormat<FormatConversionCharSet::d>::New("")); + ASSERT_TRUE(ParsedFormat<absl::FormatConversionCharSet::d>::New("%d")); + auto v1 = ParsedFormat<'d', absl::FormatConversionCharSet::s>::New("%d%s"); + ASSERT_TRUE(v1); + auto v2 = ParsedFormat<absl::FormatConversionCharSet::d, 's'>::New("%d%s"); + ASSERT_TRUE(v2); + auto v3 = ParsedFormat<absl::FormatConversionCharSet::d | + absl::FormatConversionCharSet::s, + 's'>::New("%d%s"); + ASSERT_TRUE(v3); + auto v4 = ParsedFormat<absl::FormatConversionCharSet::d | + absl::FormatConversionCharSet::s, + 's'>::New("%s%s"); + ASSERT_TRUE(v4); +} +#endif TEST_F(ParsedFormatTest, UncheckedCorrect) { - auto f = ExtendedParsedFormat<Conv::d>::New("ABC%dDEF"); + auto f = + ExtendedParsedFormat<absl::FormatConversionCharSet::d>::New("ABC%dDEF"); ASSERT_TRUE(f); EXPECT_EQ("[ABC]{d:1$d}[DEF]", SummarizeParsedFormat(*f)); std::string format = "%sFFF%dZZZ%f"; - auto f2 = - ExtendedParsedFormat<Conv::string, Conv::d, Conv::floating>::New(format); + auto f2 = ExtendedParsedFormat< + absl::FormatConversionCharSet::kString, absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::kFloating>::New(format); ASSERT_TRUE(f2); EXPECT_EQ("{s:1$s}[FFF]{d:2$d}[ZZZ]{f:3$f}", SummarizeParsedFormat(*f2)); - f2 = ExtendedParsedFormat<Conv::string, Conv::d, Conv::floating>::New( - "%s %d %f"); + f2 = ExtendedParsedFormat< + absl::FormatConversionCharSet::kString, absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::kFloating>::New("%s %d %f"); ASSERT_TRUE(f2); EXPECT_EQ("{s:1$s}[ ]{d:2$d}[ ]{f:3$f}", SummarizeParsedFormat(*f2)); - auto star = ExtendedParsedFormat<Conv::star, Conv::d>::New("%*d"); + auto star = + ExtendedParsedFormat<absl::FormatConversionCharSet::kStar, + absl::FormatConversionCharSet::d>::New("%*d"); ASSERT_TRUE(star); EXPECT_EQ("{*d:2$1$*d}", SummarizeParsedFormat(*star)); - auto dollar = ExtendedParsedFormat<Conv::d, Conv::s>::New("%2$s %1$d"); + auto dollar = + ExtendedParsedFormat<absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::New("%2$s %1$d"); ASSERT_TRUE(dollar); EXPECT_EQ("{2$s:2$s}[ ]{1$d:1$d}", SummarizeParsedFormat(*dollar)); // with reuse - dollar = ExtendedParsedFormat<Conv::d, Conv::s>::New("%2$s %1$d %1$d"); + dollar = ExtendedParsedFormat< + absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::New("%2$s %1$d %1$d"); ASSERT_TRUE(dollar); EXPECT_EQ("{2$s:2$s}[ ]{1$d:1$d}[ ]{1$d:1$d}", SummarizeParsedFormat(*dollar)); } TEST_F(ParsedFormatTest, UncheckedIgnoredArgs) { - EXPECT_FALSE((ExtendedParsedFormat<Conv::d, Conv::s>::New("ABC"))); - EXPECT_FALSE((ExtendedParsedFormat<Conv::d, Conv::s>::New("%dABC"))); - EXPECT_FALSE((ExtendedParsedFormat<Conv::d, Conv::s>::New("ABC%2$s"))); - auto f = ExtendedParsedFormat<Conv::d, Conv::s>::NewAllowIgnored("ABC"); + EXPECT_FALSE( + (ExtendedParsedFormat<absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::New("ABC"))); + EXPECT_FALSE( + (ExtendedParsedFormat<absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::New("%dABC"))); + EXPECT_FALSE( + (ExtendedParsedFormat<absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::New("ABC%2$s"))); + auto f = ExtendedParsedFormat< + absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::NewAllowIgnored("ABC"); ASSERT_TRUE(f); EXPECT_EQ("[ABC]", SummarizeParsedFormat(*f)); - f = ExtendedParsedFormat<Conv::d, Conv::s>::NewAllowIgnored("%dABC"); + f = ExtendedParsedFormat< + absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::NewAllowIgnored("%dABC"); ASSERT_TRUE(f); EXPECT_EQ("{d:1$d}[ABC]", SummarizeParsedFormat(*f)); - f = ExtendedParsedFormat<Conv::d, Conv::s>::NewAllowIgnored("ABC%2$s"); + f = ExtendedParsedFormat< + absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::s>::NewAllowIgnored("ABC%2$s"); ASSERT_TRUE(f); EXPECT_EQ("[ABC]{2$s:2$s}", SummarizeParsedFormat(*f)); } TEST_F(ParsedFormatTest, UncheckedMultipleTypes) { - auto dx = ExtendedParsedFormat<Conv::d | Conv::x>::New("%1$d %1$x"); + auto dx = + ExtendedParsedFormat<absl::FormatConversionCharSet::d | + absl::FormatConversionCharSet::x>::New("%1$d %1$x"); EXPECT_TRUE(dx); EXPECT_EQ("{1$d:1$d}[ ]{1$x:1$x}", SummarizeParsedFormat(*dx)); - dx = ExtendedParsedFormat<Conv::d | Conv::x>::New("%1$d"); + dx = ExtendedParsedFormat<absl::FormatConversionCharSet::d | + absl::FormatConversionCharSet::x>::New("%1$d"); EXPECT_TRUE(dx); EXPECT_EQ("{1$d:1$d}", SummarizeParsedFormat(*dx)); } TEST_F(ParsedFormatTest, UncheckedIncorrect) { - EXPECT_FALSE(ExtendedParsedFormat<Conv::d>::New("")); + EXPECT_FALSE(ExtendedParsedFormat<absl::FormatConversionCharSet::d>::New("")); - EXPECT_FALSE(ExtendedParsedFormat<Conv::d>::New("ABC%dDEF%d")); + EXPECT_FALSE(ExtendedParsedFormat<absl::FormatConversionCharSet::d>::New( + "ABC%dDEF%d")); std::string format = "%sFFF%dZZZ%f"; - EXPECT_FALSE((ExtendedParsedFormat<Conv::s, Conv::d, Conv::g>::New(format))); + EXPECT_FALSE( + (ExtendedParsedFormat<absl::FormatConversionCharSet::s, + absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::g>::New(format))); } TEST_F(ParsedFormatTest, RegressionMixPositional) { - EXPECT_FALSE((ExtendedParsedFormat<Conv::d, Conv::o>::New("%1$d %o"))); + EXPECT_FALSE( + (ExtendedParsedFormat<absl::FormatConversionCharSet::d, + absl::FormatConversionCharSet::o>::New("%1$d %o"))); } using FormatWrapperTest = ::testing::Test; @@ -626,6 +719,38 @@ TEST_F(FormatWrapperTest, ParsedFormat) { ABSL_NAMESPACE_END } // namespace absl +using FormatExtensionTest = ::testing::Test; + +struct Point { + friend absl::FormatConvertResult<absl::FormatConversionCharSet::kString | + absl::FormatConversionCharSet::kIntegral> + AbslFormatConvert(const Point& p, const absl::FormatConversionSpec& spec, + absl::FormatSink* s) { + if (spec.conversion_char() == absl::FormatConversionChar::s) { + s->Append(absl::StrCat("x=", p.x, " y=", p.y)); + } else { + s->Append(absl::StrCat(p.x, ",", p.y)); + } + return {true}; + } + + int x = 10; + int y = 20; +}; + +TEST_F(FormatExtensionTest, AbslFormatConvertExample) { + Point p; + EXPECT_EQ(absl::StrFormat("a %s z", p), "a x=10 y=20 z"); + EXPECT_EQ(absl::StrFormat("a %d z", p), "a 10,20 z"); + + // Typed formatting will fail to compile an invalid format. + // StrFormat("%f", p); // Does not compile. + std::string actual; + absl::UntypedFormatSpec f1("%f"); + // FormatUntyped will return false for bad character. + EXPECT_FALSE(absl::FormatUntyped(&actual, f1, {absl::FormatArg(p)})); +} + // Some codegen thunks that we can use to easily dump the generated assembly for // different StrFormat calls. diff --git a/third_party/abseil-cpp/absl/strings/str_join.h b/third_party/abseil-cpp/absl/strings/str_join.h index ae5731a42b..33534536cf 100644 --- a/third_party/abseil-cpp/absl/strings/str_join.h +++ b/third_party/abseil-cpp/absl/strings/str_join.h @@ -144,7 +144,7 @@ strings_internal::DereferenceFormatterImpl<Formatter> DereferenceFormatter( std::forward<Formatter>(f)); } -// Function overload of `DererefenceFormatter()` for using a default +// Function overload of `DereferenceFormatter()` for using a default // `AlphaNumFormatter()`. inline strings_internal::DereferenceFormatterImpl< strings_internal::AlphaNumFormatterImpl> diff --git a/third_party/abseil-cpp/absl/strings/str_join_test.cc b/third_party/abseil-cpp/absl/strings/str_join_test.cc index 921d9c2bfa..2be6256e43 100644 --- a/third_party/abseil-cpp/absl/strings/str_join_test.cc +++ b/third_party/abseil-cpp/absl/strings/str_join_test.cc @@ -134,26 +134,26 @@ TEST(StrJoin, APIExamples) { // { - // Empty range yields an empty std::string. + // Empty range yields an empty string. std::vector<std::string> v; EXPECT_EQ("", absl::StrJoin(v, "-")); } { - // A range of 1 element gives a std::string with that element but no + // A range of 1 element gives a string with that element but no // separator. std::vector<std::string> v = {"foo"}; EXPECT_EQ("foo", absl::StrJoin(v, "-")); } { - // A range with a single empty std::string element + // A range with a single empty string element std::vector<std::string> v = {""}; EXPECT_EQ("", absl::StrJoin(v, "-")); } { - // A range with 2 elements, one of which is an empty std::string + // A range with 2 elements, one of which is an empty string std::vector<std::string> v = {"a", ""}; EXPECT_EQ("a-", absl::StrJoin(v, "-")); } diff --git a/third_party/abseil-cpp/absl/strings/str_replace_benchmark.cc b/third_party/abseil-cpp/absl/strings/str_replace_benchmark.cc index 95b2dc105e..01331da29f 100644 --- a/third_party/abseil-cpp/absl/strings/str_replace_benchmark.cc +++ b/third_party/abseil-cpp/absl/strings/str_replace_benchmark.cc @@ -62,7 +62,7 @@ void SetUpStrings() { } } // big_string->resize(50); - // OK, we've set up the std::string, now let's set up expectations - first by + // OK, we've set up the string, now let's set up expectations - first by // just replacing "the" with "box" after_replacing_the = new std::string(*big_string); for (size_t pos = 0; diff --git a/third_party/abseil-cpp/absl/strings/str_replace_test.cc b/third_party/abseil-cpp/absl/strings/str_replace_test.cc index 1ca23aff55..9d8c7f75b5 100644 --- a/third_party/abseil-cpp/absl/strings/str_replace_test.cc +++ b/third_party/abseil-cpp/absl/strings/str_replace_test.cc @@ -25,7 +25,7 @@ TEST(StrReplaceAll, OneReplacement) { std::string s; - // Empty std::string. + // Empty string. s = absl::StrReplaceAll(s, {{"", ""}}); EXPECT_EQ(s, ""); s = absl::StrReplaceAll(s, {{"x", ""}}); @@ -47,7 +47,7 @@ TEST(StrReplaceAll, OneReplacement) { s = absl::StrReplaceAll("abc", {{"xyz", "123"}}); EXPECT_EQ(s, "abc"); - // Replace entire std::string. + // Replace entire string. s = absl::StrReplaceAll("abc", {{"abc", "xyz"}}); EXPECT_EQ(s, "xyz"); @@ -88,7 +88,7 @@ TEST(StrReplaceAll, OneReplacement) { TEST(StrReplaceAll, ManyReplacements) { std::string s; - // Empty std::string. + // Empty string. s = absl::StrReplaceAll("", {{"", ""}, {"x", ""}, {"", "y"}, {"x", "y"}}); EXPECT_EQ(s, ""); @@ -96,7 +96,7 @@ TEST(StrReplaceAll, ManyReplacements) { s = absl::StrReplaceAll("abc", {{"", ""}, {"", "y"}, {"x", ""}}); EXPECT_EQ(s, "abc"); - // Replace entire std::string, one char at a time + // Replace entire string, one char at a time s = absl::StrReplaceAll("abc", {{"a", "x"}, {"b", "y"}, {"c", "z"}}); EXPECT_EQ(s, "xyz"); s = absl::StrReplaceAll("zxy", {{"z", "x"}, {"x", "y"}, {"y", "z"}}); @@ -264,7 +264,7 @@ TEST(StrReplaceAll, Inplace) { std::string s; int reps; - // Empty std::string. + // Empty string. s = ""; reps = absl::StrReplaceAll({{"", ""}, {"x", ""}, {"", "y"}, {"x", "y"}}, &s); EXPECT_EQ(reps, 0); @@ -276,7 +276,7 @@ TEST(StrReplaceAll, Inplace) { EXPECT_EQ(reps, 0); EXPECT_EQ(s, "abc"); - // Replace entire std::string, one char at a time + // Replace entire string, one char at a time s = "abc"; reps = absl::StrReplaceAll({{"a", "x"}, {"b", "y"}, {"c", "z"}}, &s); EXPECT_EQ(reps, 3); diff --git a/third_party/abseil-cpp/absl/strings/str_split.cc b/third_party/abseil-cpp/absl/strings/str_split.cc index d0f8666910..e08c26b6bb 100644 --- a/third_party/abseil-cpp/absl/strings/str_split.cc +++ b/third_party/abseil-cpp/absl/strings/str_split.cc @@ -42,7 +42,7 @@ absl::string_view GenericFind(absl::string_view text, absl::string_view delimiter, size_t pos, FindPolicy find_policy) { if (delimiter.empty() && text.length() > 0) { - // Special case for empty std::string delimiters: always return a zero-length + // Special case for empty string delimiters: always return a zero-length // absl::string_view referring to the item at position 1 past pos. return absl::string_view(text.data() + pos + 1, 0); } @@ -127,7 +127,7 @@ absl::string_view ByLength::Find(absl::string_view text, size_t pos) const { pos = std::min(pos, text.size()); // truncate `pos` absl::string_view substr = text.substr(pos); - // If the std::string is shorter than the chunk size we say we + // If the string is shorter than the chunk size we say we // "can't find the delimiter" so this will be the last chunk. if (substr.length() <= static_cast<size_t>(length_)) return absl::string_view(text.data() + text.size(), 0); diff --git a/third_party/abseil-cpp/absl/strings/str_split.h b/third_party/abseil-cpp/absl/strings/str_split.h index a79cd4a0ce..bfbca422a8 100644 --- a/third_party/abseil-cpp/absl/strings/str_split.h +++ b/third_party/abseil-cpp/absl/strings/str_split.h @@ -44,6 +44,7 @@ #include <vector> #include "absl/base/internal/raw_logging.h" +#include "absl/base/macros.h" #include "absl/strings/internal/str_split_internal.h" #include "absl/strings/string_view.h" #include "absl/strings/strip.h" @@ -368,6 +369,12 @@ struct SkipWhitespace { } }; +template <typename T> +using EnableSplitIfString = + typename std::enable_if<std::is_same<T, std::string>::value || + std::is_same<T, const std::string>::value, + int>::type; + //------------------------------------------------------------------------------ // StrSplit() //------------------------------------------------------------------------------ @@ -488,22 +495,50 @@ struct SkipWhitespace { // Try not to depend on this distinction because the bug may one day be fixed. template <typename Delimiter> strings_internal::Splitter< - typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty> + typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty, + absl::string_view> StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) { using DelimiterType = typename strings_internal::SelectDelimiter<Delimiter>::type; - return strings_internal::Splitter<DelimiterType, AllowEmpty>( + return strings_internal::Splitter<DelimiterType, AllowEmpty, + absl::string_view>( + text.value(), DelimiterType(d), AllowEmpty()); +} + +template <typename Delimiter, typename StringType, + EnableSplitIfString<StringType> = 0> +strings_internal::Splitter< + typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty, + std::string> +StrSplit(StringType&& text, Delimiter d) { + using DelimiterType = + typename strings_internal::SelectDelimiter<Delimiter>::type; + return strings_internal::Splitter<DelimiterType, AllowEmpty, std::string>( std::move(text), DelimiterType(d), AllowEmpty()); } template <typename Delimiter, typename Predicate> strings_internal::Splitter< - typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate> + typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate, + absl::string_view> StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d, Predicate p) { using DelimiterType = typename strings_internal::SelectDelimiter<Delimiter>::type; - return strings_internal::Splitter<DelimiterType, Predicate>( + return strings_internal::Splitter<DelimiterType, Predicate, + absl::string_view>( + text.value(), DelimiterType(d), std::move(p)); +} + +template <typename Delimiter, typename Predicate, typename StringType, + EnableSplitIfString<StringType> = 0> +strings_internal::Splitter< + typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate, + std::string> +StrSplit(StringType&& text, Delimiter d, Predicate p) { + using DelimiterType = + typename strings_internal::SelectDelimiter<Delimiter>::type; + return strings_internal::Splitter<DelimiterType, Predicate, std::string>( std::move(text), DelimiterType(d), std::move(p)); } diff --git a/third_party/abseil-cpp/absl/strings/str_split_test.cc b/third_party/abseil-cpp/absl/strings/str_split_test.cc index 02f27bc414..1b4427b849 100644 --- a/third_party/abseil-cpp/absl/strings/str_split_test.cc +++ b/third_party/abseil-cpp/absl/strings/str_split_test.cc @@ -27,8 +27,12 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "absl/base/dynamic_annotations.h" // for RunningOnValgrind +#include "absl/base/dynamic_annotations.h" #include "absl/base/macros.h" +#include "absl/container/btree_map.h" +#include "absl/container/btree_set.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/node_hash_map.h" #include "absl/strings/numbers.h" namespace { @@ -71,7 +75,7 @@ TEST(Split, TraitsTest) { // namespaces just like callers will need to use. TEST(Split, APIExamples) { { - // Passes std::string delimiter. Assumes the default of ByString. + // Passes string delimiter. Assumes the default of ByString. std::vector<std::string> v = absl::StrSplit("a,b,c", ","); // NOLINT EXPECT_THAT(v, ElementsAre("a", "b", "c")); @@ -97,7 +101,7 @@ TEST(Split, APIExamples) { } { - // Uses the Literal std::string "=>" as the delimiter. + // Uses the Literal string "=>" as the delimiter. const std::vector<std::string> v = absl::StrSplit("a=>b=>c", "=>"); EXPECT_THAT(v, ElementsAre("a", "b", "c")); } @@ -121,17 +125,17 @@ TEST(Split, APIExamples) { } { - // Splits the input std::string into individual characters by using an empty - // std::string as the delimiter. + // Splits the input string into individual characters by using an empty + // string as the delimiter. std::vector<std::string> v = absl::StrSplit("abc", ""); EXPECT_THAT(v, ElementsAre("a", "b", "c")); } { - // Splits std::string data with embedded NUL characters, using NUL as the + // Splits string data with embedded NUL characters, using NUL as the // delimiter. A simple delimiter of "\0" doesn't work because strlen() will - // say that's the empty std::string when constructing the absl::string_view - // delimiter. Instead, a non-empty std::string containing NUL can be used as the + // say that's the empty string when constructing the absl::string_view + // delimiter. Instead, a non-empty string containing NUL can be used as the // delimiter. std::string embedded_nulls("a\0b\0c", 5); std::string null_delim("\0", 1); @@ -365,7 +369,7 @@ TEST(SplitIterator, EqualityAsEndCondition) { TEST(Splitter, RangeIterators) { auto splitter = absl::StrSplit("a,b,c", ','); std::vector<absl::string_view> output; - for (const absl::string_view p : splitter) { + for (const absl::string_view& p : splitter) { output.push_back(p); } EXPECT_THAT(output, ElementsAre("a", "b", "c")); @@ -403,6 +407,10 @@ TEST(Splitter, ConversionOperator) { TestConversionOperator<std::set<std::string>>(splitter); TestConversionOperator<std::multiset<absl::string_view>>(splitter); TestConversionOperator<std::multiset<std::string>>(splitter); + TestConversionOperator<absl::btree_set<absl::string_view>>(splitter); + TestConversionOperator<absl::btree_set<std::string>>(splitter); + TestConversionOperator<absl::btree_multiset<absl::string_view>>(splitter); + TestConversionOperator<absl::btree_multiset<std::string>>(splitter); TestConversionOperator<std::unordered_set<std::string>>(splitter); // Tests conversion to map-like objects. @@ -419,8 +427,36 @@ TEST(Splitter, ConversionOperator) { TestMapConversionOperator<std::multimap<std::string, absl::string_view>>( splitter); TestMapConversionOperator<std::multimap<std::string, std::string>>(splitter); + TestMapConversionOperator< + absl::btree_map<absl::string_view, absl::string_view>>(splitter); + TestMapConversionOperator<absl::btree_map<absl::string_view, std::string>>( + splitter); + TestMapConversionOperator<absl::btree_map<std::string, absl::string_view>>( + splitter); + TestMapConversionOperator<absl::btree_map<std::string, std::string>>( + splitter); + TestMapConversionOperator< + absl::btree_multimap<absl::string_view, absl::string_view>>(splitter); + TestMapConversionOperator< + absl::btree_multimap<absl::string_view, std::string>>(splitter); + TestMapConversionOperator< + absl::btree_multimap<std::string, absl::string_view>>(splitter); + TestMapConversionOperator<absl::btree_multimap<std::string, std::string>>( + splitter); TestMapConversionOperator<std::unordered_map<std::string, std::string>>( splitter); + TestMapConversionOperator< + absl::node_hash_map<absl::string_view, absl::string_view>>(splitter); + TestMapConversionOperator< + absl::node_hash_map<absl::string_view, std::string>>(splitter); + TestMapConversionOperator< + absl::node_hash_map<std::string, absl::string_view>>(splitter); + TestMapConversionOperator< + absl::flat_hash_map<absl::string_view, absl::string_view>>(splitter); + TestMapConversionOperator< + absl::flat_hash_map<absl::string_view, std::string>>(splitter); + TestMapConversionOperator< + absl::flat_hash_map<std::string, absl::string_view>>(splitter); // Tests conversion to std::pair @@ -436,7 +472,7 @@ TEST(Splitter, ConversionOperator) { // less-than, equal-to, and more-than 2 strings. TEST(Splitter, ToPair) { { - // Empty std::string + // Empty string std::pair<std::string, std::string> p = absl::StrSplit("", ','); EXPECT_EQ("", p.first); EXPECT_EQ("", p.second); @@ -565,7 +601,7 @@ TEST(Split, AcceptsCertainTemporaries) { TEST(Split, Temporary) { // Use a std::string longer than the SSO length, so that when the temporary is - // destroyed, if the splitter keeps a reference to the std::string's contents, + // destroyed, if the splitter keeps a reference to the string's contents, // it'll reference freed memory instead of just dead on-stack memory. const char input[] = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u"; EXPECT_LT(sizeof(std::string), ABSL_ARRAYSIZE(input)) @@ -651,14 +687,14 @@ TEST(Split, UTF8) { // Tests splitting utf8 strings and utf8 delimiters. std::string utf8_string = u8"\u03BA\u1F79\u03C3\u03BC\u03B5"; { - // A utf8 input std::string with an ascii delimiter. + // A utf8 input string with an ascii delimiter. std::string to_split = "a," + utf8_string; std::vector<absl::string_view> v = absl::StrSplit(to_split, ','); EXPECT_THAT(v, ElementsAre("a", utf8_string)); } { - // A utf8 input std::string and a utf8 delimiter. + // A utf8 input string and a utf8 delimiter. std::string to_split = "a," + utf8_string + ",b"; std::string unicode_delimiter = "," + utf8_string + ","; std::vector<absl::string_view> v = @@ -667,7 +703,7 @@ TEST(Split, UTF8) { } { - // A utf8 input std::string and ByAnyChar with ascii chars. + // A utf8 input string and ByAnyChar with ascii chars. std::vector<absl::string_view> v = absl::StrSplit(u8"Foo h\u00E4llo th\u4E1Ere", absl::ByAnyChar(" \t")); EXPECT_THAT(v, ElementsAre("Foo", u8"h\u00E4llo", u8"th\u4E1Ere")); @@ -814,10 +850,10 @@ TEST(Delimiter, ByString) { ByString comma_string(","); TestComma(comma_string); - // The first occurrence of empty std::string ("") in a std::string is at position 0. + // The first occurrence of empty string ("") in a string is at position 0. // There is a test below that demonstrates this for absl::string_view::find(). // If the ByString delimiter returned position 0 for this, there would - // be an infinite loop in the SplitIterator code. To avoid this, empty std::string + // be an infinite loop in the SplitIterator code. To avoid this, empty string // is a special case in that it always returns the item at position 1. absl::string_view abc("abc"); EXPECT_EQ(0, abc.find("")); // "" is found at position 0 @@ -876,7 +912,7 @@ TEST(Delimiter, ByAnyChar) { EXPECT_FALSE(IsFoundAt("=", two_delims, -1)); // ByAnyChar behaves just like ByString when given a delimiter of empty - // std::string. That is, it always returns a zero-length absl::string_view + // string. That is, it always returns a zero-length absl::string_view // referring to the item at position 1, not position 0. ByAnyChar empty(""); EXPECT_FALSE(IsFoundAt("", empty, 0)); @@ -907,13 +943,19 @@ TEST(Delimiter, ByLength) { } TEST(Split, WorksWithLargeStrings) { +#if defined(ABSL_HAVE_ADDRESS_SANITIZER) || \ + defined(ABSL_HAVE_MEMORY_SANITIZER) || defined(ABSL_HAVE_THREAD_SANITIZER) + constexpr size_t kSize = (uint32_t{1} << 26) + 1; // 64M + 1 byte +#else + constexpr size_t kSize = (uint32_t{1} << 31) + 1; // 2G + 1 byte +#endif if (sizeof(size_t) > 4) { - std::string s((uint32_t{1} << 31) + 1, 'x'); // 2G + 1 byte + std::string s(kSize, 'x'); s.back() = '-'; std::vector<absl::string_view> v = absl::StrSplit(s, '-'); EXPECT_EQ(2, v.size()); // The first element will contain 2G of 'x's. - // testing::StartsWith is too slow with a 2G std::string. + // testing::StartsWith is too slow with a 2G string. EXPECT_EQ('x', v[0][0]); EXPECT_EQ('x', v[0][1]); EXPECT_EQ('x', v[0][3]); diff --git a/third_party/abseil-cpp/absl/strings/string_view.cc b/third_party/abseil-cpp/absl/strings/string_view.cc index c5f5de936d..d596e08cde 100644 --- a/third_party/abseil-cpp/absl/strings/string_view.cc +++ b/third_party/abseil-cpp/absl/strings/string_view.cc @@ -78,8 +78,8 @@ std::ostream& operator<<(std::ostream& o, string_view piece) { return o; } -string_view::size_type string_view::find(string_view s, size_type pos) const - noexcept { +string_view::size_type string_view::find(string_view s, + size_type pos) const noexcept { if (empty() || pos > length_) { if (empty() && pos == 0 && s.empty()) return 0; return npos; @@ -98,8 +98,8 @@ string_view::size_type string_view::find(char c, size_type pos) const noexcept { return result != nullptr ? result - ptr_ : npos; } -string_view::size_type string_view::rfind(string_view s, size_type pos) const - noexcept { +string_view::size_type string_view::rfind(string_view s, + size_type pos) const noexcept { if (length_ < s.length_) return npos; if (s.empty()) return std::min(length_, pos); const char* last = ptr_ + std::min(length_ - s.length_, pos) + s.length_; @@ -108,8 +108,8 @@ string_view::size_type string_view::rfind(string_view s, size_type pos) const } // Search range is [0..pos] inclusive. If pos == npos, search everything. -string_view::size_type string_view::rfind(char c, size_type pos) const - noexcept { +string_view::size_type string_view::rfind(char c, + size_type pos) const noexcept { // Note: memrchr() is not available on Windows. if (empty()) return npos; for (size_type i = std::min(pos, length_ - 1);; --i) { @@ -121,9 +121,8 @@ string_view::size_type string_view::rfind(char c, size_type pos) const return npos; } -string_view::size_type string_view::find_first_of(string_view s, - size_type pos) const - noexcept { +string_view::size_type string_view::find_first_of( + string_view s, size_type pos) const noexcept { if (empty() || s.empty()) { return npos; } @@ -138,9 +137,8 @@ string_view::size_type string_view::find_first_of(string_view s, return npos; } -string_view::size_type string_view::find_first_not_of(string_view s, - size_type pos) const - noexcept { +string_view::size_type string_view::find_first_not_of( + string_view s, size_type pos) const noexcept { if (empty()) return npos; // Avoid the cost of LookupTable() for a single-character search. if (s.length_ == 1) return find_first_not_of(s.ptr_[0], pos); @@ -153,9 +151,8 @@ string_view::size_type string_view::find_first_not_of(string_view s, return npos; } -string_view::size_type string_view::find_first_not_of(char c, - size_type pos) const - noexcept { +string_view::size_type string_view::find_first_not_of( + char c, size_type pos) const noexcept { if (empty()) return npos; for (; pos < length_; ++pos) { if (ptr_[pos] != c) { @@ -180,9 +177,8 @@ string_view::size_type string_view::find_last_of(string_view s, return npos; } -string_view::size_type string_view::find_last_not_of(string_view s, - size_type pos) const - noexcept { +string_view::size_type string_view::find_last_not_of( + string_view s, size_type pos) const noexcept { if (empty()) return npos; size_type i = std::min(pos, length_ - 1); if (s.empty()) return i; @@ -198,9 +194,8 @@ string_view::size_type string_view::find_last_not_of(string_view s, return npos; } -string_view::size_type string_view::find_last_not_of(char c, - size_type pos) const - noexcept { +string_view::size_type string_view::find_last_not_of( + char c, size_type pos) const noexcept { if (empty()) return npos; size_type i = std::min(pos, length_ - 1); for (;; --i) { diff --git a/third_party/abseil-cpp/absl/strings/string_view.h b/third_party/abseil-cpp/absl/strings/string_view.h index 1861ea62a9..a4c9a6526c 100644 --- a/third_party/abseil-cpp/absl/strings/string_view.h +++ b/third_party/abseil-cpp/absl/strings/string_view.h @@ -36,6 +36,7 @@ #include <limits> #include <string> +#include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/base/internal/throw_delegate.h" #include "absl/base/macros.h" @@ -48,7 +49,7 @@ namespace absl { ABSL_NAMESPACE_BEGIN -using std::string_view; +using string_view = std::string_view; ABSL_NAMESPACE_END } // namespace absl @@ -61,6 +62,12 @@ ABSL_NAMESPACE_END #define ABSL_INTERNAL_STRING_VIEW_MEMCMP memcmp #endif // ABSL_HAVE_BUILTIN(__builtin_memcmp) +#if defined(__cplusplus) && __cplusplus >= 201402L +#define ABSL_INTERNAL_STRING_VIEW_CXX14_CONSTEXPR constexpr +#else +#define ABSL_INTERNAL_STRING_VIEW_CXX14_CONSTEXPR +#endif + namespace absl { ABSL_NAMESPACE_BEGIN @@ -111,6 +118,11 @@ ABSL_NAMESPACE_BEGIN // example, when splitting a string, `std::vector<absl::string_view>` is a // natural data type for the output. // +// For another example, a Cord is a non-contiguous, potentially very +// long string-like object. The Cord class has an interface that iteratively +// provides string_view objects that point to the successive pieces of a Cord +// object. +// // When constructed from a source which is NUL-terminated, the `string_view` // itself will not include the NUL-terminator unless a specific size (including // the NUL) is passed to the constructor. As a result, common idioms that work @@ -175,18 +187,20 @@ class string_view { template <typename Allocator> string_view( // NOLINT(runtime/explicit) - const std::basic_string<char, std::char_traits<char>, Allocator>& - str) noexcept + const std::basic_string<char, std::char_traits<char>, Allocator>& str + ABSL_ATTRIBUTE_LIFETIME_BOUND) noexcept // This is implemented in terms of `string_view(p, n)` so `str.size()` // doesn't need to be reevaluated after `ptr_` is set. - : string_view(str.data(), str.size()) {} + // The length check is also skipped since it is unnecessary and causes + // code bloat. + : string_view(str.data(), str.size(), SkipCheckLengthTag{}) {} // Implicit constructor of a `string_view` from NUL-terminated `str`. When // accepting possibly null strings, use `absl::NullSafeStringView(str)` // instead (see below). + // The length check is skipped since it is unnecessary and causes code bloat. constexpr string_view(const char* str) // NOLINT(runtime/explicit) - : ptr_(str), - length_(str ? CheckLengthInternal(StrlenInternal(str)) : 0) {} + : ptr_(str), length_(str ? StrlenInternal(str) : 0) {} // Implicit constructor of a `string_view` from a `const char*` and length. constexpr string_view(const char* data, size_type len) @@ -259,9 +273,7 @@ class string_view { // string_view::size() // // Returns the number of characters in the `string_view`. - constexpr size_type size() const noexcept { - return length_; - } + constexpr size_type size() const noexcept { return length_; } // string_view::length() // @@ -283,7 +295,7 @@ class string_view { // Returns the ith element of the `string_view` using the array operator. // Note that this operator does not perform any bounds checking. constexpr const_reference operator[](size_type i) const { - return ABSL_ASSERT(i < size()), ptr_[i]; + return ABSL_HARDENING_ASSERT(i < size()), ptr_[i]; } // string_view::at() @@ -303,14 +315,14 @@ class string_view { // // Returns the first element of a `string_view`. constexpr const_reference front() const { - return ABSL_ASSERT(!empty()), ptr_[0]; + return ABSL_HARDENING_ASSERT(!empty()), ptr_[0]; } // string_view::back() // // Returns the last element of a `string_view`. constexpr const_reference back() const { - return ABSL_ASSERT(!empty()), ptr_[size() - 1]; + return ABSL_HARDENING_ASSERT(!empty()), ptr_[size() - 1]; } // string_view::data() @@ -319,7 +331,7 @@ class string_view { // stored elsewhere). Note that `string_view::data()` may contain embedded nul // characters, but the returned buffer may or may not be NUL-terminated; // therefore, do not pass `data()` to a routine that expects a NUL-terminated - // std::string. + // string. constexpr const_pointer data() const noexcept { return ptr_; } // Modifiers @@ -327,9 +339,9 @@ class string_view { // string_view::remove_prefix() // // Removes the first `n` characters from the `string_view`. Note that the - // underlying std::string is not changed, only the view. - void remove_prefix(size_type n) { - assert(n <= length_); + // underlying string is not changed, only the view. + ABSL_INTERNAL_STRING_VIEW_CXX14_CONSTEXPR void remove_prefix(size_type n) { + ABSL_HARDENING_ASSERT(n <= length_); ptr_ += n; length_ -= n; } @@ -337,16 +349,16 @@ class string_view { // string_view::remove_suffix() // // Removes the last `n` characters from the `string_view`. Note that the - // underlying std::string is not changed, only the view. - void remove_suffix(size_type n) { - assert(n <= length_); + // underlying string is not changed, only the view. + ABSL_INTERNAL_STRING_VIEW_CXX14_CONSTEXPR void remove_suffix(size_type n) { + ABSL_HARDENING_ASSERT(n <= length_); length_ -= n; } // string_view::swap() // // Swaps this `string_view` with another `string_view`. - void swap(string_view& s) noexcept { + ABSL_INTERNAL_STRING_VIEW_CXX14_CONSTEXPR void swap(string_view& s) noexcept { auto t = *this; *this = s; s = t; @@ -382,21 +394,21 @@ class string_view { // Returns a "substring" of the `string_view` (at offset `pos` and length // `n`) as another string_view. This function throws `std::out_of_bounds` if // `pos > size`. - string_view substr(size_type pos, size_type n = npos) const { - if (ABSL_PREDICT_FALSE(pos > length_)) - base_internal::ThrowStdOutOfRange("absl::string_view::substr"); - n = (std::min)(n, length_ - pos); - return string_view(ptr_ + pos, n); + // Use absl::ClippedSubstr if you need a truncating substr operation. + constexpr string_view substr(size_type pos = 0, size_type n = npos) const { + return ABSL_PREDICT_FALSE(pos > length_) + ? (base_internal::ThrowStdOutOfRange( + "absl::string_view::substr"), + string_view()) + : string_view(ptr_ + pos, Min(n, length_ - pos)); } // string_view::compare() // - // Performs a lexicographical comparison between the `string_view` and - // another `absl::string_view`, returning -1 if `this` is less than, 0 if - // `this` is equal to, and 1 if `this` is greater than the passed std::string - // view. Note that in the case of data equality, a further comparison is made - // on the respective sizes of the two `string_view`s to determine which is - // smaller, equal, or greater. + // Performs a lexicographical comparison between this `string_view` and + // another `string_view` `x`, returning a negative value if `*this` is less + // than `x`, 0 if `*this` is equal to `x`, and a positive value if `*this` + // is greater than `x`. constexpr int compare(string_view x) const noexcept { return CompareImpl(length_, x.length_, Min(length_, x.length_) == 0 @@ -407,31 +419,31 @@ class string_view { // Overload of `string_view::compare()` for comparing a substring of the // 'string_view` and another `absl::string_view`. - int compare(size_type pos1, size_type count1, string_view v) const { + constexpr int compare(size_type pos1, size_type count1, string_view v) const { return substr(pos1, count1).compare(v); } // Overload of `string_view::compare()` for comparing a substring of the // `string_view` and a substring of another `absl::string_view`. - int compare(size_type pos1, size_type count1, string_view v, size_type pos2, - size_type count2) const { + constexpr int compare(size_type pos1, size_type count1, string_view v, + size_type pos2, size_type count2) const { return substr(pos1, count1).compare(v.substr(pos2, count2)); } // Overload of `string_view::compare()` for comparing a `string_view` and a - // a different C-style std::string `s`. - int compare(const char* s) const { return compare(string_view(s)); } + // a different C-style string `s`. + constexpr int compare(const char* s) const { return compare(string_view(s)); } // Overload of `string_view::compare()` for comparing a substring of the - // `string_view` and a different std::string C-style std::string `s`. - int compare(size_type pos1, size_type count1, const char* s) const { + // `string_view` and a different string C-style string `s`. + constexpr int compare(size_type pos1, size_type count1, const char* s) const { return substr(pos1, count1).compare(string_view(s)); } // Overload of `string_view::compare()` for comparing a substring of the - // `string_view` and a substring of a different C-style std::string `s`. - int compare(size_type pos1, size_type count1, const char* s, - size_type count2) const { + // `string_view` and a substring of a different C-style string `s`. + constexpr int compare(size_type pos1, size_type count1, const char* s, + size_type count2) const { return substr(pos1, count1).compare(string_view(s, count2)); } @@ -448,48 +460,92 @@ class string_view { // within the `string_view`. size_type find(char c, size_type pos = 0) const noexcept; + // Overload of `string_view::find()` for finding a substring of a different + // C-style string `s` within the `string_view`. + size_type find(const char* s, size_type pos, size_type count) const { + return find(string_view(s, count), pos); + } + + // Overload of `string_view::find()` for finding a different C-style string + // `s` within the `string_view`. + size_type find(const char* s, size_type pos = 0) const { + return find(string_view(s), pos); + } + // string_view::rfind() // // Finds the last occurrence of a substring `s` within the `string_view`, // returning the position of the first character's match, or `npos` if no // match was found. - size_type rfind(string_view s, size_type pos = npos) const - noexcept; + size_type rfind(string_view s, size_type pos = npos) const noexcept; // Overload of `string_view::rfind()` for finding the last given character `c` // within the `string_view`. size_type rfind(char c, size_type pos = npos) const noexcept; + // Overload of `string_view::rfind()` for finding a substring of a different + // C-style string `s` within the `string_view`. + size_type rfind(const char* s, size_type pos, size_type count) const { + return rfind(string_view(s, count), pos); + } + + // Overload of `string_view::rfind()` for finding a different C-style string + // `s` within the `string_view`. + size_type rfind(const char* s, size_type pos = npos) const { + return rfind(string_view(s), pos); + } + // string_view::find_first_of() // // Finds the first occurrence of any of the characters in `s` within the // `string_view`, returning the start position of the match, or `npos` if no // match was found. - size_type find_first_of(string_view s, size_type pos = 0) const - noexcept; + size_type find_first_of(string_view s, size_type pos = 0) const noexcept; // Overload of `string_view::find_first_of()` for finding a character `c` // within the `string_view`. - size_type find_first_of(char c, size_type pos = 0) const - noexcept { + size_type find_first_of(char c, size_type pos = 0) const noexcept { return find(c, pos); } + // Overload of `string_view::find_first_of()` for finding a substring of a + // different C-style string `s` within the `string_view`. + size_type find_first_of(const char* s, size_type pos, + size_type count) const { + return find_first_of(string_view(s, count), pos); + } + + // Overload of `string_view::find_first_of()` for finding a different C-style + // string `s` within the `string_view`. + size_type find_first_of(const char* s, size_type pos = 0) const { + return find_first_of(string_view(s), pos); + } + // string_view::find_last_of() // // Finds the last occurrence of any of the characters in `s` within the // `string_view`, returning the start position of the match, or `npos` if no // match was found. - size_type find_last_of(string_view s, size_type pos = npos) const - noexcept; + size_type find_last_of(string_view s, size_type pos = npos) const noexcept; // Overload of `string_view::find_last_of()` for finding a character `c` // within the `string_view`. - size_type find_last_of(char c, size_type pos = npos) const - noexcept { + size_type find_last_of(char c, size_type pos = npos) const noexcept { return rfind(c, pos); } + // Overload of `string_view::find_last_of()` for finding a substring of a + // different C-style string `s` within the `string_view`. + size_type find_last_of(const char* s, size_type pos, size_type count) const { + return find_last_of(string_view(s, count), pos); + } + + // Overload of `string_view::find_last_of()` for finding a different C-style + // string `s` within the `string_view`. + size_type find_last_of(const char* s, size_type pos = npos) const { + return find_last_of(string_view(s), pos); + } + // string_view::find_first_not_of() // // Finds the first occurrence of any of the characters not in `s` within the @@ -501,25 +557,56 @@ class string_view { // that is not `c` within the `string_view`. size_type find_first_not_of(char c, size_type pos = 0) const noexcept; + // Overload of `string_view::find_first_not_of()` for finding a substring of a + // different C-style string `s` within the `string_view`. + size_type find_first_not_of(const char* s, size_type pos, + size_type count) const { + return find_first_not_of(string_view(s, count), pos); + } + + // Overload of `string_view::find_first_not_of()` for finding a different + // C-style string `s` within the `string_view`. + size_type find_first_not_of(const char* s, size_type pos = 0) const { + return find_first_not_of(string_view(s), pos); + } + // string_view::find_last_not_of() // // Finds the last occurrence of any of the characters not in `s` within the // `string_view`, returning the start position of the last non-match, or // `npos` if no non-match was found. size_type find_last_not_of(string_view s, - size_type pos = npos) const noexcept; + size_type pos = npos) const noexcept; // Overload of `string_view::find_last_not_of()` for finding a character // that is not `c` within the `string_view`. - size_type find_last_not_of(char c, size_type pos = npos) const - noexcept; + size_type find_last_not_of(char c, size_type pos = npos) const noexcept; + + // Overload of `string_view::find_last_not_of()` for finding a substring of a + // different C-style string `s` within the `string_view`. + size_type find_last_not_of(const char* s, size_type pos, + size_type count) const { + return find_last_not_of(string_view(s, count), pos); + } + + // Overload of `string_view::find_last_not_of()` for finding a different + // C-style string `s` within the `string_view`. + size_type find_last_not_of(const char* s, size_type pos = npos) const { + return find_last_not_of(string_view(s), pos); + } private: + // The constructor from std::string delegates to this constructor. + // See the comment on that constructor for the rationale. + struct SkipCheckLengthTag {}; + string_view(const char* data, size_type len, SkipCheckLengthTag) noexcept + : ptr_(data), length_(len) {} + static constexpr size_type kMaxSize = (std::numeric_limits<difference_type>::max)(); static constexpr size_type CheckLengthInternal(size_type len) { - return (void)ABSL_ASSERT(len <= kMaxSize), len; + return ABSL_HARDENING_ASSERT(len <= kMaxSize), len; } static constexpr size_type StrlenInternal(const char* str) { @@ -590,6 +677,7 @@ std::ostream& operator<<(std::ostream& o, string_view piece); ABSL_NAMESPACE_END } // namespace absl +#undef ABSL_INTERNAL_STRING_VIEW_CXX14_CONSTEXPR #undef ABSL_INTERNAL_STRING_VIEW_MEMCMP #endif // ABSL_USES_STD_STRING_VIEW diff --git a/third_party/abseil-cpp/absl/strings/string_view_test.cc b/third_party/abseil-cpp/absl/strings/string_view_test.cc index 7b1d56fac7..2c13dd1c14 100644 --- a/third_party/abseil-cpp/absl/strings/string_view_test.cc +++ b/third_party/abseil-cpp/absl/strings/string_view_test.cc @@ -28,6 +28,7 @@ #include "gtest/gtest.h" #include "absl/base/config.h" #include "absl/base/dynamic_annotations.h" +#include "absl/base/options.h" #if defined(ABSL_HAVE_STD_STRING_VIEW) || defined(__ANDROID__) // We don't control the death messaging when using std::string_view. @@ -410,7 +411,7 @@ TEST(StringViewTest, STL2) { EXPECT_EQ(a.find(e, 17), 17); absl::string_view g("xx not found bb"); EXPECT_EQ(a.find(g), absl::string_view::npos); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(d.find(b), absl::string_view::npos); EXPECT_EQ(e.find(b), absl::string_view::npos); EXPECT_EQ(d.find(b, 4), absl::string_view::npos); @@ -438,7 +439,7 @@ TEST(StringViewTest, STL2) { EXPECT_EQ(g.find('o', 4), 4); EXPECT_EQ(g.find('o', 5), 8); EXPECT_EQ(a.find('b', 5), absl::string_view::npos); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(d.find('\0'), absl::string_view::npos); EXPECT_EQ(e.find('\0'), absl::string_view::npos); EXPECT_EQ(d.find('\0', 4), absl::string_view::npos); @@ -448,6 +449,24 @@ TEST(StringViewTest, STL2) { EXPECT_EQ(d.find('x', 4), absl::string_view::npos); EXPECT_EQ(e.find('x', 7), absl::string_view::npos); + EXPECT_EQ(a.find(b.data(), 1, 0), 1); + EXPECT_EQ(a.find(c.data(), 9, 0), 9); + EXPECT_EQ(a.find(c.data(), absl::string_view::npos, 0), + absl::string_view::npos); + EXPECT_EQ(b.find(c.data(), absl::string_view::npos, 0), + absl::string_view::npos); + // empty string nonsense + EXPECT_EQ(d.find(b.data(), 4, 0), absl::string_view::npos); + EXPECT_EQ(e.find(b.data(), 7, 0), absl::string_view::npos); + + EXPECT_EQ(a.find(b.data(), 1), absl::string_view::npos); + EXPECT_EQ(a.find(c.data(), 9), 23); + EXPECT_EQ(a.find(c.data(), absl::string_view::npos), absl::string_view::npos); + EXPECT_EQ(b.find(c.data(), absl::string_view::npos), absl::string_view::npos); + // empty string nonsense + EXPECT_EQ(d.find(b.data(), 4), absl::string_view::npos); + EXPECT_EQ(e.find(b.data(), 7), absl::string_view::npos); + EXPECT_EQ(a.rfind(b), 0); EXPECT_EQ(a.rfind(b, 1), 0); EXPECT_EQ(a.rfind(c), 23); @@ -465,7 +484,7 @@ TEST(StringViewTest, STL2) { EXPECT_EQ(e.rfind(b), absl::string_view::npos); EXPECT_EQ(d.rfind(b, 4), absl::string_view::npos); EXPECT_EQ(e.rfind(b, 7), absl::string_view::npos); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(d.rfind(d, 4), std::string().rfind(std::string())); EXPECT_EQ(e.rfind(d, 7), std::string().rfind(std::string())); EXPECT_EQ(d.rfind(e, 4), std::string().rfind(std::string())); @@ -484,11 +503,19 @@ TEST(StringViewTest, STL2) { EXPECT_EQ(f.rfind('\0', 12), 3); EXPECT_EQ(f.rfind('3'), 2); EXPECT_EQ(f.rfind('5'), 5); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(d.rfind('o'), absl::string_view::npos); EXPECT_EQ(e.rfind('o'), absl::string_view::npos); EXPECT_EQ(d.rfind('o', 4), absl::string_view::npos); EXPECT_EQ(e.rfind('o', 7), absl::string_view::npos); + + EXPECT_EQ(a.rfind(b.data(), 1, 0), 1); + EXPECT_EQ(a.rfind(c.data(), 22, 0), 22); + EXPECT_EQ(a.rfind(c.data(), 1, 0), 1); + EXPECT_EQ(a.rfind(c.data(), 0, 0), 0); + EXPECT_EQ(b.rfind(c.data(), 0, 0), 0); + EXPECT_EQ(d.rfind(b.data(), 4, 0), 0); + EXPECT_EQ(e.rfind(b.data(), 7, 0), 0); } // Continued from STL2 @@ -520,7 +547,7 @@ TEST(StringViewTest, STL2FindFirst) { EXPECT_EQ(g.find_first_of(c), 0); EXPECT_EQ(a.find_first_of(f), absl::string_view::npos); EXPECT_EQ(f.find_first_of(a), absl::string_view::npos); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(a.find_first_of(d), absl::string_view::npos); EXPECT_EQ(a.find_first_of(e), absl::string_view::npos); EXPECT_EQ(d.find_first_of(b), absl::string_view::npos); @@ -538,7 +565,7 @@ TEST(StringViewTest, STL2FindFirst) { EXPECT_EQ(a.find_first_not_of(f), 0); EXPECT_EQ(a.find_first_not_of(d), 0); EXPECT_EQ(a.find_first_not_of(e), 0); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(a.find_first_not_of(d), 0); EXPECT_EQ(a.find_first_not_of(e), 0); EXPECT_EQ(a.find_first_not_of(d, 1), 1); @@ -566,7 +593,7 @@ TEST(StringViewTest, STL2FindFirst) { EXPECT_EQ(f.find_first_not_of('\0'), 0); EXPECT_EQ(f.find_first_not_of('\0', 3), 4); EXPECT_EQ(f.find_first_not_of('\0', 2), 2); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(d.find_first_not_of('x'), absl::string_view::npos); EXPECT_EQ(e.find_first_not_of('x'), absl::string_view::npos); EXPECT_EQ(d.find_first_not_of('\0'), absl::string_view::npos); @@ -606,7 +633,7 @@ TEST(StringViewTest, STL2FindLast) { EXPECT_EQ(f.find_last_of(i, 5), 5); EXPECT_EQ(f.find_last_of(i, 6), 6); EXPECT_EQ(f.find_last_of(a, 4), absl::string_view::npos); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(f.find_last_of(d), absl::string_view::npos); EXPECT_EQ(f.find_last_of(e), absl::string_view::npos); EXPECT_EQ(f.find_last_of(d, 4), absl::string_view::npos); @@ -632,7 +659,7 @@ TEST(StringViewTest, STL2FindLast) { EXPECT_EQ(a.find_last_not_of(c, 24), 22); EXPECT_EQ(a.find_last_not_of(b, 3), 3); EXPECT_EQ(a.find_last_not_of(b, 2), absl::string_view::npos); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(f.find_last_not_of(d), f.size()-1); EXPECT_EQ(f.find_last_not_of(e), f.size()-1); EXPECT_EQ(f.find_last_not_of(d, 4), 4); @@ -656,7 +683,7 @@ TEST(StringViewTest, STL2FindLast) { EXPECT_EQ(h.find_last_not_of('x', 2), 2); EXPECT_EQ(h.find_last_not_of('=', 2), absl::string_view::npos); EXPECT_EQ(b.find_last_not_of('b', 1), 0); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(d.find_last_not_of('x'), absl::string_view::npos); EXPECT_EQ(e.find_last_not_of('x'), absl::string_view::npos); EXPECT_EQ(d.find_last_not_of('\0'), absl::string_view::npos); @@ -677,8 +704,9 @@ TEST(StringViewTest, STL2Substr) { EXPECT_EQ(a.substr(23, 3), c); EXPECT_EQ(a.substr(23, 99), c); EXPECT_EQ(a.substr(0), a); + EXPECT_EQ(a.substr(), a); EXPECT_EQ(a.substr(3, 2), "de"); - // empty std::string nonsense + // empty string nonsense EXPECT_EQ(d.substr(0, 99), e); // use of npos EXPECT_EQ(a.substr(0, absl::string_view::npos), a); @@ -820,7 +848,7 @@ TEST(StringViewTest, FrontBackSingleChar) { TEST(StringViewTest, FrontBackEmpty) { #ifndef ABSL_USES_STD_STRING_VIEW -#ifndef NDEBUG +#if !defined(NDEBUG) || ABSL_OPTION_HARDENED // Abseil's string_view implementation has debug assertions that check that // front() and back() are not called on an empty string_view. absl::string_view sv; @@ -859,7 +887,7 @@ TEST(StringViewTest, NULLInput) { EXPECT_EQ(s.size(), 0); // .ToString() on a absl::string_view with nullptr should produce the empty - // std::string. + // string. EXPECT_EQ("", std::string(s)); #endif // ABSL_HAVE_STRING_VIEW_FROM_NULLPTR } @@ -914,9 +942,9 @@ TEST(StringViewTest, At) { EXPECT_EQ(abc.at(1), 'b'); EXPECT_EQ(abc.at(2), 'c'); #ifdef ABSL_HAVE_EXCEPTIONS - EXPECT_THROW(abc.at(3), std::out_of_range); + EXPECT_THROW((void)abc.at(3), std::out_of_range); #else - ABSL_EXPECT_DEATH_IF_SUPPORTED(abc.at(3), "absl::string_view::at"); + ABSL_EXPECT_DEATH_IF_SUPPORTED((void)abc.at(3), "absl::string_view::at"); #endif } @@ -977,7 +1005,7 @@ TEST(StringViewTest, ConstexprCompiles) { #if defined(ABSL_USES_STD_STRING_VIEW) // In libstdc++ (as of 7.2), `std::string_view::string_view(const char*)` - // calls `std::char_traits<char>::length(const char*)` to get the std::string + // calls `std::char_traits<char>::length(const char*)` to get the string // length, but it is not marked constexpr yet. See GCC bug: // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78156 // Also, there is a LWG issue that adds constexpr to length() which was just @@ -1086,6 +1114,31 @@ TEST(StringViewTest, ConstexprCompiles) { EXPECT_EQ(sp_npos, -1); } +constexpr char ConstexprMethodsHelper() { +#if defined(__cplusplus) && __cplusplus >= 201402L + absl::string_view str("123", 3); + str.remove_prefix(1); + str.remove_suffix(1); + absl::string_view bar; + str.swap(bar); + return bar.front(); +#else + return '2'; +#endif +} + +TEST(StringViewTest, ConstexprMethods) { + // remove_prefix, remove_suffix, swap + static_assert(ConstexprMethodsHelper() == '2', ""); + + // substr + constexpr absl::string_view foobar("foobar", 6); + constexpr absl::string_view foo = foobar.substr(0, 3); + constexpr absl::string_view bar = foobar.substr(3); + EXPECT_EQ(foo, "foo"); + EXPECT_EQ(bar, "bar"); +} + TEST(StringViewTest, Noexcept) { EXPECT_TRUE((std::is_nothrow_constructible<absl::string_view, const std::string&>::value)); @@ -1122,7 +1175,7 @@ TEST(StringViewTest, Noexcept) { TEST(StringViewTest, BoundsCheck) { #ifndef ABSL_USES_STD_STRING_VIEW -#ifndef NDEBUG +#if !defined(NDEBUG) || ABSL_OPTION_HARDENED // Abseil's string_view implementation has bounds-checking in debug mode. absl::string_view h = "hello"; ABSL_EXPECT_DEATH_IF_SUPPORTED(h[5], ""); @@ -1168,11 +1221,11 @@ TEST(FindOneCharTest, EdgeCases) { EXPECT_EQ(absl::string_view::npos, a.rfind('x')); } -#ifndef THREAD_SANITIZER // Allocates too much memory for tsan. +#ifndef ABSL_HAVE_THREAD_SANITIZER // Allocates too much memory for tsan. TEST(HugeStringView, TwoPointTwoGB) { - if (sizeof(size_t) <= 4 || RunningOnValgrind()) + if (sizeof(size_t) <= 4) return; - // Try a huge std::string piece. + // Try a huge string piece. const size_t size = size_t{2200} * 1000 * 1000; std::string s(size, 'a'); absl::string_view sp(s); @@ -1182,7 +1235,7 @@ TEST(HugeStringView, TwoPointTwoGB) { sp.remove_suffix(2); EXPECT_EQ(size - 1 - 2, sp.length()); } -#endif // THREAD_SANITIZER +#endif // ABSL_HAVE_THREAD_SANITIZER #if !defined(NDEBUG) && !defined(ABSL_USES_STD_STRING_VIEW) TEST(NonNegativeLenTest, NonNegativeLen) { diff --git a/third_party/abseil-cpp/absl/strings/substitute.cc b/third_party/abseil-cpp/absl/strings/substitute.cc index 5b69a3ef00..8980b198c2 100644 --- a/third_party/abseil-cpp/absl/strings/substitute.cc +++ b/third_party/abseil-cpp/absl/strings/substitute.cc @@ -36,7 +36,7 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format, if (i + 1 >= format.size()) { #ifndef NDEBUG ABSL_RAW_LOG(FATAL, - "Invalid absl::Substitute() format std::string: \"%s\".", + "Invalid absl::Substitute() format string: \"%s\".", absl::CEscape(format).c_str()); #endif return; @@ -46,8 +46,8 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format, #ifndef NDEBUG ABSL_RAW_LOG( FATAL, - "Invalid absl::Substitute() format std::string: asked for \"$" - "%d\", but only %d args were given. Full format std::string was: " + "Invalid absl::Substitute() format string: asked for \"$" + "%d\", but only %d args were given. Full format string was: " "\"%s\".", index, static_cast<int>(num_args), absl::CEscape(format).c_str()); #endif @@ -61,7 +61,7 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format, } else { #ifndef NDEBUG ABSL_RAW_LOG(FATAL, - "Invalid absl::Substitute() format std::string: \"%s\".", + "Invalid absl::Substitute() format string: \"%s\".", absl::CEscape(format).c_str()); #endif return; @@ -73,9 +73,10 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format, if (size == 0) return; - // Build the std::string. + // Build the string. size_t original_size = output->size(); - strings_internal::STLStringResizeUninitialized(output, original_size + size); + strings_internal::STLStringResizeUninitializedAmortized(output, + original_size + size); char* target = &(*output)[original_size]; for (size_t i = 0; i < format.size(); i++) { if (format[i] == '$') { diff --git a/third_party/abseil-cpp/absl/strings/substitute.h b/third_party/abseil-cpp/absl/strings/substitute.h index 4d0984d3d1..151c56f543 100644 --- a/third_party/abseil-cpp/absl/strings/substitute.h +++ b/third_party/abseil-cpp/absl/strings/substitute.h @@ -50,7 +50,7 @@ // // Supported types: // * absl::string_view, std::string, const char* (null is equivalent to "") -// * int32_t, int64_t, uint32_t, uint64 +// * int32_t, int64_t, uint32_t, uint64_t // * float, double // * bool (Printed as "true" or "false") // * pointer types other than char* (Printed as "0x<lower case hex string>", @@ -99,7 +99,7 @@ namespace substitute_internal { // This class has implicit constructors. class Arg { public: - // Overloads for std::string-y things + // Overloads for string-y things // // Explicitly overload `const char*` so the compiler doesn't cast to `bool`. Arg(const char* value) // NOLINT(runtime/explicit) @@ -120,7 +120,9 @@ class Arg { // representation. However, we can't really know, so we make the caller decide // what to do. Arg(char value) // NOLINT(runtime/explicit) - : piece_(scratch_, 1) { scratch_[0] = value; } + : piece_(scratch_, 1) { + scratch_[0] = value; + } Arg(short value) // NOLINT(*) : piece_(scratch_, numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {} @@ -203,10 +205,11 @@ constexpr const char* SkipNumber(const char* format) { } constexpr int PlaceholderBitmask(const char* format) { - return !*format ? 0 : *format != '$' - ? PlaceholderBitmask(format + 1) - : (CalculateOneBit(format + 1) | - PlaceholderBitmask(SkipNumber(format + 1))); + return !*format + ? 0 + : *format != '$' ? PlaceholderBitmask(format + 1) + : (CalculateOneBit(format + 1) | + PlaceholderBitmask(SkipNumber(format + 1))); } #endif // ABSL_BAD_CALL_IF @@ -358,43 +361,49 @@ inline void SubstituteAndAppend( // This body of functions catches cases where the number of placeholders // doesn't match the number of data arguments. void SubstituteAndAppend(std::string* output, const char* format) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 0, - "There were no substitution arguments " - "but this format std::string has a $[0-9] in it"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 0, + "There were no substitution arguments " + "but this format string either has a $[0-9] in it or contains " + "an unescaped $ character (use $$ instead)"); void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a0) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1, "There was 1 substitution argument given, but " - "this format std::string is either missing its $0, or " - "contains one of $1-$9"); + "this format string is missing its $0, contains " + "one of $1-$9, or contains an unescaped $ character (use " + "$$ instead)"); void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 3, - "There were 2 substitution arguments given, but " - "this format std::string is either missing its $0/$1, or " - "contains one of $2-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 3, + "There were 2 substitution arguments given, but this format string is " + "missing its $0/$1, contains one of $2-$9, or contains an " + "unescaped $ character (use $$ instead)"); void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, const substitute_internal::Arg& a2) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 7, - "There were 3 substitution arguments given, but " - "this format std::string is either missing its $0/$1/$2, or " - "contains one of $3-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 7, + "There were 3 substitution arguments given, but " + "this format string is missing its $0/$1/$2, contains one of " + "$3-$9, or contains an unescaped $ character (use $$ instead)"); void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, const substitute_internal::Arg& a3) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 15, - "There were 4 substitution arguments given, but " - "this format std::string is either missing its $0-$3, or " - "contains one of $4-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 15, + "There were 4 substitution arguments given, but " + "this format string is missing its $0-$3, contains one of " + "$4-$9, or contains an unescaped $ character (use $$ instead)"); void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a0, @@ -402,10 +411,11 @@ void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a2, const substitute_internal::Arg& a3, const substitute_internal::Arg& a4) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 31, - "There were 5 substitution arguments given, but " - "this format std::string is either missing its $0-$4, or " - "contains one of $5-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 31, + "There were 5 substitution arguments given, but " + "this format string is missing its $0-$4, contains one of " + "$5-$9, or contains an unescaped $ character (use $$ instead)"); void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a0, @@ -414,20 +424,22 @@ void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, const substitute_internal::Arg& a5) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 63, - "There were 6 substitution arguments given, but " - "this format std::string is either missing its $0-$5, or " - "contains one of $6-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 63, + "There were 6 substitution arguments given, but " + "this format string is missing its $0-$5, contains one of " + "$6-$9, or contains an unescaped $ character (use $$ instead)"); void SubstituteAndAppend( std::string* output, const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, const substitute_internal::Arg& a5, const substitute_internal::Arg& a6) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 127, - "There were 7 substitution arguments given, but " - "this format std::string is either missing its $0-$6, or " - "contains one of $7-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 127, + "There were 7 substitution arguments given, but " + "this format string is missing its $0-$6, contains one of " + "$7-$9, or contains an unescaped $ character (use $$ instead)"); void SubstituteAndAppend( std::string* output, const char* format, const substitute_internal::Arg& a0, @@ -435,10 +447,11 @@ void SubstituteAndAppend( const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, const substitute_internal::Arg& a7) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 255, - "There were 8 substitution arguments given, but " - "this format std::string is either missing its $0-$7, or " - "contains one of $8-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 255, + "There were 8 substitution arguments given, but " + "this format string is missing its $0-$7, contains one of " + "$8-$9, or contains an unescaped $ character (use $$ instead)"); void SubstituteAndAppend( std::string* output, const char* format, const substitute_internal::Arg& a0, @@ -449,7 +462,8 @@ void SubstituteAndAppend( ABSL_BAD_CALL_IF( substitute_internal::PlaceholderBitmask(format) != 511, "There were 9 substitution arguments given, but " - "this format std::string is either missing its $0-$8, or contains a $9"); + "this format string is missing its $0-$8, contains a $9, or " + "contains an unescaped $ character (use $$ instead)"); void SubstituteAndAppend( std::string* output, const char* format, const substitute_internal::Arg& a0, @@ -458,9 +472,11 @@ void SubstituteAndAppend( const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, const substitute_internal::Arg& a7, const substitute_internal::Arg& a8, const substitute_internal::Arg& a9) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1023, - "There were 10 substitution arguments given, but this " - "format std::string doesn't contain all of $0 through $9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 1023, + "There were 10 substitution arguments given, but this " + "format string either doesn't contain all of $0 through $9 or " + "contains an unescaped $ character (use $$ instead)"); #endif // ABSL_BAD_CALL_IF // Substitute() @@ -586,47 +602,53 @@ ABSL_MUST_USE_RESULT inline std::string Substitute( std::string Substitute(const char* format) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 0, "There were no substitution arguments " - "but this format std::string has a $[0-9] in it"); + "but this format string either has a $[0-9] in it or " + "contains an unescaped $ character (use $$ instead)"); std::string Substitute(const char* format, const substitute_internal::Arg& a0) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1, - "There was 1 substitution argument given, but " - "this format std::string is either missing its $0, or " - "contains one of $1-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 1, + "There was 1 substitution argument given, but " + "this format string is missing its $0, contains one of $1-$9, " + "or contains an unescaped $ character (use $$ instead)"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 3, - "There were 2 substitution arguments given, but " - "this format std::string is either missing its $0/$1, or " - "contains one of $2-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 3, + "There were 2 substitution arguments given, but " + "this format string is missing its $0/$1, contains one of " + "$2-$9, or contains an unescaped $ character (use $$ instead)"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, const substitute_internal::Arg& a2) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 7, - "There were 3 substitution arguments given, but " - "this format std::string is either missing its $0/$1/$2, or " - "contains one of $3-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 7, + "There were 3 substitution arguments given, but " + "this format string is missing its $0/$1/$2, contains one of " + "$3-$9, or contains an unescaped $ character (use $$ instead)"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, const substitute_internal::Arg& a3) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 15, - "There were 4 substitution arguments given, but " - "this format std::string is either missing its $0-$3, or " - "contains one of $4-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 15, + "There were 4 substitution arguments given, but " + "this format string is missing its $0-$3, contains one of " + "$4-$9, or contains an unescaped $ character (use $$ instead)"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, const substitute_internal::Arg& a3, const substitute_internal::Arg& a4) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 31, - "There were 5 substitution arguments given, but " - "this format std::string is either missing its $0-$4, or " - "contains one of $5-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 31, + "There were 5 substitution arguments given, but " + "this format string is missing its $0-$4, contains one of " + "$5-$9, or contains an unescaped $ character (use $$ instead)"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, @@ -634,10 +656,11 @@ std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, const substitute_internal::Arg& a5) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 63, - "There were 6 substitution arguments given, but " - "this format std::string is either missing its $0-$5, or " - "contains one of $6-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 63, + "There were 6 substitution arguments given, but " + "this format string is missing its $0-$5, contains one of " + "$6-$9, or contains an unescaped $ character (use $$ instead)"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, @@ -646,10 +669,11 @@ std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a4, const substitute_internal::Arg& a5, const substitute_internal::Arg& a6) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 127, - "There were 7 substitution arguments given, but " - "this format std::string is either missing its $0-$6, or " - "contains one of $7-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 127, + "There were 7 substitution arguments given, but " + "this format string is missing its $0-$6, contains one of " + "$7-$9, or contains an unescaped $ character (use $$ instead)"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, @@ -659,10 +683,11 @@ std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, const substitute_internal::Arg& a7) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 255, - "There were 8 substitution arguments given, but " - "this format std::string is either missing its $0-$7, or " - "contains one of $8-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 255, + "There were 8 substitution arguments given, but " + "this format string is missing its $0-$7, contains one of " + "$8-$9, or contains an unescaped $ character (use $$ instead)"); std::string Substitute( const char* format, const substitute_internal::Arg& a0, @@ -673,7 +698,8 @@ std::string Substitute( ABSL_BAD_CALL_IF( substitute_internal::PlaceholderBitmask(format) != 511, "There were 9 substitution arguments given, but " - "this format std::string is either missing its $0-$8, or contains a $9"); + "this format string is missing its $0-$8, contains a $9, or " + "contains an unescaped $ character (use $$ instead)"); std::string Substitute( const char* format, const substitute_internal::Arg& a0, @@ -682,9 +708,11 @@ std::string Substitute( const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, const substitute_internal::Arg& a7, const substitute_internal::Arg& a8, const substitute_internal::Arg& a9) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1023, - "There were 10 substitution arguments given, but this " - "format std::string doesn't contain all of $0 through $9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 1023, + "There were 10 substitution arguments given, but this " + "format string either doesn't contain all of $0 through $9 or " + "contains an unescaped $ character (use $$ instead)"); #endif // ABSL_BAD_CALL_IF ABSL_NAMESPACE_END diff --git a/third_party/abseil-cpp/absl/strings/substitute_test.cc b/third_party/abseil-cpp/absl/strings/substitute_test.cc index 450cd2bcff..442c921528 100644 --- a/third_party/abseil-cpp/absl/strings/substitute_test.cc +++ b/third_party/abseil-cpp/absl/strings/substitute_test.cc @@ -89,7 +89,7 @@ TEST(SubstituteTest, Substitute) { str = absl::Substitute("$0", char_buf); EXPECT_EQ("print me too", str); - // null char* is "doubly" special. Represented as the empty std::string. + // null char* is "doubly" special. Represented as the empty string. char_p = nullptr; str = absl::Substitute("$0", char_p); EXPECT_EQ("", str); @@ -189,14 +189,14 @@ TEST(SubstituteTest, VectorBoolRef) { TEST(SubstituteDeathTest, SubstituteDeath) { EXPECT_DEBUG_DEATH( static_cast<void>(absl::Substitute(absl::string_view("-$2"), "a", "b")), - "Invalid absl::Substitute\\(\\) format std::string: asked for \"\\$2\", " + "Invalid absl::Substitute\\(\\) format string: asked for \"\\$2\", " "but only 2 args were given."); EXPECT_DEBUG_DEATH( static_cast<void>(absl::Substitute(absl::string_view("-$z-"))), - "Invalid absl::Substitute\\(\\) format std::string: \"-\\$z-\""); + "Invalid absl::Substitute\\(\\) format string: \"-\\$z-\""); EXPECT_DEBUG_DEATH( static_cast<void>(absl::Substitute(absl::string_view("-$"))), - "Invalid absl::Substitute\\(\\) format std::string: \"-\\$\""); + "Invalid absl::Substitute\\(\\) format string: \"-\\$\""); } #endif // GTEST_HAS_DEATH_TEST diff --git a/third_party/abseil-cpp/absl/strings/testdata/getline-1.txt b/third_party/abseil-cpp/absl/strings/testdata/getline-1.txt deleted file mode 100644 index 19b909733a..0000000000 --- a/third_party/abseil-cpp/absl/strings/testdata/getline-1.txt +++ /dev/null @@ -1,3 +0,0 @@ -alpha - -beta gamma diff --git a/third_party/abseil-cpp/absl/strings/testdata/getline-2.txt b/third_party/abseil-cpp/absl/strings/testdata/getline-2.txt deleted file mode 100644 index d6842d8ee3..0000000000 --- a/third_party/abseil-cpp/absl/strings/testdata/getline-2.txt +++ /dev/null @@ -1 +0,0 @@ -one.two.three |