about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Haibo Huang <hhb@google.com>, 2020-04-15 23:47:17 +0000
committer: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>, 2020-04-15 23:47:17 +0000
commit: dba5619fc6fd63d8ea355879ceabcece683aad81 (patch)
tree: e14feefc15be0a7f4621370bb1bce1db9b8c50fa
parent: 80f3373a22feb6431137de77ad4a727d27d8cc2e (diff)
parent: 74c9d9ef6a828998ac9aab120bc94410ce99e3d1 (diff)
download: FXdiv-dba5619fc6fd63d8ea355879ceabcece683aad81.tar.gz
Upgrade FXdiv to 561254d968e5679460e6a0a743206410284d9f46 am: 74c9d9ef6a [android-r-beta-3] [android-r-beta-2]
Change-Id: If463523d54ae4582aa10026d81929ea2b0848f23
-rw-r--r-- .gitignore 11
-rw-r--r-- BUILD.bazel 85
-rw-r--r-- CMakeLists.txt 67
-rw-r--r-- METADATA 18
-rw-r--r-- README.md 14
-rw-r--r-- WORKSPACE 30
-rw-r--r-- cmake/DownloadGoogleBenchmark.cmake 10
-rw-r--r-- cmake/DownloadGoogleTest.cmake 10
-rw-r--r-- include/fxdiv.h 36
9 files changed, 219 insertions, 62 deletions
diff --git a/.gitignore b/.gitignore
index 73b2998..c10cb60 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,10 +2,17 @@
build.ninja
# Build objects and artifacts
-deps/
-build/
+bazel-bin
+bazel-genfiles
+bazel-out
+bazel-testlogs
+bazel-FXdiv
bin/
+build/
+build-*/
+deps/
lib/
+libs/
*.pyc
*.pyo
diff --git a/BUILD.bazel b/BUILD.bazel
new file mode 100644
index 0000000..7b0ba72
--- /dev/null
+++ b/BUILD.bazel
@@ -0,0 +1,85 @@
+load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test")
+
+licenses(["notice"])
+
+################################# FXdiv library ################################
+
+cc_library(
+ name = "FXdiv",
+ hdrs = [
+ "include/fxdiv.h",
+ ],
+ includes = [
+ "include",
+ ],
+ strip_include_prefix = "include",
+ deps = [],
+ visibility = ["//visibility:public"],
+)
+
+################################## Unit tests ##################################
+
+cc_test(
+ name = "multiply_high_test",
+ srcs = ["test/multiply-high.cc"],
+ deps = [
+ ":FXdiv",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "quotient_test",
+ srcs = ["test/quotient.cc"],
+ deps = [
+ ":FXdiv",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+################################## Benchmarks ##################################
+
+cc_binary(
+ name = "init_bench",
+ srcs = ["bench/init.cc"],
+ deps = [
+ ":FXdiv",
+ "@com_google_benchmark//:benchmark",
+ ],
+)
+
+cc_binary(
+ name = "multiply_bench",
+ srcs = ["bench/multiply.cc"],
+ deps = [
+ ":FXdiv",
+ "@com_google_benchmark//:benchmark",
+ ],
+)
+
+cc_binary(
+ name = "divide_bench",
+ srcs = ["bench/divide.cc"],
+ deps = [
+ ":FXdiv",
+ "@com_google_benchmark//:benchmark",
+ ],
+)
+
+cc_binary(
+ name = "quotient_bench",
+ srcs = ["bench/quotient.cc"],
+ deps = [
+ ":FXdiv",
+ "@com_google_benchmark//:benchmark",
+ ],
+)
+
+cc_binary(
+ name = "round_down_bench",
+ srcs = ["bench/round-down.cc"],
+ deps = [
+ ":FXdiv",
+ "@com_google_benchmark//:benchmark",
+ ],
+)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a74d59d..bcae6b5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,4 @@
-CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12 FATAL_ERROR)
-
-INCLUDE(GNUInstallDirs)
+CMAKE_MINIMUM_REQUIRED(VERSION 3.5 FATAL_ERROR)
# ---[ Project
PROJECT(FXdiv C CXX)
@@ -16,45 +14,39 @@ ELSE()
ENDIF()
# ---[ CMake options
+INCLUDE(GNUInstallDirs)
+
IF(FXDIV_BUILD_TESTS)
ENABLE_TESTING()
ENDIF()
# ---[ Download deps
-SET(CONFU_DEPENDENCIES_SOURCE_DIR ${CMAKE_SOURCE_DIR}/deps
- CACHE PATH "Confu-style dependencies source directory")
-SET(CONFU_DEPENDENCIES_BINARY_DIR ${CMAKE_BINARY_DIR}/deps
- CACHE PATH "Confu-style dependencies binary directory")
-
IF(FXDIV_BUILD_TESTS AND NOT DEFINED GOOGLETEST_SOURCE_DIR)
- MESSAGE(STATUS "Downloading Google Test to ${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest (define GOOGLETEST_SOURCE_DIR to avoid it)")
- CONFIGURE_FILE(cmake/DownloadGoogleTest.cmake "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download/CMakeLists.txt")
+ MESSAGE(STATUS "Downloading Google Test to ${CMAKE_BINARY_DIR}/googletest-source (define GOOGLETEST_SOURCE_DIR to avoid it)")
+ CONFIGURE_FILE(cmake/DownloadGoogleTest.cmake "${CMAKE_BINARY_DIR}/googletest-download/CMakeLists.txt")
EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
- WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download")
+ WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/googletest-download")
EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build .
- WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest-download")
- SET(GOOGLETEST_SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest" CACHE STRING "Google Test source directory")
+ WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/googletest-download")
+ SET(GOOGLETEST_SOURCE_DIR "${CMAKE_BINARY_DIR}/googletest-source" CACHE STRING "Google Test source directory")
ENDIF()
IF(FXDIV_BUILD_BENCHMARKS AND NOT DEFINED GOOGLEBENCHMARK_SOURCE_DIR)
- MESSAGE(STATUS "Downloading Google Benchmark to ${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark (define GOOGLEBENCHMARK_SOURCE_DIR to avoid it)")
- CONFIGURE_FILE(cmake/DownloadGoogleBenchmark.cmake "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download/CMakeLists.txt")
+ MESSAGE(STATUS "Downloading Google Benchmark to ${CMAKE_BINARY_DIR}/googlebenchmark-source (define GOOGLEBENCHMARK_SOURCE_DIR to avoid it)")
+ CONFIGURE_FILE(cmake/DownloadGoogleBenchmark.cmake "${CMAKE_BINARY_DIR}/googlebenchmark-download/CMakeLists.txt")
EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
- WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download")
+ WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/googlebenchmark-download")
EXECUTE_PROCESS(COMMAND "${CMAKE_COMMAND}" --build .
- WORKING_DIRECTORY "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark-download")
- SET(GOOGLEBENCHMARK_SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark" CACHE STRING "Google Benchmark source directory")
+ WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/googlebenchmark-download")
+ SET(GOOGLEBENCHMARK_SOURCE_DIR "${CMAKE_BINARY_DIR}/googlebenchmark-source" CACHE STRING "Google Benchmark source directory")
ENDIF()
# ---[ FXdiv library
-IF(${CMAKE_VERSION} VERSION_LESS "3.0")
- ADD_LIBRARY(fxdiv STATIC include/fxdiv.h)
- SET_TARGET_PROPERTIES(fxdiv PROPERTIES LINKER_LANGUAGE C)
-ELSE()
- ADD_LIBRARY(fxdiv INTERFACE)
-ENDIF()
+ADD_LIBRARY(fxdiv INTERFACE)
TARGET_INCLUDE_DIRECTORIES(fxdiv INTERFACE include)
-IF(NOT FXDIV_USE_INLINE_ASSEMBLY)
+IF(FXDIV_USE_INLINE_ASSEMBLY)
+ TARGET_COMPILE_DEFINITIONS(fxdiv INTERFACE FXDIV_USE_INLINE_ASSEMBLY=1)
+ELSE()
TARGET_COMPILE_DEFINITIONS(fxdiv INTERFACE FXDIV_USE_INLINE_ASSEMBLY=0)
ENDIF()
@@ -66,14 +58,20 @@ IF(FXDIV_BUILD_TESTS)
SET(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
ADD_SUBDIRECTORY(
"${GOOGLETEST_SOURCE_DIR}"
- "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest")
+ "${CMAKE_BINARY_DIR}/googletest")
ENDIF()
ADD_EXECUTABLE(multiply-high-test test/multiply-high.cc)
+ SET_TARGET_PROPERTIES(multiply-high-test PROPERTIES
+ CXX_STANDARD 11
+ CXX_EXTENSIONS YES)
TARGET_LINK_LIBRARIES(multiply-high-test fxdiv gtest gtest_main)
ADD_TEST(multiply-high multiply-high-test)
ADD_EXECUTABLE(quotient-test test/quotient.cc)
+ SET_TARGET_PROPERTIES(quotient-test PROPERTIES
+ CXX_STANDARD 11
+ CXX_EXTENSIONS YES)
TARGET_LINK_LIBRARIES(quotient-test fxdiv gtest gtest_main)
ADD_TEST(quotient quotient-test)
ENDIF()
@@ -84,21 +82,36 @@ IF(FXDIV_BUILD_BENCHMARKS)
SET(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE)
ADD_SUBDIRECTORY(
"${GOOGLEBENCHMARK_SOURCE_DIR}"
- "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark")
+ "${CMAKE_BINARY_DIR}/googlebenchmark")
ENDIF()
ADD_EXECUTABLE(init-bench bench/init.cc)
+ SET_TARGET_PROPERTIES(init-bench PROPERTIES
+ CXX_STANDARD 11
+ CXX_EXTENSIONS YES)
TARGET_LINK_LIBRARIES(init-bench fxdiv benchmark)
ADD_EXECUTABLE(multiply-bench bench/multiply.cc)
+ SET_TARGET_PROPERTIES(multiply-bench PROPERTIES
+ CXX_STANDARD 11
+ CXX_EXTENSIONS YES)
TARGET_LINK_LIBRARIES(multiply-bench fxdiv benchmark)
ADD_EXECUTABLE(divide-bench bench/divide.cc)
+ SET_TARGET_PROPERTIES(divide-bench PROPERTIES
+ CXX_STANDARD 11
+ CXX_EXTENSIONS YES)
TARGET_LINK_LIBRARIES(divide-bench fxdiv benchmark)
ADD_EXECUTABLE(quotient-bench bench/quotient.cc)
+ SET_TARGET_PROPERTIES(quotient-bench PROPERTIES
+ CXX_STANDARD 11
+ CXX_EXTENSIONS YES)
TARGET_LINK_LIBRARIES(quotient-bench fxdiv benchmark)
ADD_EXECUTABLE(round-down-bench bench/round-down.cc)
+ SET_TARGET_PROPERTIES(round-down-bench PROPERTIES
+ CXX_STANDARD 11
+ CXX_EXTENSIONS YES)
TARGET_LINK_LIBRARIES(round-down-bench fxdiv benchmark)
ENDIF()
diff --git a/METADATA b/METADATA
index 8360525..f0b139b 100644
--- a/METADATA
+++ b/METADATA
@@ -1,13 +1,5 @@
name: "FXdiv"
-description:
- "Header-only library for division via fixed-point multiplication by inverse "
- " "
- "On modern CPUs and GPUs integer division is several times slower than "
- "multiplication. FXdiv implements an algorithm to replace an integer "
- "division with a multiplication and two shifts. This algorithm improves "
- "performance when an application performs repeated divisions by the same "
- "divisor."
-
+description: "Header-only library for division via fixed-point multiplication by inverse On modern CPUs and GPUs integer division is several times slower than multiplication. FXdiv implements an algorithm to replace an integer division with a multiplication and two shifts. This algorithm improves performance when an application performs repeated divisions by the same divisor."
third_party {
url {
type: HOMEPAGE
@@ -17,7 +9,11 @@ third_party {
type: GIT
value: "https://github.com/Maratyszcza/FXdiv"
}
- version: "fd804a929fc64be9e40ee58bb51ed9f9cac98244"
- last_upgrade_date { year: 2020 month: 2 day: 3 }
+ version: "561254d968e5679460e6a0a743206410284d9f46"
license_type: NOTICE
+ last_upgrade_date {
+ year: 2020
+ month: 4
+ day: 13
+ }
}
diff --git a/README.md b/README.md
index 2e9e231..b8ef0d3 100644
--- a/README.md
+++ b/README.md
@@ -40,21 +40,31 @@ void divide_array_fxdiv(size_t length, uint32_t array[], uint32_t divisor) {
## Status
-Project is in alpha stage. API is unstable. Currently working features:
+Currently working features:
| Platform | uint32_t | uint64_t | size_t |
| --------------- |:--------:|:--------:|:--------:|
| x86-64 gcc | Works | Works | Works |
+| x86-64 clang | Works | Works | Works |
| x86-64 MSVC | Works | Works | Works |
| x86 gcc | Works | Works | Works |
+| x86 clang | Works | Works | Works |
| x86 MSVC | Works | Works | Works |
| ARMv7 gcc | Works | Works | Works |
+| ARMv7 clang | Works | Works | Works |
+| ARMv7 MSVC* | Compiles | Compiles | Compiles |
+| ARM64 gcc | Works | Works | Works |
+| ARM64 clang | Works | Works | Works |
+| ARM64 MSVC* | Compiles | Compiles | Compiles |
| PPC64 gcc | Works | Works | Works |
-| PNaCl clang | Works | Works | Works |
+| WAsm clang | Works | Works | Works |
| Asm.js clang | Works | Works | Works |
+| PNaCl clang | Works | Works | Works |
| CUDA | Untested | Untested | Untested |
| OpenCL | Untested | Untested | Untested |
+*ARMv7 and ARM64 builds with MSVC are presumed to work, but were only verified to compile successfully
+
## References
- Granlund, Torbjörn, and Peter L. Montgomery. "Division by invariant integers using multiplication." In ACM SIGPLAN Notices, vol. 29, no. 6, pp. 61-72. ACM, 1994. Available: [gmplib.org/~tege/divcnst-pldi94.pdf](https://gmplib.org/~tege/divcnst-pldi94.pdf)
diff --git a/WORKSPACE b/WORKSPACE
new file mode 100644
index 0000000..4fbe23d
--- /dev/null
+++ b/WORKSPACE
@@ -0,0 +1,30 @@
+workspace(name = "FXdiv")
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+# Bazel rule definitions
+http_archive(
+ name = "rules_cc",
+ strip_prefix = "rules_cc-master",
+ urls = ["https://github.com/bazelbuild/rules_cc/archive/master.zip"],
+)
+
+# Google Test framework, used by most unit-tests.
+http_archive(
+ name = "com_google_googletest",
+ strip_prefix = "googletest-master",
+ urls = ["https://github.com/google/googletest/archive/master.zip"],
+)
+
+# Google Benchmark library, used in micro-benchmarks.
+http_archive(
+ name = "com_google_benchmark",
+ strip_prefix = "benchmark-master",
+ urls = ["https://github.com/google/benchmark/archive/master.zip"],
+)
+
+# Android NDK location and version is auto-detected from $ANDROID_NDK_HOME environment variable
+android_ndk_repository(name = "androidndk")
+
+# Android SDK location and API is auto-detected from $ANDROID_HOME environment variable
+android_sdk_repository(name = "androidsdk")
diff --git a/cmake/DownloadGoogleBenchmark.cmake b/cmake/DownloadGoogleBenchmark.cmake
index 349e7cb..d042e07 100644
--- a/cmake/DownloadGoogleBenchmark.cmake
+++ b/cmake/DownloadGoogleBenchmark.cmake
@@ -1,13 +1,13 @@
-CMAKE_MINIMUM_REQUIRED(VERSION 2.8.2)
+CMAKE_MINIMUM_REQUIRED(VERSION 3.5 FATAL_ERROR)
PROJECT(googlebenchmark-download NONE)
INCLUDE(ExternalProject)
ExternalProject_Add(googlebenchmark
- URL https://github.com/google/benchmark/archive/v1.2.0.zip
- URL_HASH SHA256=cc463b28cb3701a35c0855fbcefb75b29068443f1952b64dd5f4f669272e95ea
- SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googlebenchmark"
- BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googlebenchmark"
+ URL https://github.com/google/benchmark/archive/v1.5.0.zip
+ URL_HASH SHA256=2d22dd3758afee43842bb504af1a8385cccb3ee1f164824e4837c1c1b04d92a0
+ SOURCE_DIR "${CMAKE_BINARY_DIR}/googlebenchmark-source"
+ BINARY_DIR "${CMAKE_BINARY_DIR}/googlebenchmark"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
diff --git a/cmake/DownloadGoogleTest.cmake b/cmake/DownloadGoogleTest.cmake
index 19f5eb1..1a0c152 100644
--- a/cmake/DownloadGoogleTest.cmake
+++ b/cmake/DownloadGoogleTest.cmake
@@ -1,13 +1,13 @@
-CMAKE_MINIMUM_REQUIRED(VERSION 2.8.2)
+CMAKE_MINIMUM_REQUIRED(VERSION 3.5 FATAL_ERROR)
PROJECT(googletest-download NONE)
INCLUDE(ExternalProject)
ExternalProject_Add(googletest
- URL https://github.com/google/googletest/archive/release-1.8.0.zip
- URL_HASH SHA256=f3ed3b58511efd272eb074a3a6d6fb79d7c2e6a0e374323d1e6bcbcc1ef141bf
- SOURCE_DIR "${CONFU_DEPENDENCIES_SOURCE_DIR}/googletest"
- BINARY_DIR "${CONFU_DEPENDENCIES_BINARY_DIR}/googletest"
+ URL https://github.com/google/googletest/archive/release-1.10.0.zip
+ URL_HASH SHA256=94c634d499558a76fa649edb13721dce6e98fb1e7018dfaeba3cd7a083945e91
+ SOURCE_DIR "${CMAKE_BINARY_DIR}/googletest-source"
+ BINARY_DIR "${CMAKE_BINARY_DIR}/googletest"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
diff --git a/include/fxdiv.h b/include/fxdiv.h
index 21a3dc1..f5a09d0 100644
--- a/include/fxdiv.h
+++ b/include/fxdiv.h
@@ -14,10 +14,13 @@
#if defined(_MSC_VER)
#include <intrin.h>
+ #if defined(_M_IX86) || defined(_M_X64)
+ #include <immintrin.h>
+ #endif
#endif
#ifndef FXDIV_USE_INLINE_ASSEMBLY
- #define FXDIV_USE_INLINE_ASSEMBLY 1
+ #define FXDIV_USE_INLINE_ASSEMBLY 0
#endif
static inline uint64_t fxdiv_mulext_uint32_t(uint32_t a, uint32_t b) {
@@ -121,14 +124,15 @@ static inline struct fxdiv_divisor_uint32_t fxdiv_init_uint32_t(uint32_t d) {
const uint32_t l_minus_1 = 31 - clz(d - 1);
#elif defined(__CUDA_ARCH__)
const uint32_t l_minus_1 = 31 - __clz((int) (d - 1));
- #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
+ #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64))
unsigned long l_minus_1;
_BitScanReverse(&l_minus_1, (unsigned long) (d - 1));
#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && FXDIV_USE_INLINE_ASSEMBLY
uint32_t l_minus_1;
__asm__("BSRL %[d_minus_1], %[l_minus_1]"
: [l_minus_1] "=r" (l_minus_1)
- : [d_minus_1] "r" (d - 1));
+ : [d_minus_1] "r" (d - 1)
+ : "cc");
#elif defined(__GNUC__)
const uint32_t l_minus_1 = 31 - __builtin_clz(d - 1);
#else
@@ -167,7 +171,11 @@ static inline struct fxdiv_divisor_uint32_t fxdiv_init_uint32_t(uint32_t d) {
uint32_t q;
__asm__("DIVL %[d]"
: "=a" (q), "+d" (u_hi)
- : [d] "r" (d), "a" (0));
+ : [d] "r" (d), "a" (0)
+ : "cc");
+ #elif (defined(_MSC_VER) && _MSC_VER >= 1920) && (defined(_M_IX86) || defined(_M_X64))
+ unsigned int remainder;
+ const uint32_t q = (uint32_t) _udiv64((unsigned __int64) ((uint64_t) u_hi << 32), (unsigned int) d, &remainder);
#else
const uint32_t q = ((uint64_t) u_hi << 32) / d;
#endif
@@ -192,13 +200,13 @@ static inline struct fxdiv_divisor_uint64_t fxdiv_init_uint64_t(uint64_t d) {
#elif defined(__CUDA_ARCH__)
const uint32_t nlz_d = __clzll((long long) d);
const uint32_t l_minus_1 = 63 - __clzll((long long) (d - 1));
- #elif defined(_MSC_VER) && defined(_M_X64)
+ #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
unsigned long l_minus_1;
_BitScanReverse64(&l_minus_1, (unsigned __int64) (d - 1));
unsigned long bsr_d;
_BitScanReverse64(&bsr_d, (unsigned __int64) d);
const uint32_t nlz_d = bsr_d ^ 0x3F;
- #elif defined(_MSC_VER) && defined(_M_IX86)
+ #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_ARM))
const uint64_t d_minus_1 = d - 1;
const uint8_t d_is_power_of_2 = (d & d_minus_1) == 0;
unsigned long l_minus_1;
@@ -213,7 +221,8 @@ static inline struct fxdiv_divisor_uint64_t fxdiv_init_uint64_t(uint64_t d) {
uint64_t l_minus_1;
__asm__("BSRQ %[d_minus_1], %[l_minus_1]"
: [l_minus_1] "=r" (l_minus_1)
- : [d_minus_1] "r" (d - 1));
+ : [d_minus_1] "r" (d - 1)
+ : "cc");
#elif defined(__GNUC__)
const uint32_t l_minus_1 = 63 - __builtin_clzll(d - 1);
const uint32_t nlz_d = __builtin_clzll(d);
@@ -221,8 +230,8 @@ static inline struct fxdiv_divisor_uint64_t fxdiv_init_uint64_t(uint64_t d) {
/* Based on Algorithm 2 from Hacker's delight */
const uint64_t d_minus_1 = d - 1;
const uint32_t d_is_power_of_2 = (d & d_minus_1) == 0;
- uint64_t l_minus_1 = 0;
- uint32_t x = d_minus_1;
+ uint32_t l_minus_1 = 0;
+ uint32_t x = (uint32_t) d_minus_1;
uint32_t y = d_minus_1 >> 32;
if (y != 0) {
l_minus_1 += 32;
@@ -260,7 +269,14 @@ static inline struct fxdiv_divisor_uint64_t fxdiv_init_uint64_t(uint64_t d) {
uint64_t q;
__asm__("DIVQ %[d]"
: "=a" (q), "+d" (u_hi)
- : [d] "r" (d), "a" (UINT64_C(0)));
+ : [d] "r" (d), "a" (UINT64_C(0))
+ : "cc");
+ #elif 0 && defined(__GNUC__) && defined(__SIZEOF_INT128__)
+ /* GCC, Clang, and Intel Compiler fail to inline optimized implementation and call into support library for 128-bit division */
+ const uint64_t q = (uint64_t) (((unsigned __int128) u_hi << 64) / ((unsigned __int128) d));
+ #elif (defined(_MSC_VER) && _MSC_VER >= 1920) && defined(_M_X64)
+ unsigned __int64 remainder;
+ const uint64_t q = (uint64_t) _udiv128((unsigned __int64) u_hi, 0, (unsigned __int64) d, &remainder);
#else
/* Implementation based on code from Hacker's delight */