#ifndef MARISA_BASE_H_ #define MARISA_BASE_H_ // Old Visual C++ does not provide stdint.h. #ifndef _MSC_VER #include #endif // _MSC_VER #ifdef __cplusplus #include #else // __cplusplus #include #endif // __cplusplus #ifdef __cplusplus extern "C" { #endif // __cplusplus #ifdef _MSC_VER typedef unsigned __int8 marisa_uint8; typedef unsigned __int16 marisa_uint16; typedef unsigned __int32 marisa_uint32; typedef unsigned __int64 marisa_uint64; #else // _MSC_VER typedef uint8_t marisa_uint8; typedef uint16_t marisa_uint16; typedef uint32_t marisa_uint32; typedef uint64_t marisa_uint64; #endif // _MSC_VER #if defined(_WIN64) || defined(__amd64__) || defined(__x86_64__) || \ defined(__ia64__) || defined(__ppc64__) || defined(__powerpc64__) || \ defined(__sparc64__) || defined(__mips64__) || defined(__aarch64__) || \ defined(__s390x__) #define MARISA_WORD_SIZE 64 #else // defined(_WIN64), etc. #define MARISA_WORD_SIZE 32 #endif // defined(_WIN64), etc. //#define MARISA_WORD_SIZE (sizeof(void *) * 8) #define MARISA_UINT8_MAX ((marisa_uint8)~(marisa_uint8)0) #define MARISA_UINT16_MAX ((marisa_uint16)~(marisa_uint16)0) #define MARISA_UINT32_MAX ((marisa_uint32)~(marisa_uint32)0) #define MARISA_UINT64_MAX ((marisa_uint64)~(marisa_uint64)0) #define MARISA_SIZE_MAX ((size_t)~(size_t)0) #define MARISA_INVALID_LINK_ID MARISA_UINT32_MAX #define MARISA_INVALID_KEY_ID MARISA_UINT32_MAX #define MARISA_INVALID_EXTRA (MARISA_UINT32_MAX >> 8) // Error codes are defined as members of marisa_error_code. This library throws // an exception with one of the error codes when an error occurs. typedef enum marisa_error_code_ { // MARISA_OK means that a requested operation has succeeded. In practice, an // exception never has MARISA_OK because it is not an error. MARISA_OK = 0, // MARISA_STATE_ERROR means that an object was not ready for a requested // operation. For example, an operation to modify a fixed vector throws an // exception with MARISA_STATE_ERROR. MARISA_STATE_ERROR = 1, // MARISA_NULL_ERROR means that an invalid NULL pointer has been given. MARISA_NULL_ERROR = 2, // MARISA_BOUND_ERROR means that an operation has tried to access an out of // range address. MARISA_BOUND_ERROR = 3, // MARISA_RANGE_ERROR means that an out of range value has appeared in // operation. MARISA_RANGE_ERROR = 4, // MARISA_CODE_ERROR means that an undefined code has appeared in operation. MARISA_CODE_ERROR = 5, // MARISA_RESET_ERROR means that a smart pointer has tried to reset itself. MARISA_RESET_ERROR = 6, // MARISA_SIZE_ERROR means that a size has exceeded a library limitation. MARISA_SIZE_ERROR = 7, // MARISA_MEMORY_ERROR means that a memory allocation has failed. MARISA_MEMORY_ERROR = 8, // MARISA_IO_ERROR means that an I/O operation has failed. MARISA_IO_ERROR = 9, // MARISA_FORMAT_ERROR means that input was in invalid format. MARISA_FORMAT_ERROR = 10, } marisa_error_code; // Min/max values, flags and masks for dictionary settings are defined below. // Please note that unspecified settings will be replaced with the default // settings. For example, 0 is equivalent to (MARISA_DEFAULT_NUM_TRIES | // MARISA_DEFAULT_TRIE | MARISA_DEFAULT_TAIL | MARISA_DEFAULT_ORDER). // A dictionary consists of 3 tries in default. Usually more tries make a // dictionary space-efficient but time-inefficient. typedef enum marisa_num_tries_ { MARISA_MIN_NUM_TRIES = 0x00001, MARISA_MAX_NUM_TRIES = 0x0007F, MARISA_DEFAULT_NUM_TRIES = 0x00003, } marisa_num_tries; // This library uses a cache technique to accelerate search functions. The // following enumerated type marisa_cache_level gives a list of available cache // size options. A larger cache enables faster search but takes a more space. typedef enum marisa_cache_level_ { MARISA_HUGE_CACHE = 0x00080, MARISA_LARGE_CACHE = 0x00100, MARISA_NORMAL_CACHE = 0x00200, MARISA_SMALL_CACHE = 0x00400, MARISA_TINY_CACHE = 0x00800, MARISA_DEFAULT_CACHE = MARISA_NORMAL_CACHE } marisa_cache_level; // This library provides 2 kinds of TAIL implementations. typedef enum marisa_tail_mode_ { // MARISA_TEXT_TAIL merges last labels as zero-terminated strings. So, it is // available if and only if the last labels do not contain a NULL character. // If MARISA_TEXT_TAIL is specified and a NULL character exists in the last // labels, the setting is automatically switched to MARISA_BINARY_TAIL. MARISA_TEXT_TAIL = 0x01000, // MARISA_BINARY_TAIL also merges last labels but as byte sequences. It uses // a bit vector to detect the end of a sequence, instead of NULL characters. // So, MARISA_BINARY_TAIL requires a larger space if the average length of // labels is greater than 8. MARISA_BINARY_TAIL = 0x02000, MARISA_DEFAULT_TAIL = MARISA_TEXT_TAIL, } marisa_tail_mode; // The arrangement of nodes affects the time cost of matching and the order of // predictive search. typedef enum marisa_node_order_ { // MARISA_LABEL_ORDER arranges nodes in ascending label order. // MARISA_LABEL_ORDER is useful if an application needs to predict keys in // label order. MARISA_LABEL_ORDER = 0x10000, // MARISA_WEIGHT_ORDER arranges nodes in descending weight order. // MARISA_WEIGHT_ORDER is generally a better choice because it enables faster // matching. MARISA_WEIGHT_ORDER = 0x20000, MARISA_DEFAULT_ORDER = MARISA_WEIGHT_ORDER, } marisa_node_order; typedef enum marisa_config_mask_ { MARISA_NUM_TRIES_MASK = 0x0007F, MARISA_CACHE_LEVEL_MASK = 0x00F80, MARISA_TAIL_MODE_MASK = 0x0F000, MARISA_NODE_ORDER_MASK = 0xF0000, MARISA_CONFIG_MASK = 0xFFFFF } marisa_config_mask; #ifdef __cplusplus } // extern "C" #endif // __cplusplus #ifdef __cplusplus // `std::swap` is in since C++ 11 but in in C++ 98: #if __cplusplus >= 201103L #include #else #include #endif namespace marisa { typedef ::marisa_uint8 UInt8; typedef ::marisa_uint16 UInt16; typedef ::marisa_uint32 UInt32; typedef ::marisa_uint64 UInt64; typedef ::marisa_error_code ErrorCode; typedef ::marisa_cache_level CacheLevel; typedef ::marisa_tail_mode TailMode; typedef ::marisa_node_order NodeOrder; using std::swap; } // namespace marisa #endif // __cplusplus #ifdef __cplusplus #include "marisa/exception.h" #include "marisa/scoped-ptr.h" #include "marisa/scoped-array.h" #endif // __cplusplus #endif // MARISA_BASE_H_