blob: f42f190980090057b79e443625f4500ac20d612c (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
// Copyright 2023 The PDFium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CORE_FXCRT_UTF16_H_
#define CORE_FXCRT_UTF16_H_
#include "third_party/base/check.h"
namespace pdfium {
// Number of payload (suffix) bits stored in each UTF-16 surrogate code unit.
inline constexpr int kSurrogateBits = 10;

// Bitmask selecting the `kSurrogateBits` payload bits of a surrogate (`0x3FF`).
inline constexpr char16_t kSurrogateMask =
    static_cast<char16_t>((1 << kSurrogateBits) - 1);

// Lowest supplementary code point, `U+10000`.
inline constexpr char32_t kMinimumSupplementaryCodePoint = 0x10000;

// Highest supplementary code point, `U+10FFFF`: the minimum plus the largest
// offset that fits in the combined payload of two surrogates
// (2 * `kSurrogateBits` bits).
inline constexpr char32_t kMaximumSupplementaryCodePoint =
    kMinimumSupplementaryCodePoint +
    ((char32_t{1} << (2 * kSurrogateBits)) - 1);

// Lowest UTF-16 high surrogate code unit, `U+D800`.
inline constexpr char16_t kMinimumHighSurrogateCodeUnit = 0xd800;

// Highest UTF-16 high surrogate code unit, `U+DBFF`.
inline constexpr char16_t kMaximumHighSurrogateCodeUnit =
    static_cast<char16_t>(kMinimumHighSurrogateCodeUnit | kSurrogateMask);

// Lowest UTF-16 low surrogate code unit, `U+DC00`. The low surrogate range
// begins immediately after the high surrogate range.
inline constexpr char16_t kMinimumLowSurrogateCodeUnit =
    static_cast<char16_t>(kMaximumHighSurrogateCodeUnit + 1);

// Highest UTF-16 low surrogate code unit, `U+DFFF`.
inline constexpr char16_t kMaximumLowSurrogateCodeUnit =
    static_cast<char16_t>(kMinimumLowSurrogateCodeUnit | kSurrogateMask);
// Reports whether `code_point` lies in the supplementary range
// [`kMinimumSupplementaryCodePoint`, `kMaximumSupplementaryCodePoint`]. Such
// code points must be encoded in UTF-16 as a surrogate pair.
constexpr bool IsSupplementary(char32_t code_point) {
  if (code_point < kMinimumSupplementaryCodePoint) {
    return false;
  }
  return code_point <= kMaximumSupplementaryCodePoint;
}
// Reports whether `code_point` is a UTF-16 high surrogate, i.e. falls within
// [`kMinimumHighSurrogateCodeUnit`, `kMaximumHighSurrogateCodeUnit`].
constexpr bool IsHighSurrogate(char32_t code_point) {
  const bool at_or_above_first = code_point >= kMinimumHighSurrogateCodeUnit;
  const bool at_or_below_last = code_point <= kMaximumHighSurrogateCodeUnit;
  return at_or_above_first && at_or_below_last;
}
// Reports whether `code_point` is a UTF-16 low surrogate, i.e. falls within
// [`kMinimumLowSurrogateCodeUnit`, `kMaximumLowSurrogateCodeUnit`].
constexpr bool IsLowSurrogate(char32_t code_point) {
  // In range exactly when it is neither below the first nor above the last
  // low surrogate code unit.
  return !(code_point < kMinimumLowSurrogateCodeUnit ||
           code_point > kMaximumLowSurrogateCodeUnit);
}
// Holds the two code units (high, then low) of a UTF-16 surrogate pair.
class SurrogatePair final {
 public:
  // Builds a pair directly from its two code units. DCHECKs that `high` is a
  // high surrogate and that `low` is a low surrogate.
  constexpr SurrogatePair(char16_t high, char16_t low)
      : high_(high), low_(low) {
    DCHECK(IsHighSurrogate(high_));
    DCHECK(IsLowSurrogate(low_));
  }

  // Encodes `code_point` as a surrogate pair. DCHECKs that `code_point` is
  // supplementary.
  explicit constexpr SurrogatePair(char32_t code_point)
      : high_(GetHighSurrogate(code_point)),
        low_(GetLowSurrogate(code_point)) {
    // Both members are computed by static helpers in the initializer list
    // rather than assigned in the body, since C++17 requires `constexpr`
    // constructors to initialize every data member.
    DCHECK(IsSupplementary(code_point));
  }

  // Accessors for the individual code units.
  constexpr char16_t high() const { return high_; }
  constexpr char16_t low() const { return low_; }

  // Reassembles the code point: the high surrogate's payload forms the upper
  // `kSurrogateBits` bits of the offset, the low surrogate's payload the lower
  // ones, and the offset is relative to `kMinimumSupplementaryCodePoint`.
  constexpr char32_t ToCodePoint() const {
    const char32_t upper_bits = high_ & kSurrogateMask;
    const char32_t lower_bits = low_ & kSurrogateMask;
    return kMinimumSupplementaryCodePoint +
           ((upper_bits << kSurrogateBits) | lower_bits);
  }

 private:
  // Computes the high surrogate for `code_point`: the upper `kSurrogateBits`
  // bits of its offset from `kMinimumSupplementaryCodePoint`, combined with
  // `kMinimumHighSurrogateCodeUnit`.
  static constexpr char16_t GetHighSurrogate(char32_t code_point) {
    const char32_t offset = code_point - kMinimumSupplementaryCodePoint;
    const char16_t payload =
        static_cast<char16_t>((offset >> kSurrogateBits) & kSurrogateMask);
    return static_cast<char16_t>(kMinimumHighSurrogateCodeUnit | payload);
  }

  // Computes the low surrogate for `code_point`: the lower `kSurrogateBits`
  // bits of its offset from `kMinimumSupplementaryCodePoint`, combined with
  // `kMinimumLowSurrogateCodeUnit`.
  static constexpr char16_t GetLowSurrogate(char32_t code_point) {
    const char32_t offset = code_point - kMinimumSupplementaryCodePoint;
    const char16_t payload = static_cast<char16_t>(offset & kSurrogateMask);
    return static_cast<char16_t>(kMinimumLowSurrogateCodeUnit | payload);
  }

  char16_t high_;
  char16_t low_;
};
} // namespace pdfium
#endif // CORE_FXCRT_UTF16_H_
|