summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSeigo Nonaka <nona@google.com>2023-10-05 16:36:57 +0900
committerSeigo Nonaka <nona@google.com>2023-10-05 16:40:37 +0900
commit5c98310927866ab4dd8c8e07b4020b7a8bf3795e (patch)
tree19e485b0d7527b526a1809c8fb9dc171f978148d
parent86acf0d3f37a7e70556791d8cf65e089c62d0e2b (diff)
downloadminikin-5c98310927866ab4dd8c8e07b4020b7a8bf3795e.tar.gz
Add support of no break style
Bug: 283193586 Test: minikin_tests Test: atest StaticLayoutLineBreakingNoBreakSpanTest Change-Id: I20aed04583198ce1cc4c79a74c594531f6f6c00c
-rw-r--r--include/minikin/LineBreakStyle.h1
-rw-r--r--libs/minikin/GreedyLineBreaker.cpp5
-rw-r--r--libs/minikin/LineBreakerUtil.h7
-rw-r--r--libs/minikin/WordBreaker.cpp52
-rw-r--r--libs/minikin/WordBreaker.h41
-rw-r--r--tests/unittest/WordBreakerTests.cpp2
6 files changed, 87 insertions, 21 deletions
diff --git a/include/minikin/LineBreakStyle.h b/include/minikin/LineBreakStyle.h
index f474cbd..052ffeb 100644
--- a/include/minikin/LineBreakStyle.h
+++ b/include/minikin/LineBreakStyle.h
@@ -26,6 +26,7 @@ enum class LineBreakStyle : uint8_t {
Loose = 1,
Normal = 2,
Strict = 3,
+ NoBreak = 4,
};
// The line break word style(lw) of the strings.
diff --git a/libs/minikin/GreedyLineBreaker.cpp b/libs/minikin/GreedyLineBreaker.cpp
index 66e3dd9..2d89cf2 100644
--- a/libs/minikin/GreedyLineBreaker.cpp
+++ b/libs/minikin/GreedyLineBreaker.cpp
@@ -431,18 +431,21 @@ void GreedyLineBreaker::process() {
// Following two will be initialized after the first iteration.
uint32_t localeListId = LocaleListCache::kInvalidListId;
+ LineBreakStyle lineBreakStyle{};  // Value-initialized: the comparison below must never read an indeterminate value.
uint32_t nextWordBoundaryOffset = 0;
for (const auto& run : mMeasuredText.runs) {
const Range range = run->getRange();
// Update locale if necessary.
uint32_t newLocaleListId = run->getLocaleListId();
- if (localeListId != newLocaleListId) {
+ LineBreakStyle newLineBreakStyle = run->lineBreakStyle();
+ if (localeListId != newLocaleListId || lineBreakStyle != newLineBreakStyle) {
Locale locale = getEffectiveLocale(newLocaleListId);
nextWordBoundaryOffset = wordBreaker.followingWithLocale(
locale, run->lineBreakStyle(), run->lineBreakWordStyle(), range.getStart());
mHyphenator = HyphenatorMap::lookup(locale);
localeListId = newLocaleListId;
+ lineBreakStyle = newLineBreakStyle;
}
for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) {
diff --git a/libs/minikin/LineBreakerUtil.h b/libs/minikin/LineBreakerUtil.h
index 6e572f1..b48948e 100644
--- a/libs/minikin/LineBreakerUtil.h
+++ b/libs/minikin/LineBreakerUtil.h
@@ -186,13 +186,15 @@ struct CharProcessor {
// time before feeding characters.
void updateLocaleIfNecessary(const Run& run) {
uint32_t newLocaleListId = run.getLocaleListId();
- if (localeListId != newLocaleListId) {
+ LineBreakStyle newLineBreakStyle = run.lineBreakStyle();
+ if (localeListId != newLocaleListId || lineBreakStyle != newLineBreakStyle) {
Locale locale = getEffectiveLocale(newLocaleListId);
nextWordBreak = breaker.followingWithLocale(locale, run.lineBreakStyle(),
run.lineBreakWordStyle(),
run.getRange().getStart());
hyphenator = HyphenatorMap::lookup(locale);
localeListId = newLocaleListId;
+ lineBreakStyle = newLineBreakStyle;
}
}
@@ -221,8 +223,9 @@ struct CharProcessor {
}
private:
- // The current locale list id.
+ // The current locale list id and line break style.
uint32_t localeListId = LocaleListCache::kInvalidListId;
+ LineBreakStyle lineBreakStyle{};  // Value-initialized so updateLocaleIfNecessary never compares an indeterminate value.
WordBreaker breaker;
};
diff --git a/libs/minikin/WordBreaker.cpp b/libs/minikin/WordBreaker.cpp
index ae79d3c..e3578b4 100644
--- a/libs/minikin/WordBreaker.cpp
+++ b/libs/minikin/WordBreaker.cpp
@@ -32,17 +32,41 @@
namespace minikin {
namespace {
-static UBreakIterator* createNewIterator(const Locale& locale, LineBreakStyle lbStyle,
- LineBreakWordStyle lbWordStyle) {
+static std::unique_ptr<BreakIterator> createNewIterator(const Locale& locale,
+ LineBreakStyle lbStyle,
+ LineBreakWordStyle lbWordStyle) {
// TODO: handle failure status
- UErrorCode status = U_ZERO_ERROR;
- char localeID[ULOC_FULLNAME_CAPACITY] = {};
- uloc_forLanguageTag(locale.getStringWithLineBreakOption(lbStyle, lbWordStyle).c_str(), localeID,
- ULOC_FULLNAME_CAPACITY, nullptr, &status);
- return ubrk_open(UBreakIteratorType::UBRK_LINE, localeID, nullptr, 0, &status);
+ if (lbStyle == LineBreakStyle::NoBreak) {
+ return std::make_unique<NoBreakBreakIterator>();
+ } else {
+ UErrorCode status = U_ZERO_ERROR;
+ char localeID[ULOC_FULLNAME_CAPACITY] = {};
+ uloc_forLanguageTag(locale.getStringWithLineBreakOption(lbStyle, lbWordStyle).c_str(),
+ localeID, ULOC_FULLNAME_CAPACITY, nullptr, &status);
+ IcuUbrkUniquePtr icuBrkPtr(
+ ubrk_open(UBreakIteratorType::UBRK_LINE, localeID, nullptr, 0, &status));
+ return std::make_unique<ICUBreakIterator>(std::move(icuBrkPtr));
+ }
}
} // namespace
+void ICUBreakIterator::setText(UText* text, size_t) {  // The size argument is unnamed and unused here.
+ UErrorCode status = U_ZERO_ERROR;
+ ubrk_setUText(mBreaker.get(), text, &status);  // NOTE(review): status is not checked after this call.
+}
+
+bool ICUBreakIterator::isBoundary(int32_t i) {  // Forwards the query to the wrapped ICU iterator.
+ return ubrk_isBoundary(mBreaker.get(), i);
+}
+
+int32_t ICUBreakIterator::following(size_t i) {  // Forwards to ubrk_following, which takes an int32_t offset.
+ return ubrk_following(mBreaker.get(), static_cast<int32_t>(i));
+}
+
+int32_t ICUBreakIterator::next() {  // Forwards to ubrk_next on the wrapped ICU iterator.
+ return ubrk_next(mBreaker.get());
+}
+
ICULineBreakerPool::Slot ICULineBreakerPoolImpl::acquire(const Locale& locale,
LineBreakStyle lbStyle,
LineBreakWordStyle lbWordStyle) {
@@ -57,8 +81,7 @@ ICULineBreakerPool::Slot ICULineBreakerPoolImpl::acquire(const Locale& locale,
}
// Not found in pool. Create new one.
- return {id, lbStyle, lbWordStyle,
- IcuUbrkUniquePtr(createNewIterator(locale, lbStyle, lbWordStyle))};
+ return {id, lbStyle, lbWordStyle, createNewIterator(locale, lbStyle, lbWordStyle)};
}
void ICULineBreakerPoolImpl::release(ICULineBreakerPool::Slot&& slot) {
@@ -86,10 +109,9 @@ ssize_t WordBreaker::followingWithLocale(const Locale& locale, LineBreakStyle lb
return mCurrent;
}
mIcuBreaker = mPool->acquire(locale, lbStyle, lbWordStyle);
- UErrorCode status = U_ZERO_ERROR;
MINIKIN_ASSERT(mText != nullptr, "setText must be called first");
// TODO: handle failure status
- ubrk_setUText(mIcuBreaker.breaker.get(), mUText.get(), &status);
+ mIcuBreaker.breaker->setText(mUText.get(), mTextSize);
if (mInEmailOrUrl) {
// Note:
// Don't reset mCurrent, mLast, or mScanOffset for keeping email/URL context.
@@ -171,9 +193,9 @@ static bool isValidBreak(const uint16_t* buf, size_t bufEnd, int32_t i) {
// Customized iteratorNext that takes care of both resets and our modifications
// to ICU's behavior.
int32_t WordBreaker::iteratorNext() {
- int32_t result = ubrk_following(mIcuBreaker.breaker.get(), mCurrent);
+ int32_t result = mIcuBreaker.breaker->following(mCurrent);
while (!isValidBreak(mText, mTextSize, result)) {
- result = ubrk_next(mIcuBreaker.breaker.get());
+ result = mIcuBreaker.breaker->next();
}
return result;
}
@@ -221,11 +243,11 @@ void WordBreaker::detectEmailOrUrl() {
}
}
if (state == SAW_AT || state == SAW_COLON_SLASH_SLASH) {
- if (!ubrk_isBoundary(mIcuBreaker.breaker.get(), i)) {
+ if (!mIcuBreaker.breaker->isBoundary(i)) {
// If there are combining marks or such at the end of the URL or the email address,
// consider them a part of the URL or the email, and skip to the next actual
// boundary.
- i = ubrk_following(mIcuBreaker.breaker.get(), i);
+ i = mIcuBreaker.breaker->following(i);
}
mInEmailOrUrl = true;
} else {
diff --git a/libs/minikin/WordBreaker.h b/libs/minikin/WordBreaker.h
index c4af635..ccb3186 100644
--- a/libs/minikin/WordBreaker.h
+++ b/libs/minikin/WordBreaker.h
@@ -37,6 +37,16 @@
namespace minikin {
+class BreakIterator {  // Abstract break-iterator interface; implemented by ICUBreakIterator and NoBreakBreakIterator.
+public:
+ BreakIterator() {}
+ virtual ~BreakIterator() {}  // Virtual: instances are owned through std::unique_ptr<BreakIterator>.
+ virtual void setText(UText* text, size_t size) = 0;  // Supplies the text to analyze together with its size.
+ virtual bool isBoundary(int32_t i) = 0;  // Reports whether offset i is a break boundary.
+ virtual int32_t following(size_t i) = 0;  // Returns the boundary that follows offset i.
+ virtual int32_t next() = 0;  // Returns the next boundary.
+};
+
// A class interface for providing pooling implementation of ICU's line breaker.
// The implementation can be customized for testing purposes.
class ICULineBreakerPool {
@@ -44,7 +54,7 @@ public:
struct Slot {
Slot() : localeId(0), breaker(nullptr) {}
Slot(uint64_t localeId, LineBreakStyle lbStyle, LineBreakWordStyle lbWordStyle,
- IcuUbrkUniquePtr&& breaker)
+ std::unique_ptr<BreakIterator>&& breaker)
: localeId(localeId),
lbStyle(lbStyle),
lbWordStyle(lbWordStyle),
@@ -60,7 +70,7 @@ public:
uint64_t localeId;
LineBreakStyle lbStyle;
LineBreakWordStyle lbWordStyle;
- IcuUbrkUniquePtr breaker;
+ std::unique_ptr<BreakIterator> breaker;
};
virtual ~ICULineBreakerPool() {}
virtual Slot acquire(const Locale& locale, LineBreakStyle lbStyle,
@@ -95,6 +105,33 @@ private:
mutable std::mutex mMutex;
};
+class ICUBreakIterator : public BreakIterator {  // BreakIterator backed by an ICU UBreakIterator.
+public:
+ explicit ICUBreakIterator(IcuUbrkUniquePtr&& breaker) : mBreaker(std::move(breaker)) {}
+ ~ICUBreakIterator() override {}
+ void setText(UText* text, size_t size) override;
+ bool isBoundary(int32_t i) override;
+ int32_t following(size_t i) override;
+ int32_t next() override;
+
+private:
+ IcuUbrkUniquePtr mBreaker;  // Owned ICU iterator that every method forwards to.
+};
+
+class NoBreakBreakIterator : public BreakIterator {  // Reports boundaries only at offset 0 and at the text size.
+public:
+ NoBreakBreakIterator() {}
+ ~NoBreakBreakIterator() override {}
+
+ void setText(UText*, size_t size) override { mSize = size; }  // Only the size is kept; the text is ignored.
+ bool isBoundary(int32_t i) override { return i == 0 || i == static_cast<int32_t>(mSize); }
+ int32_t following(size_t) override { return static_cast<int32_t>(mSize); }  // Always the text size.
+ int32_t next() override { return static_cast<int32_t>(mSize); }  // Always the text size.
+
+private:
+ size_t mSize = 0;  // Text size recorded by the most recent setText call.
+};
+
class WordBreaker {
public:
virtual ~WordBreaker() { finish(); }
diff --git a/tests/unittest/WordBreakerTests.cpp b/tests/unittest/WordBreakerTests.cpp
index 0c20a80..fe7f953 100644
--- a/tests/unittest/WordBreakerTests.cpp
+++ b/tests/unittest/WordBreakerTests.cpp
@@ -672,7 +672,7 @@ TEST(WordBreakerTest, LineBreakerPool_acquire_with_release) {
pool.acquire(enUS, LineBreakStyle::Loose, LineBreakWordStyle::None);
uint64_t enUSBreakerLocaleId = enUSBreaker.localeId;
- UBreakIterator* enUSBreakerPtr = enUSBreaker.breaker.get();
+ auto* enUSBreakerPtr = enUSBreaker.breaker.get();
pool.release(std::move(enUSBreaker));
EXPECT_EQ(nullptr, enUSBreaker.breaker.get());