diff options
author | Seigo Nonaka <nona@google.com> | 2023-10-11 15:42:18 +0900 |
---|---|---|
committer | Seigo Nonaka <nona@google.com> | 2023-10-25 10:54:54 +0900 |
commit | fd8349e7a33b3db583e4ea49714c585d282fea52 (patch) | |
tree | ab301dba2d5266bf672e954662260a41bbc9b980 /libs/minikin | |
parent | 308a35adb98cc428f0a2ab022958d8fcb1710bd0 (diff) | |
download | minikin-fd8349e7a33b3db583e4ea49714c585d282fea52.tar.gz |
Add LineBreakWordStyle::Auto and LineBreakStyle::Auto to minikin
Bug: 280005585
Test: minikin_tests
Change-Id: Ib2810ff27f6cf3ff38b8b73703704ff823902a8f
Diffstat (limited to 'libs/minikin')
-rw-r--r-- | libs/minikin/FeatureFlags.h | 8 | ||||
-rw-r--r-- | libs/minikin/GreedyLineBreaker.cpp | 59 | ||||
-rw-r--r-- | libs/minikin/LineBreakerUtil.h | 87 | ||||
-rw-r--r-- | libs/minikin/Locale.cpp | 15 | ||||
-rw-r--r-- | libs/minikin/Locale.h | 6 | ||||
-rw-r--r-- | libs/minikin/MeasuredText.cpp | 2 | ||||
-rw-r--r-- | libs/minikin/OptimalLineBreaker.cpp | 42 | ||||
-rw-r--r-- | libs/minikin/WordBreaker.cpp | 10 |
8 files changed, 191 insertions, 38 deletions
diff --git a/libs/minikin/FeatureFlags.h b/libs/minikin/FeatureFlags.h index 3aa2be6..3c2e455 100644 --- a/libs/minikin/FeatureFlags.h +++ b/libs/minikin/FeatureFlags.h @@ -31,6 +31,14 @@ inline bool phrase_strict_fallback() { #endif // __ANDROID__ } +inline bool word_style_auto() { +#ifdef __ANDROID__ + return com_android_text_flags_word_style_auto(); +#else + return true; +#endif // __ANDROID__ +} + } // namespace features #endif // FEATURE_FLAGS diff --git a/libs/minikin/GreedyLineBreaker.cpp b/libs/minikin/GreedyLineBreaker.cpp index 2d89cf2..c4ceabe 100644 --- a/libs/minikin/GreedyLineBreaker.cpp +++ b/libs/minikin/GreedyLineBreaker.cpp @@ -49,10 +49,12 @@ public: mEnableHyphenation(enableHyphenation), mUseBoundsForWidth(useBoundsForWidth) {} - void process(); + void process(bool forceWordStyleAutoToPhrase); LineBreakResult getResult() const; + bool retryWithPhraseWordBreak = false; + private: struct BreakPoint { BreakPoint(uint32_t offset, float lineWidth, StartHyphenEdit startHyphen, @@ -425,27 +427,27 @@ void GreedyLineBreaker::processLineBreak(uint32_t offset, WordBreaker* breaker, } } -void GreedyLineBreaker::process() { +void GreedyLineBreaker::process(bool forceWordStyleAutoToPhrase) { WordBreaker wordBreaker; wordBreaker.setText(mTextBuf.data(), mTextBuf.size()); - // Following two will be initialized after the first iteration. - uint32_t localeListId = LocaleListCache::kInvalidListId; - LineBreakStyle lineBreakStyle; + WordBreakerTransitionTracker wbTracker; uint32_t nextWordBoundaryOffset = 0; for (const auto& run : mMeasuredText.runs) { const Range range = run->getRange(); // Update locale if necessary. 
- uint32_t newLocaleListId = run->getLocaleListId(); - LineBreakStyle newLineBreakStyle = run->lineBreakStyle(); - if (localeListId != newLocaleListId || lineBreakStyle != newLineBreakStyle) { - Locale locale = getEffectiveLocale(newLocaleListId); - nextWordBoundaryOffset = wordBreaker.followingWithLocale( - locale, run->lineBreakStyle(), run->lineBreakWordStyle(), range.getStart()); + if (wbTracker.update(*run)) { + const LocaleList& localeList = wbTracker.getCurrentLocaleList(); + const Locale locale = localeList.empty() ? Locale() : localeList[0]; + + LineBreakWordStyle lbWordStyle = wbTracker.getCurrentLineBreakWordStyle(); + std::tie(lbWordStyle, retryWithPhraseWordBreak) = + resolveWordStyleAuto(lbWordStyle, localeList, forceWordStyleAutoToPhrase); + + nextWordBoundaryOffset = wordBreaker.followingWithLocale(locale, run->lineBreakStyle(), + lbWordStyle, range.getStart()); mHyphenator = HyphenatorMap::lookup(locale); - localeListId = newLocaleListId; - lineBreakStyle = newLineBreakStyle; } for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) { @@ -523,8 +525,35 @@ LineBreakResult breakLineGreedy(const U16StringPiece& textBuf, const MeasuredTex } GreedyLineBreaker lineBreaker(textBuf, measured, lineWidthLimits, tabStops, enableHyphenation, useBoundsForWidth); - lineBreaker.process(); - return lineBreaker.getResult(); + lineBreaker.process(false); + LineBreakResult res = lineBreaker.getResult(); + + if (!features::word_style_auto()) { + return res; + } + + // The line breaker says that retry with phrase based word break because of the auto option and + // given locales. + if (!lineBreaker.retryWithPhraseWordBreak) { + return res; + } + + // If the line break result is more than heuristics threshold, don't try phrase based word + // break. 
+ if (res.breakPoints.size() >= LBW_AUTO_HEURISTICS_LINE_COUNT) { + return res; + } + + GreedyLineBreaker phLineBreaker(textBuf, measured, lineWidthLimits, tabStops, enableHyphenation, + useBoundsForWidth); + phLineBreaker.process(true); + LineBreakResult res2 = phLineBreaker.getResult(); + + if (res2.breakPoints.size() < LBW_AUTO_HEURISTICS_LINE_COUNT) { + return res2; + } else { + return res; + } } } // namespace minikin diff --git a/libs/minikin/LineBreakerUtil.h b/libs/minikin/LineBreakerUtil.h index b48948e..030e677 100644 --- a/libs/minikin/LineBreakerUtil.h +++ b/libs/minikin/LineBreakerUtil.h @@ -19,19 +19,21 @@ #include <vector> -#include "minikin/Hyphenator.h" -#include "minikin/MeasuredText.h" -#include "minikin/U16StringPiece.h" - #include "HyphenatorMap.h" #include "LayoutUtils.h" #include "Locale.h" #include "LocaleListCache.h" #include "MinikinInternal.h" #include "WordBreaker.h" +#include "minikin/Hyphenator.h" +#include "minikin/LineBreakStyle.h" +#include "minikin/MeasuredText.h" +#include "minikin/U16StringPiece.h" namespace minikin { +constexpr uint32_t LBW_AUTO_HEURISTICS_LINE_COUNT = 5; + // ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for very large // paragraphs, accuracy could degrade using only 32-bit float. Note however that float is used // extensively on the Java side for this. This is a typedef so that we can easily change it based @@ -134,6 +136,58 @@ inline void populateHyphenationPoints( } } +// Class for tracking the word breaker transition point. +class WordBreakerTransitionTracker { +public: + // Update the word breaker transition information. This function return true if the word + // breaker need to be updated. 
+ bool update(const Run& run) { + const uint32_t newLocaleListId = run.getLocaleListId(); + const LineBreakStyle newLineBreakStyle = run.lineBreakStyle(); + const LineBreakWordStyle newLineBreakWordStyle = run.lineBreakWordStyle(); + const bool isUpdate = localeListId != newLocaleListId || + lineBreakStyle != newLineBreakStyle || + lineBreakWordStyle != newLineBreakWordStyle; + + localeListId = newLocaleListId; + lineBreakStyle = newLineBreakStyle; + lineBreakWordStyle = newLineBreakWordStyle; + + return isUpdate; + } + + const LocaleList& getCurrentLocaleList() const { + return LocaleListCache::getById(localeListId); + } + + LineBreakStyle getCurrentLineBreakStyle() const { return lineBreakStyle; } + + LineBreakWordStyle getCurrentLineBreakWordStyle() const { return lineBreakWordStyle; } + +private: + uint32_t localeListId = LocaleListCache::kInvalidListId; + LineBreakStyle lineBreakStyle = LineBreakStyle::None; + LineBreakWordStyle lineBreakWordStyle = LineBreakWordStyle::None; +}; + +inline std::pair<LineBreakWordStyle, bool> resolveWordStyleAuto(LineBreakWordStyle lbWordStyle, + const LocaleList& localeList, + bool forceWordStyleAutoToPhrase) { + if (lbWordStyle == LineBreakWordStyle::Auto) { + if (forceWordStyleAutoToPhrase) { + return std::make_pair(LineBreakWordStyle::Phrase, false); + } else if (localeList.hasKorean()) { + return std::make_pair(LineBreakWordStyle::Phrase, false); + } else if (localeList.hasJapanese()) { + return std::make_pair(LineBreakWordStyle::None, true); + } else { + return std::make_pair(LineBreakWordStyle::None, false); + } + } else { + return std::make_pair(lbWordStyle, false); + } +} + // Processes and retrieve informations from characters in the paragraph. struct CharProcessor { // The number of spaces. @@ -166,6 +220,8 @@ struct CharProcessor { // The current hyphenator. const Hyphenator* hyphenator = nullptr; + bool retryWithPhraseWordBreak = false; + // Retrieve the current word range. 
inline Range wordRange() const { return breaker.wordRange(); } @@ -184,17 +240,17 @@ struct CharProcessor { // The user of CharProcessor must call updateLocaleIfNecessary with valid locale at least one // time before feeding characters. - void updateLocaleIfNecessary(const Run& run) { - uint32_t newLocaleListId = run.getLocaleListId(); - LineBreakStyle newLineBreakStyle = run.lineBreakStyle(); - if (localeListId != newLocaleListId || lineBreakStyle != newLineBreakStyle) { - Locale locale = getEffectiveLocale(newLocaleListId); - nextWordBreak = breaker.followingWithLocale(locale, run.lineBreakStyle(), - run.lineBreakWordStyle(), + void updateLocaleIfNecessary(const Run& run, bool forceWordStyleAutoToPhrase) { + if (wbTracker.update(run)) { + const LocaleList& localeList = wbTracker.getCurrentLocaleList(); + const Locale locale = localeList.empty() ? Locale() : localeList[0]; + + LineBreakWordStyle lbWordStyle = wbTracker.getCurrentLineBreakWordStyle(); + std::tie(lbWordStyle, retryWithPhraseWordBreak) = + resolveWordStyleAuto(lbWordStyle, localeList, forceWordStyleAutoToPhrase); + nextWordBreak = breaker.followingWithLocale(locale, run.lineBreakStyle(), lbWordStyle, run.getRange().getStart()); hyphenator = HyphenatorMap::lookup(locale); - localeListId = newLocaleListId; - lineBreakStyle = newLineBreakStyle; } } @@ -223,10 +279,7 @@ struct CharProcessor { } private: - // The current locale list id, line break style, line break word style. 
- uint32_t localeListId = LocaleListCache::kInvalidListId; - LineBreakStyle lineBreakStyle; - + WordBreakerTransitionTracker wbTracker; WordBreaker breaker; }; } // namespace minikin diff --git a/libs/minikin/Locale.cpp b/libs/minikin/Locale.cpp index a399944..f7e7323 100644 --- a/libs/minikin/Locale.cpp +++ b/libs/minikin/Locale.cpp @@ -467,6 +467,12 @@ bool Locale::supportsScript(uint32_t script) const { return supportsScript(mSubScriptBits, scriptToSubScriptBits(packedScript)); } +bool Locale::supportsScript(char c1, char c2, char c3, char c4) const { + uint32_t packedScript = packScript(c1, c2, c3, c4); + if (packedScript == mScript) return true; + return supportsScript(mSubScriptBits, scriptToSubScriptBits(packedScript)); +} + int Locale::calcScoreFor(const LocaleList& supported) const { bool languageScriptMatch = false; bool subtagMatch = false; @@ -544,4 +550,13 @@ bool LocaleList::atLeastOneScriptMatch(const LocaleList& list) const { return false; } +bool LocaleList::hasScript(char c1, char c2, char c3, char c4) const { + for (const Locale& locale : mLocales) { + if (locale.supportsScript(c1, c2, c3, c4)) { + return true; + } + } + return false; +} + } // namespace minikin diff --git a/libs/minikin/Locale.h b/libs/minikin/Locale.h index 31bd51f..a19daa9 100644 --- a/libs/minikin/Locale.h +++ b/libs/minikin/Locale.h @@ -123,6 +123,7 @@ public: // Returns true if this script supports the given script. For example, ja-Jpan supports Hira, // ja-Hira doesn't support Jpan. 
bool supportsScript(uint32_t script) const; + bool supportsScript(char c1, char c2, char c3, char c4) const; std::string getString() const; @@ -219,6 +220,9 @@ public: bool atLeastOneScriptMatch(const LocaleList& list) const; + bool hasJapanese() const { return hasScript('J', 'p', 'a', 'n'); } + bool hasKorean() const { return hasScript('K', 'o', 'r', 'e'); } + private: friend struct Locale; // for calcScoreFor @@ -233,6 +237,8 @@ private: uint8_t getUnionOfSubScriptBits() const { return mUnionOfSubScriptBits; } bool isAllTheSameLocale() const { return mIsAllTheSameLocale; } + bool hasScript(char c1, char c2, char c3, char c4) const; + // Do not copy and assign. LocaleList(const LocaleList&) = delete; void operator=(const LocaleList&) = delete; diff --git a/libs/minikin/MeasuredText.cpp b/libs/minikin/MeasuredText.cpp index 3ae19a0..8a37dc7 100644 --- a/libs/minikin/MeasuredText.cpp +++ b/libs/minikin/MeasuredText.cpp @@ -185,7 +185,7 @@ void MeasuredText::measure(const U16StringPiece& textBuf, bool computeHyphenatio continue; } - proc.updateLocaleIfNecessary(*run); + proc.updateLocaleIfNecessary(*run, false /* forceWordStyleAutoToPhrase */); for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) { // Even if the run is not a candidate of line break, treat the end of run as the line // break candidate. diff --git a/libs/minikin/OptimalLineBreaker.cpp b/libs/minikin/OptimalLineBreaker.cpp index d3db0ec..1d11532 100644 --- a/libs/minikin/OptimalLineBreaker.cpp +++ b/libs/minikin/OptimalLineBreaker.cpp @@ -97,6 +97,8 @@ struct OptimizeContext { // fonts), it's only guaranteed to pick one. float spaceWidth = 0.0f; + bool retryWithPhraseWordBreak = false; + // Append desperate break point to the candidates. inline void pushDesperate(uint32_t offset, ParaWidth sumOfCharWidths, float score, uint32_t spaceCount, bool isRtl) { @@ -239,7 +241,7 @@ void appendWithMerging(std::vector<HyphenBreak>::const_iterator hyIter, // Enumerate all line break candidates. 
OptimizeContext populateCandidates(const U16StringPiece& textBuf, const MeasuredText& measured, const LineWidth& lineWidth, HyphenationFrequency frequency, - bool isJustified) { + bool isJustified, bool forceWordStyleAutoToPhrase) { const ParaWidth minLineWidth = lineWidth.getMin(); CharProcessor proc(textBuf); @@ -260,7 +262,7 @@ OptimizeContext populateCandidates(const U16StringPiece& textBuf, const Measured result.linePenalty = std::max(penalties.second, result.linePenalty); } - proc.updateLocaleIfNecessary(*run); + proc.updateLocaleIfNecessary(*run, forceWordStyleAutoToPhrase); for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) { MINIKIN_ASSERT(textBuf[i] != CHAR_TAB, "TAB is not supported in optimal line breaker"); @@ -301,6 +303,7 @@ OptimizeContext populateCandidates(const U16StringPiece& textBuf, const Measured } } result.spaceWidth = proc.spaceWidth; + result.retryWithPhraseWordBreak = proc.retryWithPhraseWordBreak; return result; } @@ -484,11 +487,40 @@ LineBreakResult breakLineOptimal(const U16StringPiece& textBuf, const MeasuredTe if (textBuf.size() == 0) { return LineBreakResult(); } + const OptimizeContext context = - populateCandidates(textBuf, measured, lineWidth, frequency, justified); + populateCandidates(textBuf, measured, lineWidth, frequency, justified, + false /* forceWordStyleAutoToPhrase */); LineBreakOptimizer optimizer; - return optimizer.computeBreaks(context, textBuf, measured, lineWidth, strategy, justified, - useBoundsForWidth); + LineBreakResult res = optimizer.computeBreaks(context, textBuf, measured, lineWidth, strategy, + justified, useBoundsForWidth); + + if (!features::word_style_auto()) { + return res; + } + + // The line breaker says that retry with phrase based word break because of the auto option and + // given locales. + if (!context.retryWithPhraseWordBreak) { + return res; + } + + // If the line break result is more than heuristics threshold, don't try phrase based word + // break. 
+ if (res.breakPoints.size() >= LBW_AUTO_HEURISTICS_LINE_COUNT) { + return res; + } + + const OptimizeContext phContext = + populateCandidates(textBuf, measured, lineWidth, frequency, justified, + true /* forceWordStyleAutoToPhrase */); + LineBreakResult res2 = optimizer.computeBreaks(phContext, textBuf, measured, lineWidth, + strategy, justified, useBoundsForWidth); + if (res2.breakPoints.size() < LBW_AUTO_HEURISTICS_LINE_COUNT) { + return res2; + } else { + return res; + } } } // namespace minikin diff --git a/libs/minikin/WordBreaker.cpp b/libs/minikin/WordBreaker.cpp index e3578b4..a1e9526 100644 --- a/libs/minikin/WordBreaker.cpp +++ b/libs/minikin/WordBreaker.cpp @@ -35,6 +35,11 @@ namespace { static std::unique_ptr<BreakIterator> createNewIterator(const Locale& locale, LineBreakStyle lbStyle, LineBreakWordStyle lbWordStyle) { + MINIKIN_ASSERT(lbStyle != LineBreakStyle::Auto, + "LineBreakStyle::Auto must be resolved beforehand."); + MINIKIN_ASSERT(lbWordStyle != LineBreakWordStyle::Auto, + "LineBreakWordStyle::Auto must be resolved beforehand."); + // TODO: handle failure status if (lbStyle == LineBreakStyle::NoBreak) { return std::make_unique<NoBreakBreakIterator>(); @@ -70,6 +75,11 @@ int32_t ICUBreakIterator::next() { ICULineBreakerPool::Slot ICULineBreakerPoolImpl::acquire(const Locale& locale, LineBreakStyle lbStyle, LineBreakWordStyle lbWordStyle) { + if (lbStyle == LineBreakStyle::Auto) { + lbStyle = locale.supportsScript('J', 'p', 'a', 'n') ? LineBreakStyle::Strict + : LineBreakStyle::None; + } + const uint64_t id = locale.getIdentifier(); std::lock_guard<std::mutex> lock(mMutex); for (auto i = mPool.begin(); i != mPool.end(); i++) { |