summaryrefslogtreecommitdiff
path: root/libs/minikin
diff options
context:
space:
mode:
authorSeigo Nonaka <nona@google.com>2023-10-11 15:42:18 +0900
committerSeigo Nonaka <nona@google.com>2023-10-25 10:54:54 +0900
commitfd8349e7a33b3db583e4ea49714c585d282fea52 (patch)
treeab301dba2d5266bf672e954662260a41bbc9b980 /libs/minikin
parent308a35adb98cc428f0a2ab022958d8fcb1710bd0 (diff)
downloadminikin-fd8349e7a33b3db583e4ea49714c585d282fea52.tar.gz
Add LineBreakWordStyle::Auto and LineBreakStyle::Auto to minikin
Bug: 280005585 Test: minikin_tests Change-Id: Ib2810ff27f6cf3ff38b8b73703704ff823902a8f
Diffstat (limited to 'libs/minikin')
-rw-r--r--libs/minikin/FeatureFlags.h8
-rw-r--r--libs/minikin/GreedyLineBreaker.cpp59
-rw-r--r--libs/minikin/LineBreakerUtil.h87
-rw-r--r--libs/minikin/Locale.cpp15
-rw-r--r--libs/minikin/Locale.h6
-rw-r--r--libs/minikin/MeasuredText.cpp2
-rw-r--r--libs/minikin/OptimalLineBreaker.cpp42
-rw-r--r--libs/minikin/WordBreaker.cpp10
8 files changed, 191 insertions, 38 deletions
diff --git a/libs/minikin/FeatureFlags.h b/libs/minikin/FeatureFlags.h
index 3aa2be6..3c2e455 100644
--- a/libs/minikin/FeatureFlags.h
+++ b/libs/minikin/FeatureFlags.h
@@ -31,6 +31,14 @@ inline bool phrase_strict_fallback() {
#endif // __ANDROID__
}
+inline bool word_style_auto() {  // Reports whether the word-style-auto feature is enabled.
+#ifdef __ANDROID__
+ return com_android_text_flags_word_style_auto();  // Android: defer to the text flags accessor.
+#else
+ return true;  // Non-Android builds: always enabled.
+#endif // __ANDROID__
+}
+
} // namespace features
#endif // FEATURE_FLAGS
diff --git a/libs/minikin/GreedyLineBreaker.cpp b/libs/minikin/GreedyLineBreaker.cpp
index 2d89cf2..c4ceabe 100644
--- a/libs/minikin/GreedyLineBreaker.cpp
+++ b/libs/minikin/GreedyLineBreaker.cpp
@@ -49,10 +49,12 @@ public:
mEnableHyphenation(enableHyphenation),
mUseBoundsForWidth(useBoundsForWidth) {}
- void process();
+ void process(bool forceWordStyleAutoToPhrase);
LineBreakResult getResult() const;
+ bool retryWithPhraseWordBreak = false;
+
private:
struct BreakPoint {
BreakPoint(uint32_t offset, float lineWidth, StartHyphenEdit startHyphen,
@@ -425,27 +427,27 @@ void GreedyLineBreaker::processLineBreak(uint32_t offset, WordBreaker* breaker,
}
}
-void GreedyLineBreaker::process() {
+void GreedyLineBreaker::process(bool forceWordStyleAutoToPhrase) {
WordBreaker wordBreaker;
wordBreaker.setText(mTextBuf.data(), mTextBuf.size());
- // Following two will be initialized after the first iteration.
- uint32_t localeListId = LocaleListCache::kInvalidListId;
- LineBreakStyle lineBreakStyle;
+ WordBreakerTransitionTracker wbTracker;
uint32_t nextWordBoundaryOffset = 0;
for (const auto& run : mMeasuredText.runs) {
const Range range = run->getRange();
// Update locale if necessary.
- uint32_t newLocaleListId = run->getLocaleListId();
- LineBreakStyle newLineBreakStyle = run->lineBreakStyle();
- if (localeListId != newLocaleListId || lineBreakStyle != newLineBreakStyle) {
- Locale locale = getEffectiveLocale(newLocaleListId);
- nextWordBoundaryOffset = wordBreaker.followingWithLocale(
- locale, run->lineBreakStyle(), run->lineBreakWordStyle(), range.getStart());
+ if (wbTracker.update(*run)) {
+ const LocaleList& localeList = wbTracker.getCurrentLocaleList();
+ const Locale locale = localeList.empty() ? Locale() : localeList[0];
+
+ LineBreakWordStyle lbWordStyle = wbTracker.getCurrentLineBreakWordStyle();
+ std::tie(lbWordStyle, retryWithPhraseWordBreak) =
+ resolveWordStyleAuto(lbWordStyle, localeList, forceWordStyleAutoToPhrase);
+
+ nextWordBoundaryOffset = wordBreaker.followingWithLocale(locale, run->lineBreakStyle(),
+ lbWordStyle, range.getStart());
mHyphenator = HyphenatorMap::lookup(locale);
- localeListId = newLocaleListId;
- lineBreakStyle = newLineBreakStyle;
}
for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) {
@@ -523,8 +525,35 @@ LineBreakResult breakLineGreedy(const U16StringPiece& textBuf, const MeasuredTex
}
GreedyLineBreaker lineBreaker(textBuf, measured, lineWidthLimits, tabStops, enableHyphenation,
useBoundsForWidth);
- lineBreaker.process();
- return lineBreaker.getResult();
+ lineBreaker.process(false);
+ LineBreakResult res = lineBreaker.getResult();
+
+ if (!features::word_style_auto()) {
+ return res;
+ }
+
+ // Retry with phrase-based word breaking only when the line breaker requested it because of the
+ // auto option and the given locales.
+ if (!lineBreaker.retryWithPhraseWordBreak) {
+ return res;
+ }
+
+ // If the number of break points already reaches the heuristics threshold, don't try
+ // phrase-based word breaking.
+ if (res.breakPoints.size() >= LBW_AUTO_HEURISTICS_LINE_COUNT) {
+ return res;
+ }
+
+ GreedyLineBreaker phLineBreaker(textBuf, measured, lineWidthLimits, tabStops, enableHyphenation,
+ useBoundsForWidth);
+ phLineBreaker.process(true);
+ LineBreakResult res2 = phLineBreaker.getResult();
+
+ if (res2.breakPoints.size() < LBW_AUTO_HEURISTICS_LINE_COUNT) {
+ return res2;
+ } else {
+ return res;
+ }
}
} // namespace minikin
diff --git a/libs/minikin/LineBreakerUtil.h b/libs/minikin/LineBreakerUtil.h
index b48948e..030e677 100644
--- a/libs/minikin/LineBreakerUtil.h
+++ b/libs/minikin/LineBreakerUtil.h
@@ -19,19 +19,21 @@
#include <vector>
-#include "minikin/Hyphenator.h"
-#include "minikin/MeasuredText.h"
-#include "minikin/U16StringPiece.h"
-
#include "HyphenatorMap.h"
#include "LayoutUtils.h"
#include "Locale.h"
#include "LocaleListCache.h"
#include "MinikinInternal.h"
#include "WordBreaker.h"
+#include "minikin/Hyphenator.h"
+#include "minikin/LineBreakStyle.h"
+#include "minikin/MeasuredText.h"
+#include "minikin/U16StringPiece.h"
namespace minikin {
+constexpr uint32_t LBW_AUTO_HEURISTICS_LINE_COUNT = 5;
+
// ParaWidth is used to hold cumulative width from beginning of paragraph. Note that for very large
// paragraphs, accuracy could degrade using only 32-bit float. Note however that float is used
// extensively on the Java side for this. This is a typedef so that we can easily change it based
@@ -134,6 +136,58 @@ inline void populateHyphenationPoints(
}
}
+// Remembers the latest run's locale list and break styles to detect word breaker transitions.
+class WordBreakerTransitionTracker {
+public:
+ // Records the run's locale list ID, line break style and line break word style. Returns true if
+ // any of them differs from the previously recorded value, i.e. the word breaker needs updating.
+ bool update(const Run& run) {
+ const uint32_t newLocaleListId = run.getLocaleListId();
+ const LineBreakStyle newLineBreakStyle = run.lineBreakStyle();
+ const LineBreakWordStyle newLineBreakWordStyle = run.lineBreakWordStyle();
+ const bool isUpdate = localeListId != newLocaleListId ||
+ lineBreakStyle != newLineBreakStyle ||
+ lineBreakWordStyle != newLineBreakWordStyle;
+
+ localeListId = newLocaleListId;  // The new values are stored whether or not they changed.
+ lineBreakStyle = newLineBreakStyle;
+ lineBreakWordStyle = newLineBreakWordStyle;
+
+ return isUpdate;
+ }
+
+ const LocaleList& getCurrentLocaleList() const {  // Looks up the list for the stored ID.
+ return LocaleListCache::getById(localeListId);
+ }
+
+ LineBreakStyle getCurrentLineBreakStyle() const { return lineBreakStyle; }
+
+ LineBreakWordStyle getCurrentLineBreakWordStyle() const { return lineBreakWordStyle; }
+
+private:
+ uint32_t localeListId = LocaleListCache::kInvalidListId;  // Until update() stores a run's ID.
+ LineBreakStyle lineBreakStyle = LineBreakStyle::None;
+ LineBreakWordStyle lineBreakWordStyle = LineBreakWordStyle::None;
+};
+
+inline std::pair<LineBreakWordStyle, bool> resolveWordStyleAuto(LineBreakWordStyle lbWordStyle,
+ const LocaleList& localeList,
+ bool forceWordStyleAutoToPhrase) {  // Returns {resolved style, retry-with-phrase flag}.
+ if (lbWordStyle == LineBreakWordStyle::Auto) {
+ if (forceWordStyleAutoToPhrase) {
+ return std::make_pair(LineBreakWordStyle::Phrase, false);  // Forced: Phrase, no retry.
+ } else if (localeList.hasKorean()) {
+ return std::make_pair(LineBreakWordStyle::Phrase, false);  // Korean: Phrase, no retry.
+ } else if (localeList.hasJapanese()) {
+ return std::make_pair(LineBreakWordStyle::None, true);  // Japanese: None, flag a retry.
+ } else {
+ return std::make_pair(LineBreakWordStyle::None, false);  // Other locales: None, no retry.
+ }
+ } else {
+ return std::make_pair(lbWordStyle, false);  // Non-Auto styles pass through unchanged.
+ }
+}
+
// Processes and retrieve informations from characters in the paragraph.
struct CharProcessor {
// The number of spaces.
@@ -166,6 +220,8 @@ struct CharProcessor {
// The current hyphenator.
const Hyphenator* hyphenator = nullptr;
+ bool retryWithPhraseWordBreak = false;
+
// Retrieve the current word range.
inline Range wordRange() const { return breaker.wordRange(); }
@@ -184,17 +240,17 @@ struct CharProcessor {
// The user of CharProcessor must call updateLocaleIfNecessary with valid locale at least one
// time before feeding characters.
- void updateLocaleIfNecessary(const Run& run) {
- uint32_t newLocaleListId = run.getLocaleListId();
- LineBreakStyle newLineBreakStyle = run.lineBreakStyle();
- if (localeListId != newLocaleListId || lineBreakStyle != newLineBreakStyle) {
- Locale locale = getEffectiveLocale(newLocaleListId);
- nextWordBreak = breaker.followingWithLocale(locale, run.lineBreakStyle(),
- run.lineBreakWordStyle(),
+ void updateLocaleIfNecessary(const Run& run, bool forceWordStyleAutoToPhrase) {
+ if (wbTracker.update(run)) {
+ const LocaleList& localeList = wbTracker.getCurrentLocaleList();
+ const Locale locale = localeList.empty() ? Locale() : localeList[0];
+
+ LineBreakWordStyle lbWordStyle = wbTracker.getCurrentLineBreakWordStyle();
+ std::tie(lbWordStyle, retryWithPhraseWordBreak) =
+ resolveWordStyleAuto(lbWordStyle, localeList, forceWordStyleAutoToPhrase);
+ nextWordBreak = breaker.followingWithLocale(locale, run.lineBreakStyle(), lbWordStyle,
run.getRange().getStart());
hyphenator = HyphenatorMap::lookup(locale);
- localeListId = newLocaleListId;
- lineBreakStyle = newLineBreakStyle;
}
}
@@ -223,10 +279,7 @@ struct CharProcessor {
}
private:
- // The current locale list id, line break style, line break word style.
- uint32_t localeListId = LocaleListCache::kInvalidListId;
- LineBreakStyle lineBreakStyle;
-
+ WordBreakerTransitionTracker wbTracker;
WordBreaker breaker;
};
} // namespace minikin
diff --git a/libs/minikin/Locale.cpp b/libs/minikin/Locale.cpp
index a399944..f7e7323 100644
--- a/libs/minikin/Locale.cpp
+++ b/libs/minikin/Locale.cpp
@@ -467,6 +467,12 @@ bool Locale::supportsScript(uint32_t script) const {
return supportsScript(mSubScriptBits, scriptToSubScriptBits(packedScript));
}
+bool Locale::supportsScript(char c1, char c2, char c3, char c4) const {  // Script as four chars.
+ uint32_t packedScript = packScript(c1, c2, c3, c4);
+ if (packedScript == mScript) return true;  // Exact match with this locale's script.
+ return supportsScript(mSubScriptBits, scriptToSubScriptBits(packedScript));  // Sub-script check.
+}
+
int Locale::calcScoreFor(const LocaleList& supported) const {
bool languageScriptMatch = false;
bool subtagMatch = false;
@@ -544,4 +550,13 @@ bool LocaleList::atLeastOneScriptMatch(const LocaleList& list) const {
return false;
}
+bool LocaleList::hasScript(char c1, char c2, char c3, char c4) const {  // Any locale matches?
+ for (const Locale& locale : mLocales) {
+ if (locale.supportsScript(c1, c2, c3, c4)) {
+ return true;  // Stop at the first locale that supports the script.
+ }
+ }
+ return false;  // No locale matched; also the result for an empty list.
+}
+
} // namespace minikin
diff --git a/libs/minikin/Locale.h b/libs/minikin/Locale.h
index 31bd51f..a19daa9 100644
--- a/libs/minikin/Locale.h
+++ b/libs/minikin/Locale.h
@@ -123,6 +123,7 @@ public:
// Returns true if this script supports the given script. For example, ja-Jpan supports Hira,
// ja-Hira doesn't support Jpan.
bool supportsScript(uint32_t script) const;
+ bool supportsScript(char c1, char c2, char c3, char c4) const;
std::string getString() const;
@@ -219,6 +220,9 @@ public:
bool atLeastOneScriptMatch(const LocaleList& list) const;
+ bool hasJapanese() const { return hasScript('J', 'p', 'a', 'n'); }  // Checks the Jpan script.
+ bool hasKorean() const { return hasScript('K', 'o', 'r', 'e'); }  // Checks the Kore script.
+
private:
friend struct Locale; // for calcScoreFor
@@ -233,6 +237,8 @@ private:
uint8_t getUnionOfSubScriptBits() const { return mUnionOfSubScriptBits; }
bool isAllTheSameLocale() const { return mIsAllTheSameLocale; }
+ bool hasScript(char c1, char c2, char c3, char c4) const;
+
// Do not copy and assign.
LocaleList(const LocaleList&) = delete;
void operator=(const LocaleList&) = delete;
diff --git a/libs/minikin/MeasuredText.cpp b/libs/minikin/MeasuredText.cpp
index 3ae19a0..8a37dc7 100644
--- a/libs/minikin/MeasuredText.cpp
+++ b/libs/minikin/MeasuredText.cpp
@@ -185,7 +185,7 @@ void MeasuredText::measure(const U16StringPiece& textBuf, bool computeHyphenatio
continue;
}
- proc.updateLocaleIfNecessary(*run);
+ proc.updateLocaleIfNecessary(*run, false /* forceWordStyleAutoToPhrase */);
for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) {
// Even if the run is not a candidate of line break, treat the end of run as the line
// break candidate.
diff --git a/libs/minikin/OptimalLineBreaker.cpp b/libs/minikin/OptimalLineBreaker.cpp
index d3db0ec..1d11532 100644
--- a/libs/minikin/OptimalLineBreaker.cpp
+++ b/libs/minikin/OptimalLineBreaker.cpp
@@ -97,6 +97,8 @@ struct OptimizeContext {
// fonts), it's only guaranteed to pick one.
float spaceWidth = 0.0f;
+ bool retryWithPhraseWordBreak = false;
+
// Append desperate break point to the candidates.
inline void pushDesperate(uint32_t offset, ParaWidth sumOfCharWidths, float score,
uint32_t spaceCount, bool isRtl) {
@@ -239,7 +241,7 @@ void appendWithMerging(std::vector<HyphenBreak>::const_iterator hyIter,
// Enumerate all line break candidates.
OptimizeContext populateCandidates(const U16StringPiece& textBuf, const MeasuredText& measured,
const LineWidth& lineWidth, HyphenationFrequency frequency,
- bool isJustified) {
+ bool isJustified, bool forceWordStyleAutoToPhrase) {
const ParaWidth minLineWidth = lineWidth.getMin();
CharProcessor proc(textBuf);
@@ -260,7 +262,7 @@ OptimizeContext populateCandidates(const U16StringPiece& textBuf, const Measured
result.linePenalty = std::max(penalties.second, result.linePenalty);
}
- proc.updateLocaleIfNecessary(*run);
+ proc.updateLocaleIfNecessary(*run, forceWordStyleAutoToPhrase);
for (uint32_t i = range.getStart(); i < range.getEnd(); ++i) {
MINIKIN_ASSERT(textBuf[i] != CHAR_TAB, "TAB is not supported in optimal line breaker");
@@ -301,6 +303,7 @@ OptimizeContext populateCandidates(const U16StringPiece& textBuf, const Measured
}
}
result.spaceWidth = proc.spaceWidth;
+ result.retryWithPhraseWordBreak = proc.retryWithPhraseWordBreak;
return result;
}
@@ -484,11 +487,40 @@ LineBreakResult breakLineOptimal(const U16StringPiece& textBuf, const MeasuredTe
if (textBuf.size() == 0) {
return LineBreakResult();
}
+
const OptimizeContext context =
- populateCandidates(textBuf, measured, lineWidth, frequency, justified);
+ populateCandidates(textBuf, measured, lineWidth, frequency, justified,
+ false /* forceWordStyleAutoToPhrase */);
LineBreakOptimizer optimizer;
- return optimizer.computeBreaks(context, textBuf, measured, lineWidth, strategy, justified,
- useBoundsForWidth);
+ LineBreakResult res = optimizer.computeBreaks(context, textBuf, measured, lineWidth, strategy,
+ justified, useBoundsForWidth);
+
+ if (!features::word_style_auto()) {
+ return res;
+ }
+
+ // Retry with phrase-based word breaking only when the line breaker requested it because of the
+ // auto option and the given locales.
+ if (!context.retryWithPhraseWordBreak) {
+ return res;
+ }
+
+ // If the number of break points already reaches the heuristics threshold, don't try
+ // phrase-based word breaking.
+ if (res.breakPoints.size() >= LBW_AUTO_HEURISTICS_LINE_COUNT) {
+ return res;
+ }
+
+ const OptimizeContext phContext =
+ populateCandidates(textBuf, measured, lineWidth, frequency, justified,
+ true /* forceWordStyleAutoToPhrase */);
+ LineBreakResult res2 = optimizer.computeBreaks(phContext, textBuf, measured, lineWidth,
+ strategy, justified, useBoundsForWidth);
+ if (res2.breakPoints.size() < LBW_AUTO_HEURISTICS_LINE_COUNT) {
+ return res2;
+ } else {
+ return res;
+ }
}
} // namespace minikin
diff --git a/libs/minikin/WordBreaker.cpp b/libs/minikin/WordBreaker.cpp
index e3578b4..a1e9526 100644
--- a/libs/minikin/WordBreaker.cpp
+++ b/libs/minikin/WordBreaker.cpp
@@ -35,6 +35,11 @@ namespace {
static std::unique_ptr<BreakIterator> createNewIterator(const Locale& locale,
LineBreakStyle lbStyle,
LineBreakWordStyle lbWordStyle) {
+ MINIKIN_ASSERT(lbStyle != LineBreakStyle::Auto,
+ "LineBreakStyle::Auto must be resolved beforehand.");
+ MINIKIN_ASSERT(lbWordStyle != LineBreakWordStyle::Auto,
+ "LineBreakWordStyle::Auto must be resolved beforehand.");
+
// TODO: handle failure status
if (lbStyle == LineBreakStyle::NoBreak) {
return std::make_unique<NoBreakBreakIterator>();
@@ -70,6 +75,11 @@ int32_t ICUBreakIterator::next() {
ICULineBreakerPool::Slot ICULineBreakerPoolImpl::acquire(const Locale& locale,
LineBreakStyle lbStyle,
LineBreakWordStyle lbWordStyle) {
+ if (lbStyle == LineBreakStyle::Auto) {
+ lbStyle = locale.supportsScript('J', 'p', 'a', 'n') ? LineBreakStyle::Strict
+ : LineBreakStyle::None;
+ }
+
const uint64_t id = locale.getIdentifier();
std::lock_guard<std::mutex> lock(mMutex);
for (auto i = mPool.begin(); i != mPool.end(); i++) {