diff options
author | roubert@google.com <roubert@google.com@38ededc0-08b8-5190-f2ac-b31f878777ad> | 2014-02-18 20:06:37 +0000 |
---|---|---|
committer | roubert@google.com <roubert@google.com@38ededc0-08b8-5190-f2ac-b31f878777ad> | 2014-02-18 20:06:37 +0000 |
commit | ff46a74157ff7b73490fd1fb909d6151fcafa353 (patch) | |
tree | f177c140037e3da28b439055c9380b8f974a89e5 /cpp/src | |
parent | 7af2afbf8abd201b5cad73ca1275e1b03d91efca (diff) | |
download | src-ff46a74157ff7b73490fd1fb909d6151fcafa353.tar.gz |
Integrate SplitString into libaddressinput.
This patch removes unnecessary functions and dependencies from SplitString
and includes it in the build of libaddressinput.
BUG=327046
Review URL: https://codereview.chromium.org/113493002
git-svn-id: http://libaddressinput.googlecode.com/svn/trunk@179 38ededc0-08b8-5190-f2ac-b31f878777ad
Diffstat (limited to 'cpp/src')
-rw-r--r-- | cpp/src/util/string_split.cc | 216 | ||||
-rw-r--r-- | cpp/src/util/string_split.h | 79 |
2 files changed, 32 insertions, 263 deletions
diff --git a/cpp/src/util/string_split.cc b/cpp/src/util/string_split.cc index b586b7f..114cd92 100644 --- a/cpp/src/util/string_split.cc +++ b/cpp/src/util/string_split.cc @@ -1,219 +1,37 @@ // Copyright 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +// +// The original source code is from: +// http://src.chromium.org/viewvc/chrome/trunk/src/base/strings/string_split.cc?revision=216633 -#include "base/strings/string_split.h" +#include "string_split.h" -#include "base/logging.h" -#include "base/strings/string_util.h" -#include "base/strings/utf_string_conversions.h" -#include "base/third_party/icu/icu_utf.h" +#include <cassert> +#include <cstddef> +#include <string> +#include <vector> -namespace base { +namespace i18n { +namespace addressinput { -template<typename STR> -static void SplitStringT(const STR& str, - const typename STR::value_type s, - bool trim_whitespace, - std::vector<STR>* r) { +void SplitString(const std::string& str, char s, std::vector<std::string>* r) { + assert(r != NULL); r->clear(); size_t last = 0; size_t c = str.size(); for (size_t i = 0; i <= c; ++i) { if (i == c || str[i] == s) { - STR tmp(str, last, i - last); - if (trim_whitespace) - TrimWhitespace(tmp, TRIM_ALL, &tmp); + std::string tmp(str, last, i - last); // Avoid converting an empty or all-whitespace source string into a vector // of one empty string. - if (i != c || !r->empty() || !tmp.empty()) + if (i != c || !r->empty() || !tmp.empty()) { r->push_back(tmp); + } last = i + 1; } } } -void SplitString(const string16& str, - char16 c, - std::vector<string16>* r) { - DCHECK(CBU16_IS_SINGLE(c)); - SplitStringT(str, c, true, r); -} - -void SplitString(const std::string& str, - char c, - std::vector<std::string>* r) { -#if CHAR_MIN < 0 - DCHECK(c >= 0); -#endif - DCHECK(c < 0x7F); - SplitStringT(str, c, true, r); -} - -bool SplitStringIntoKeyValues( - const std::string& line, - char key_value_delimiter, - std::string* key, std::vector<std::string>* values) { - key->clear(); - values->clear(); - - // Find the key string. - size_t end_key_pos = line.find_first_of(key_value_delimiter); - if (end_key_pos == std::string::npos) { - DVLOG(1) << "cannot parse key from line: " << line; - return false; // no key - } - key->assign(line, 0, end_key_pos); - - // Find the values string. - std::string remains(line, end_key_pos, line.size() - end_key_pos); - size_t begin_values_pos = remains.find_first_not_of(key_value_delimiter); - if (begin_values_pos == std::string::npos) { - DVLOG(1) << "cannot parse value from line: " << line; - return false; // no value - } - std::string values_string(remains, begin_values_pos, - remains.size() - begin_values_pos); - - // Construct the values vector. - values->push_back(values_string); - return true; -} - -bool SplitStringIntoKeyValuePairs(const std::string& line, - char key_value_delimiter, - char key_value_pair_delimiter, - StringPairs* key_value_pairs) { - key_value_pairs->clear(); - - std::vector<std::string> pairs; - SplitString(line, key_value_pair_delimiter, &pairs); - - bool success = true; - for (size_t i = 0; i < pairs.size(); ++i) { - // Empty pair. SplitStringIntoKeyValues is more strict about an empty pair - // line, so continue with the next pair. - if (pairs[i].empty()) - continue; - - std::string key; - std::vector<std::string> value; - if (!SplitStringIntoKeyValues(pairs[i], - key_value_delimiter, - &key, &value)) { - // Don't return here, to allow for keys without associated - // values; just record that our split failed. - success = false; - } - DCHECK_LE(value.size(), 1U); - key_value_pairs->push_back( - make_pair(key, value.empty() ? std::string() : value[0])); - } - return success; -} - -template <typename STR> -static void SplitStringUsingSubstrT(const STR& str, - const STR& s, - std::vector<STR>* r) { - r->clear(); - typename STR::size_type begin_index = 0; - while (true) { - const typename STR::size_type end_index = str.find(s, begin_index); - if (end_index == STR::npos) { - const STR term = str.substr(begin_index); - STR tmp; - TrimWhitespace(term, TRIM_ALL, &tmp); - r->push_back(tmp); - return; - } - const STR term = str.substr(begin_index, end_index - begin_index); - STR tmp; - TrimWhitespace(term, TRIM_ALL, &tmp); - r->push_back(tmp); - begin_index = end_index + s.size(); - } -} - -void SplitStringUsingSubstr(const string16& str, - const string16& s, - std::vector<string16>* r) { - SplitStringUsingSubstrT(str, s, r); -} - -void SplitStringUsingSubstr(const std::string& str, - const std::string& s, - std::vector<std::string>* r) { - SplitStringUsingSubstrT(str, s, r); -} - -void SplitStringDontTrim(const string16& str, - char16 c, - std::vector<string16>* r) { - DCHECK(CBU16_IS_SINGLE(c)); - SplitStringT(str, c, false, r); -} - -void SplitStringDontTrim(const std::string& str, - char c, - std::vector<std::string>* r) { - DCHECK(IsStringUTF8(str)); -#if CHAR_MIN < 0 - DCHECK(c >= 0); -#endif - DCHECK(c < 0x7F); - SplitStringT(str, c, false, r); -} - -template<typename STR> -void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) { - result->clear(); - const size_t length = str.length(); - if (!length) - return; - - bool last_was_ws = false; - size_t last_non_ws_start = 0; - for (size_t i = 0; i < length; ++i) { - switch (str[i]) { - // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR. - case L' ': - case L'\t': - case L'\xA': - case L'\xB': - case L'\xC': - case L'\xD': - if (!last_was_ws) { - if (i > 0) { - result->push_back( - str.substr(last_non_ws_start, i - last_non_ws_start)); - } - last_was_ws = true; - } - break; - - default: // Not a space character. - if (last_was_ws) { - last_was_ws = false; - last_non_ws_start = i; - } - break; - } - } - if (!last_was_ws) { - result->push_back( - str.substr(last_non_ws_start, length - last_non_ws_start)); - } -} - -void SplitStringAlongWhitespace(const string16& str, - std::vector<string16>* result) { - SplitStringAlongWhitespaceT(str, result); -} - -void SplitStringAlongWhitespace(const std::string& str, - std::vector<std::string>* result) { - SplitStringAlongWhitespaceT(str, result); -} - -} // namespace base +} // namespace addressinput +} // namespace i18n diff --git a/cpp/src/util/string_split.h b/cpp/src/util/string_split.h index 83d1c76..6809296 100644 --- a/cpp/src/util/string_split.h +++ b/cpp/src/util/string_split.h @@ -1,83 +1,34 @@ // Copyright 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +// +// The original source code is from: +// http://src.chromium.org/viewvc/chrome/trunk/src/base/strings/string_split.h?revision=236210 +// +// Modifications from original: +// 1) Supports only std::string type. +// 2) Does not trim whitespace. -#ifndef BASE_STRINGS_STRING_SPLIT_H_ -#define BASE_STRINGS_STRING_SPLIT_H_ +#ifndef I18N_ADDRESSINPUT_UTIL_STRING_SPLIT_H_ +#define I18N_ADDRESSINPUT_UTIL_STRING_SPLIT_H_ #include <string> -#include <utility> #include <vector> -#include "base/base_export.h" -#include "base/strings/string16.h" - -namespace base { +namespace i18n { +namespace addressinput { // Splits |str| into a vector of strings delimited by |c|, placing the results // in |r|. If several instances of |c| are contiguous, or if |str| begins with // or ends with |c|, then an empty string is inserted. // -// Every substring is trimmed of any leading or trailing white space. -// NOTE: |c| must be in BMP (Basic Multilingual Plane) -BASE_EXPORT void SplitString(const string16& str, - char16 c, - std::vector<string16>* r); -// |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which -// the trailing byte of a multi-byte character can be in the ASCII range. -// UTF-8, and other single/multi-byte ASCII-compatible encodings are OK. -// Note: |c| must be in the ASCII range. -BASE_EXPORT void SplitString(const std::string& str, - char c, - std::vector<std::string>* r); - -BASE_EXPORT bool SplitStringIntoKeyValues(const std::string& line, - char key_value_delimiter, - std::string* key, - std::vector<std::string>* values); - -typedef std::vector<std::pair<std::string, std::string> > StringPairs;; - -BASE_EXPORT bool SplitStringIntoKeyValuePairs( - const std::string& line, - char key_value_delimiter, - char key_value_pair_delimiter, - StringPairs* key_value_pairs); - -// The same as SplitString, but use a substring delimiter instead of a char. -BASE_EXPORT void SplitStringUsingSubstr(const string16& str, - const string16& s, - std::vector<string16>* r); -BASE_EXPORT void SplitStringUsingSubstr(const std::string& str, - const std::string& s, - std::vector<std::string>* r); - -// The same as SplitString, but don't trim white space. -// NOTE: |c| must be in BMP (Basic Multilingual Plane) -BASE_EXPORT void SplitStringDontTrim(const string16& str, - char16 c, - std::vector<string16>* r); // |str| should not be in a multi-byte encoding like Shift-JIS or GBK in which // the trailing byte of a multi-byte character can be in the ASCII range. // UTF-8, and other single/multi-byte ASCII-compatible encodings are OK. // Note: |c| must be in the ASCII range. -BASE_EXPORT void SplitStringDontTrim(const std::string& str, - char c, - std::vector<std::string>* r); - -// WARNING: this uses whitespace as defined by the HTML5 spec. If you need -// a function similar to this but want to trim all types of whitespace, then -// factor this out into a function that takes a string containing the characters -// that are treated as whitespace. -// -// Splits the string along whitespace (where whitespace is the five space -// characters defined by HTML 5). Each contiguous block of non-whitespace -// characters is added to result. -BASE_EXPORT void SplitStringAlongWhitespace(const string16& str, - std::vector<string16>* result); -BASE_EXPORT void SplitStringAlongWhitespace(const std::string& str, - std::vector<std::string>* result); +void SplitString(const std::string& str, char c, std::vector<std::string>* r); -} // namespace base +} // namespace addressinput +} // namespace i18n -#endif // BASE_STRINGS_STRING_SPLIT_H_ +#endif // I18N_ADDRESSINPUT_UTIL_STRING_SPLIT_H_ |