From d0aa98ef7d243177e09d579d00f11f323dc3e7a2 Mon Sep 17 00:00:00 2001
From: fonsinchen
Date: Sat, 1 Mar 2014 14:14:41 +0000
Subject: [PATCH] (svn r26384) -Fix [FS#5924]: Return correct values from ICU iterators in case of leading or trailing whitespace

---
 src/string.cpp | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/string.cpp b/src/string.cpp
index d33356e6e4..2a96921e47 100644
--- a/src/string.cpp
+++ b/src/string.cpp
@@ -752,7 +752,14 @@ public:
 				/* The ICU word iterator considers both the start and the end of a word a valid
 				 * break point, but we only want word starts. Move to the next location in
 				 * case the new position points to whitespace. */
-				while (pos != icu::BreakIterator::DONE && IsWhitespace(Utf16DecodeChar((const uint16 *)&this->utf16_str[pos]))) pos = this->word_itr->next();
+				while (pos != icu::BreakIterator::DONE &&
+						IsWhitespace(Utf16DecodeChar((const uint16 *)&this->utf16_str[pos]))) {
+					int32_t new_pos = this->word_itr->next();
+					/* Don't set it to DONE if it was valid before. Otherwise we'll return END
+					 * even though the iterator wasn't at the end of the string before. */
+					if (new_pos == icu::BreakIterator::DONE) break;
+					pos = new_pos;
+				}
 
 				this->char_itr->isBoundary(pos);
 				break;
@@ -777,7 +784,14 @@ public:
 				/* The ICU word iterator considers both the start and the end of a word a valid
 				 * break point, but we only want word starts. Move to the previous location in
 				 * case the new position points to whitespace. */
-				while (pos != icu::BreakIterator::DONE && IsWhitespace(Utf16DecodeChar((const uint16 *)&this->utf16_str[pos]))) pos = this->word_itr->previous();
+				while (pos != icu::BreakIterator::DONE &&
+						IsWhitespace(Utf16DecodeChar((const uint16 *)&this->utf16_str[pos]))) {
+					int32_t new_pos = this->word_itr->previous();
+					/* Don't set it to DONE if it was valid before. Otherwise we'll return END
+					 * even though the iterator wasn't at the start of the string before. */
+					if (new_pos == icu::BreakIterator::DONE) break;
+					pos = new_pos;
+				}
 
 				this->char_itr->isBoundary(pos);
 				break;