diff options
Diffstat (limited to 'plugins/com.ionic.keyboard/src/blackberry10/native/public/tokenizer.cpp')
| -rw-r--r-- | plugins/com.ionic.keyboard/src/blackberry10/native/public/tokenizer.cpp | 222 |
1 files changed, 222 insertions, 0 deletions
diff --git a/plugins/com.ionic.keyboard/src/blackberry10/native/public/tokenizer.cpp b/plugins/com.ionic.keyboard/src/blackberry10/native/public/tokenizer.cpp new file mode 100644 index 00000000..4a39573b --- /dev/null +++ b/plugins/com.ionic.keyboard/src/blackberry10/native/public/tokenizer.cpp @@ -0,0 +1,222 @@ +/************************************************************************ +The zlib/libpng License + +Copyright (c) 2006 Joerg Wiedenmann + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from +the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; +you must not claim that you wrote the original software. +If you use this software in a product, an acknowledgment +in the product documentation would be appreciated but is +not required. + +2. Altered source versions must be plainly marked as such, +and must not be misrepresented as being the original software. + +3. This notice may not be removed or altered from any source distribution. + +***********************************************************************/ + +/******************************************************************** + created: 2006-01-28 + filename: tokenizer.cpp + author: Jörg Wiedenmann + + purpose: A tokenizer function which provides a very + customizable way of breaking up strings. + + history: 2006-01-28, Original version + 2006-03-04, Fixed a small parsing bug, thanks Elias. +*********************************************************************/ + +#include "tokenizer.h" + +using namespace std; + +void tokenize ( const string& str, vector<string>& result, + const string& delimiters, const string& delimiters_preserve, + const string& quote, const string& esc ) +{ + // clear the vector + if ( false == result.empty() ) + { + result.clear(); + } + + string::size_type pos = 0; // the current position (char) in the string + char ch = 0; // buffer for the current character + char delimiter = 0; // the buffer for the delimiter char which + // will be added to the tokens if the delimiter + // is preserved + char current_quote = 0; // the char of the current open quote + bool quoted = false; // indicator if there is an open quote + string token; // string buffer for the token + bool token_complete = false; // indicates if the current token is + // read to be added to the result vector + string::size_type len = str.length(); // length of the input-string + + // for every char in the input-string + while ( len > pos ) + { + // get the character of the string and reset the delimiter buffer + ch = str.at(pos); + delimiter = 0; + + // assume ch isn't a delimiter + bool add_char = true; + + // check ... + + // ... if the delimiter is an escaped character + bool escaped = false; // indicates if the next char is protected + if ( false == esc.empty() ) // check if esc-chars are provided + { + if ( string::npos != esc.find_first_of(ch) ) + { + // get the escaped char + ++pos; + if ( pos < len ) // if there are more chars left + { + // get the next one + ch = str.at(pos); + + // add the escaped character to the token + add_char = true; + } + else // cannot get any more characters + { + // don't add the esc-char + add_char = false; + } + + // ignore the remaining delimiter checks + escaped = true; + } + } + + // ... if the delimiter is a quote + if ( false == quote.empty() && false == escaped ) + { + // if quote chars are provided and the char isn't protected + if ( string::npos != quote.find_first_of(ch) ) + { + // if not quoted, set state to open quote and set + // the quote character + if ( false == quoted ) + { + quoted = true; + current_quote = ch; + + // don't add the quote-char to the token + add_char = false; + } + else // if quote is open already + { + // check if it is the matching character to close it + if ( current_quote == ch ) + { + // close quote and reset the quote character + quoted = false; + current_quote = 0; + + // don't add the quote-char to the token + add_char = false; + } + } // else + } + } + + // ... if the delimiter isn't preserved + if ( false == delimiters.empty() && false == escaped && + false == quoted ) + { + // if a delimiter is provided and the char isn't protected by + // quote or escape char + if ( string::npos != delimiters.find_first_of(ch) ) + { + // if ch is a delimiter and the token string isn't empty + // the token is complete + if ( false == token.empty() ) // BUGFIX: 2006-03-04 + { + token_complete = true; + } + + // don't add the delimiter to the token + add_char = false; + } + } + + // ... if the delimiter is preserved - add it as a token + bool add_delimiter = false; + if ( false == delimiters_preserve.empty() && false == escaped && + false == quoted ) + { + // if a delimiter which will be preserved is provided and the + // char isn't protected by quote or escape char + if ( string::npos != delimiters_preserve.find_first_of(ch) ) + { + // if ch is a delimiter and the token string isn't empty + // the token is complete + if ( false == token.empty() ) // BUGFIX: 2006-03-04 + { + token_complete = true; + } + + // don't add the delimiter to the token + add_char = false; + + // add the delimiter + delimiter = ch; + add_delimiter = true; + } + } + + + // add the character to the token + if ( true == add_char ) + { + // add the current char + token.push_back( ch ); + } + + // add the token if it is complete + if ( true == token_complete && false == token.empty() ) + { + // add the token string + result.push_back( token ); + + // clear the contents + token.clear(); + + // build the next token + token_complete = false; + } + + // add the delimiter + if ( true == add_delimiter ) + { + // the next token is the delimiter + string delim_token; + delim_token.push_back( delimiter ); + result.push_back( delim_token ); + + // REMOVED: 2006-03-04, Bugfix + } + + // repeat for the next character + ++pos; + } // while + + // add the final token + if ( false == token.empty() ) + { + result.push_back( token ); + } +} |
