// Constructor fragments. The constructor headers are not part of this
// extract; only pieces of the member-initialiser lists and of one body are.
//
// Initialiser-list fragment (original line 18): the three pointer members
// shown start out as NULL. The trailing comma means the list continues on a
// line that is not visible here.
18 : _normaliser (NULL), _unquoter (NULL), _unpunctuater (NULL),
// Same initialiser-list fragment again (original line 25). Presumably this
// one belongs to the constructor taking iTransliterator, since the body
// below reads from it -- TODO confirm against the full file.
25 : _normaliser (NULL), _unquoter (NULL), _unpunctuater (NULL),
// Deep copy: each of the four transliterators held by iTransliterator is
// duplicated with clone(), so this object does not share pointers with the
// source object.
// NOTE(review): the NULL checks are assert()s only; when NDEBUG is defined
// they compile to nothing and a NULL source pointer would be dereferenced by
// the clone() call that follows.
27 assert (iTransliterator._normaliser != NULL);
28 _normaliser = iTransliterator._normaliser->clone();
30 assert (iTransliterator._unquoter != NULL);
31 _unquoter = iTransliterator._unquoter->clone();
33 assert (iTransliterator._unpunctuater != NULL);
34 _unpunctuater = iTransliterator._unpunctuater->clone();
36 assert (iTransliterator._tranlist != NULL);
37 _tranlist = iTransliterator._tranlist->clone();
// Creates the four transliterators held by this object (_normaliser,
// _unquoter, _unpunctuater, _tranlist), checks each creation for failure and
// hands each one to Transliterator::registerInstance().
//
// This extract is incomplete: several lines of the function (most of the
// creation calls, the tail of each error message, the closing braces) are
// not visible, so the comments below only describe what is shown.
//
// NOTE(review): the ICU API reference describes the argument of
// Transliterator::registerInstance() as adopted, i.e. the registry takes
// ownership of the object. finalise() also deletes these same four pointers,
// which would then be a double delete -- confirm, and if so register a
// clone() (or do not register at all).
47 void OTransliterator::init() {
// ICU status code shared by all the creation calls below.
49 UErrorCode lStatus = U_ZERO_ERROR;
// Creation of _normaliser is not visible in this extract; a NULL result or
// a failure status are both treated as errors.
55 if (_normaliser == NULL || U_FAILURE (lStatus)) {
56 std::ostringstream oStr;
57 oStr <<
"Unicode error: no Transliterator can be created for the '"
// What is done with the message for this first case (log or throw) is not
// visible here, unlike the three cases below which throw.
62 assert (_normaliser != NULL);
65 Transliterator::registerInstance (_normaliser);
// Reset the status before the next creation call so that an earlier outcome
// cannot be mistaken for the result of the next call.
68 lStatus = U_ZERO_ERROR;
// Rule-based transliterator built from lUnquotedRules (declared on a line
// not shown here).
// NOTE(review): the ID passed is "RBTUnaccent" although the rules variable
// and the member are named after quote removal -- confirm the ID is the
// intended one.
71 _unquoter = Transliterator::createFromRules(
"RBTUnaccent", lUnquotedRules,
72 UTRANS_FORWARD, pError, lStatus);
74 if (_unquoter == NULL || U_FAILURE (lStatus)) {
75 std::ostringstream oStr;
76 oStr <<
"Unicode error: no Transliterator can be created for the '"
79 throw UnicodeTransliteratorCreationException (oStr.str());
// Re-states the post-condition of the check above.
81 assert (_unquoter != NULL);
84 Transliterator::registerInstance (_unquoter);
87 lStatus = U_ZERO_ERROR;
// Tail of the call that creates _unpunctuater; the start of the call is not
// visible in this extract.
91 UTRANS_FORWARD, lStatus);
93 if (_unpunctuater == NULL || U_FAILURE (lStatus)) {
94 std::ostringstream oStr;
95 oStr <<
"Unicode error: no Transliterator can be created for the '"
98 throw UnicodeTransliteratorCreationException (oStr.str());
100 assert (_unpunctuater != NULL);
103 Transliterator::registerInstance (_unpunctuater);
106 lStatus = U_ZERO_ERROR;
// Tail of the call that creates _tranlist; the start of the call is not
// visible in this extract.
110 UTRANS_FORWARD, lStatus);
112 if (_tranlist == NULL || U_FAILURE (lStatus)) {
113 std::ostringstream oStr;
114 oStr <<
"Unicode error: no Transliterator can be created for the '"
117 throw UnicodeTransliteratorCreationException (oStr.str());
119 assert (_tranlist != NULL);
122 Transliterator::registerInstance (_tranlist);
// Releases the four transliterators held by this object and resets each
// pointer to NULL, so that calling finalise() a second time only deletes
// NULL pointers (a no-op in C++).
//
// NOTE(review): init() passes these same four pointers to
// Transliterator::registerInstance(), whose argument the ICU API reference
// describes as adopted (owned by the registry afterwards). Deleting them
// here as well would then be a double delete -- confirm against the ICU
// documentation and the full init() code.
126 void OTransliterator::finalise() {
127 delete _normaliser; _normaliser = NULL;
128 delete _unquoter; _unquoter = NULL;
129 delete _unpunctuater; _unpunctuater = NULL;
130 delete _tranlist; _tranlist = NULL;
// Body fragment of a function that takes iString and returns its normalised
// form; presumably OTransliterator::normalise(), whose header is not visible
// in this extract -- TODO confirm.
//
// NOTE(review): only _normaliser is checked in the visible lines, while
// _unquoter, _unpunctuater and _tranlist are dereferenced below as well.
135 assert (_normaliser != NULL);
// Convert the input into an ICU UnicodeString.
// NOTE(review): the UnicodeString (const char*) constructor is documented by
// ICU as interpreting the bytes in the platform default codepage, and
// c_str() stops at the first embedded NUL. If iString is meant to be UTF-8,
// UnicodeString::fromUTF8() may be the safer choice -- confirm.
138 UnicodeString lString (iString.c_str());
// Apply the four transliterators in a fixed order; each call rewrites
// lString in place and feeds the next one.
141 _normaliser->transliterate (lString);
142 _unquoter->transliterate (lString);
143 _unpunctuater->transliterate (lString);
144 _tranlist->transliterate (lString);
// Convert the result back with getUTF8(). The const reference binds to the
// temporary std::string returned by getUTF8(), which keeps that temporary
// alive until the end of the scope; the return statement then copies it out.
147 const std::string& lNormalisedString =
getUTF8 (lString);
149 return lNormalisedString;
#define OPENTREP_LOG_ERROR(iToBeLogged)
const char * K_ICU_PUNCTUATION_REMOVAL_RULE
const char * K_ICU_GENERIC_TRANSLITERATOR_RULE
const char * K_ICU_QUOTATION_REMOVAL_RULE
std::string getUTF8(const UnicodeString &iString)
const char * K_ICU_ACCENT_REMOVAL_RULE
std::string normalise(const std::string &iString) const