OpenTREP Logo  0.6.0
C++ Open Travel Request Parsing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
OTransliterator.cpp
Go to the documentation of this file.
1 // //////////////////////////////////////////////////////////////////////
2 // Import section
3 // //////////////////////////////////////////////////////////////////////
4 // STL
5 #include <cassert>
6 #include <sstream>
7 // OpenTrep
13 
14 namespace OPENTREP {
15 
16  // //////////////////////////////////////////////////////////////////////
18  : _normaliser (NULL), _unquoter (NULL), _unpunctuater (NULL),
19  _tranlist (NULL) {
20  init();
21  }
22 
23  // //////////////////////////////////////////////////////////////////////
25  : _normaliser (NULL), _unquoter (NULL), _unpunctuater (NULL),
26  _tranlist (NULL) {
27  assert (iTransliterator._normaliser != NULL);
28  _normaliser = iTransliterator._normaliser->clone();
29 
30  assert (iTransliterator._unquoter != NULL);
31  _unquoter = iTransliterator._unquoter->clone();
32 
33  assert (iTransliterator._unpunctuater != NULL);
34  _unpunctuater = iTransliterator._unpunctuater->clone();
35 
36  assert (iTransliterator._tranlist != NULL);
37  _tranlist = iTransliterator._tranlist->clone();
38 
39  }
40 
41  // //////////////////////////////////////////////////////////////////////
43  finalise();
44  }
45 
46  // //////////////////////////////////////////////////////////////////////
47  void OTransliterator::init() {
48  // Create a normaliser
49  UErrorCode lStatus = U_ZERO_ERROR;
50 
51  _normaliser =
52  Transliterator::createInstance (K_ICU_ACCENT_REMOVAL_RULE, UTRANS_FORWARD,
53  lStatus);
54 
55  if (_normaliser == NULL || U_FAILURE (lStatus)) {
56  std::ostringstream oStr;
57  oStr << "Unicode error: no Transliterator can be created for the '"
58  << K_ICU_ACCENT_REMOVAL_RULE << "' rule.";
59  OPENTREP_LOG_ERROR (oStr.str());
60  throw UnicodeTransliteratorCreationException (oStr.str());
61  }
62  assert (_normaliser != NULL);
63 
64  // Register the Unicode Transliterator
65  Transliterator::registerInstance (_normaliser);
66 
67  // Create a remover of quotation
68  lStatus = U_ZERO_ERROR;
69  UParseError pError;
70  UnicodeString lUnquotedRules (K_ICU_QUOTATION_REMOVAL_RULE);
71  _unquoter = Transliterator::createFromRules("RBTUnaccent", lUnquotedRules,
72  UTRANS_FORWARD, pError, lStatus);
73 
74  if (_unquoter == NULL || U_FAILURE (lStatus)) {
75  std::ostringstream oStr;
76  oStr << "Unicode error: no Transliterator can be created for the '"
77  << K_ICU_QUOTATION_REMOVAL_RULE << "' rule.";
78  OPENTREP_LOG_ERROR (oStr.str());
79  throw UnicodeTransliteratorCreationException (oStr.str());
80  }
81  assert (_unquoter != NULL);
82 
83  // Register the Unicode Transliterator
84  Transliterator::registerInstance (_unquoter);
85 
86  // Create a remover of punctuation
87  lStatus = U_ZERO_ERROR;
88 
89  _unpunctuater =
90  Transliterator::createInstance (K_ICU_PUNCTUATION_REMOVAL_RULE,
91  UTRANS_FORWARD, lStatus);
92 
93  if (_unpunctuater == NULL || U_FAILURE (lStatus)) {
94  std::ostringstream oStr;
95  oStr << "Unicode error: no Transliterator can be created for the '"
96  << K_ICU_PUNCTUATION_REMOVAL_RULE << "' rule.";
97  OPENTREP_LOG_ERROR (oStr.str());
98  throw UnicodeTransliteratorCreationException (oStr.str());
99  }
100  assert (_unpunctuater != NULL);
101 
102  // Register the Unicode Transliterator
103  Transliterator::registerInstance (_unpunctuater);
104 
105  // Create a generic transliterator
106  lStatus = U_ZERO_ERROR;
107 
108  _tranlist =
109  Transliterator::createInstance (K_ICU_GENERIC_TRANSLITERATOR_RULE,
110  UTRANS_FORWARD, lStatus);
111 
112  if (_tranlist == NULL || U_FAILURE (lStatus)) {
113  std::ostringstream oStr;
114  oStr << "Unicode error: no Transliterator can be created for the '"
115  << K_ICU_GENERIC_TRANSLITERATOR_RULE << "' rule.";
116  OPENTREP_LOG_ERROR (oStr.str());
117  throw UnicodeTransliteratorCreationException (oStr.str());
118  }
119  assert (_tranlist != NULL);
120 
121  // Register the Unicode Transliterator
122  Transliterator::registerInstance (_tranlist);
123  }
124 
125  // //////////////////////////////////////////////////////////////////////
126  void OTransliterator::finalise() {
127  delete _normaliser; _normaliser = NULL;
128  delete _unquoter; _unquoter = NULL;
129  delete _unpunctuater; _unpunctuater = NULL;
130  delete _tranlist; _tranlist = NULL;
131  }
132 
133  // //////////////////////////////////////////////////////////////////////
134  std::string OTransliterator::normalise (const std::string& iString) const {
135  assert (_normaliser != NULL);
136 
137  // Build a UnicodeString from the STL string
138  UnicodeString lString (iString.c_str());
139 
140  // Apply the whole sery of transformators
141  _normaliser->transliterate (lString);
142  _unquoter->transliterate (lString);
143  _unpunctuater->transliterate (lString);
144  _tranlist->transliterate (lString);
145 
146  // Convert from UnicodeString to UTF8-encoded STL string
147  const std::string& lNormalisedString = getUTF8 (lString);
148 
149  return lNormalisedString;
150  }
151 
152 }
#define OPENTREP_LOG_ERROR(iToBeLogged)
Definition: Logger.hpp:23
const char * K_ICU_PUNCTUATION_REMOVAL_RULE
Definition: BasConst.cpp:67
const char * K_ICU_GENERIC_TRANSLITERATOR_RULE
Definition: BasConst.cpp:73
const char * K_ICU_QUOTATION_REMOVAL_RULE
Definition: BasConst.cpp:60
std::string getUTF8(const UnicodeString &iString)
Definition: icu_util.cpp:65
const char * K_ICU_ACCENT_REMOVAL_RULE
Definition: BasConst.cpp:54
std::string normalise(const std::string &iString) const