OpenTREP Logo  0.6.0
C++ Open Travel Request Parsing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
WordCombinationHolder.cpp
Go to the documentation of this file.
1 // //////////////////////////////////////////////////////////////////////
2 // Import section
3 // //////////////////////////////////////////////////////////////////////
4 // STL
5 #include <cassert>
6 #include <sstream>
7 #include <set>
8 // OpenTrep
10 #include <opentrep/bom/Filter.hpp>
14 
15 namespace OPENTREP {
16 
17  // //////////////////////////////////////////////////////////////////////
18  WordCombinationHolder::WordCombinationHolder (const std::string& iString) {
19  init (iString);
20  }
21 
22  // //////////////////////////////////////////////////////////////////////
24  }
25 
26  // //////////////////////////////////////////////////////////////////////
27  void WordCombinationHolder::push_back (const std::string& iString) {
28  _list.push_back (iString);
29  }
30 
31  // //////////////////////////////////////////////////////////////////////
32  size_t WordCombinationHolder::size() const {
33  return _list.size();
34  }
35 
36  // //////////////////////////////////////////////////////////////////////
38  _list.clear();
39  }
40 
41  // //////////////////////////////////////////////////////////////////////
42  std::string WordCombinationHolder::describeKey() const {
43  std::ostringstream oStr;
44  oStr << "";
45  return oStr.str();
46  }
47 
48  // //////////////////////////////////////////////////////////////////////
49  std::string WordCombinationHolder::describe() const {
50  std::ostringstream oStr;
51  oStr << describeKey();
52 
53  //
54  oStr << "{";
55 
56  short idx_sublist = 0;
57  for (StringList_T::const_iterator itWordCombination = _list.begin();
58  itWordCombination != _list.end(); ++itWordCombination, ++idx_sublist) {
59  //
60  if (idx_sublist != 0) {
61  oStr << ", ";
62  }
63 
64  //
65  const StringSet& lStringSet = *itWordCombination;
66 
67  //
68  oStr << lStringSet;
69  }
70 
71  //
72  oStr << " }";
73 
74  return oStr.str();
75  }
76 
77  // //////////////////////////////////////////////////////////////////////
78  void WordCombinationHolder::toStream (std::ostream& ioOut) const {
79  ioOut << describe();
80  }
81 
82  // //////////////////////////////////////////////////////////////////////
83  void WordCombinationHolder::fromStream (std::istream& ioIn) {
84  }
85 
86  // //////////////////////////////////////////////////////////////////////
87  void WordCombinationHolder::init (const std::string& iPhrase) {
88  // Set of unique strings
89  typedef std::set<std::string> StringSet_T;
90  StringSet_T lStringSet;
91 
92  // 1. Derive all the partitions of the initial (full) string
93  const StringPartition lStringPartitionHolder (iPhrase);
94  const StringPartition::StringPartition_T& lStringPartition =
95  lStringPartitionHolder._partition;
96 
97  // 2.1. For every word combination, add it if not already in the
98  // list (STL set) of strings.
99  for (StringPartition::StringPartition_T::const_iterator itSet =
100  lStringPartition.begin(); itSet != lStringPartition.end(); ++itSet) {
101  const StringSet& itStringList = *itSet;
102 
103  const StringList_T& lStringList = itStringList._set;
104  for (StringList_T::const_iterator itWordCombination = lStringList.begin();
105  itWordCombination != lStringList.end(); ++itWordCombination) {
106  const std::string& lWordCombination = *itWordCombination;
107 
108  // Check whether the (remaining) word combination should be filtered out
109  //const bool isToBeAdded= Filter::shouldKeep (iPhrase, lWordCombination);
110  const bool isToBeAdded = true;
111  if (isToBeAdded == true) {
112  lStringSet.insert (lWordCombination);
113  }
114  }
115  }
116 
117  // 2.2. Convert the STL set into a STL list
118  for (StringSet_T::const_iterator itWordCombination = lStringSet.begin();
119  itWordCombination != lStringSet.end(); ++itWordCombination) {
120  const std::string& lWordCombination = *itWordCombination;
121 
122  // Add that word combination in the list for indexation by Xapian.
123  // Note that if that word combination is already present in the list,
124  // it will not be added a second time (thanks to the STL list design).
125  _list.push_back (lWordCombination);
126  }
127 
128  // 3. Add the word combinations, made by removing all the possible groups
129  // of continuous words inbetween the two extreme words (from left- and
130  // right-hand sides).
131  // 3.0. Initialisation of the list of words, made of all the words of the
132  // given string.
133  WordList_T lWordList;
134  tokeniseStringIntoWordList (iPhrase, lWordList);
135  const short nbOfWords = lWordList.size();
136 
137  // 3.1. If the string contains no more than two words, the job is finished.
138  if (nbOfWords <= 2) {
139  return;
140  }
141 
142  // 3.2. Iteration on the number of words to remove in the middle of the
143  // string, from 1 to (nbOfWords - 2)
144  for (short mdl_string_len = 1; mdl_string_len != nbOfWords-1;
145  ++mdl_string_len) {
146 
147  // 3.2. Iteration on all the middle words of the given string,
148  // from 1 to (nbOfWords - mdl_string_len)
149  for (short idx_word=1; idx_word != nbOfWords-mdl_string_len; ++idx_word) {
150  // 3.2.1. Copy the first idx_word word(s)
151  const std::string& lLeftHandString =
152  createStringFromWordList (lWordList, idx_word);
153 
154  // 3.2.2. Copy the last (nbOfWords - (idx_word + mdl_string_len)) words
155  const std::string& lRightHandString =
156  createStringFromWordList (lWordList,
157  idx_word + mdl_string_len,
158  false);
159 
160  // 3.2.3. Concatenate both sub-strings
161  std::ostringstream lConcatenatedStr;
162  lConcatenatedStr << lLeftHandString << " " << lRightHandString;
163  const std::string& lConcatenatedString = lConcatenatedStr.str();
164 
165  // 3.2.4. Add the concatenated string into the list, if not filtered out
166  // const bool isToBeAdded =
167  // Filter::shouldKeep (iPhrase, lConcatenatedString);
168  const bool isToBeAdded = true;
169  if (isToBeAdded == true) {
170  _list.push_back (lConcatenatedString);
171  }
172  }
173  }
174  }
175 
176 }
std::vector< std::string > WordList_T
StringSet_T _set
Definition: StringSet.hpp:118
std::string createStringFromWordList(const WordList_T &iWordList, const unsigned short iSplitIdx, const bool iFromBeginningFlag)
Definition: Utilities.cpp:38
std::list< StringSet > StringPartition_T
void push_back(const std::string &)
std::list< std::string > StringList_T
void tokeniseStringIntoWordList(const std::string &iPhrase, WordList_T &ioWordList)
Definition: Utilities.cpp:16
Class holding a set of strings, e.g., {"rio", "de", "janeiro"}.
Definition: StringSet.hpp:19
void toStream(std::ostream &) const