OpenTREP Logo  0.6.0
C++ Open Travel Request Parsing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
QuerySlices.cpp
Go to the documentation of this file.
1 // //////////////////////////////////////////////////////////////////////
2 // Import section
3 // //////////////////////////////////////////////////////////////////////
4 // STL
5 #include <cassert>
6 #include <sstream>
7 #include <set>
8 // OpenTrep
12 #include <opentrep/bom/Filter.hpp>
15 
16 namespace OPENTREP {
17 
18  // //////////////////////////////////////////////////////////////////////
19  QuerySlices::QuerySlices (const Xapian::Database& iDatabase,
20  const TravelQuery_T& iQueryString)
21  : _database (iDatabase), _queryString (iQueryString) {
22  init();
23  }
24 
25  // //////////////////////////////////////////////////////////////////////
27  }
28 
29  // //////////////////////////////////////////////////////////////////////
30  void QuerySlices::push_back (const StringPartition& iStringPartition) {
31  if (iStringPartition.empty() == false) {
32  _slices.push_back (iStringPartition);
33  }
34  }
35 
36  // //////////////////////////////////////////////////////////////////////
37  size_t QuerySlices::size() const {
38  return _slices.size();
39  }
40 
41  // //////////////////////////////////////////////////////////////////////
42  bool QuerySlices::empty() const {
43  return _slices.empty();
44  }
45 
46  // //////////////////////////////////////////////////////////////////////
48  _slices.clear();
49  }
50 
51  // //////////////////////////////////////////////////////////////////////
52  std::string QuerySlices::describeKey() const {
53  std::ostringstream oStr;
54  oStr << "";
55  return oStr.str();
56  }
57 
58  // //////////////////////////////////////////////////////////////////////
59  std::string QuerySlices::describe() const {
60  std::ostringstream oStr;
61  oStr << describeKey();
62 
63  //
64  oStr << "[ ";
65 
66  short idx_sublist = 0;
67  for (StringPartitionList_T::const_iterator itSlice = _slices.begin();
68  itSlice != _slices.end(); ++itSlice, ++idx_sublist) {
69  //
70  if (idx_sublist != 0) {
71  oStr << "; ";
72  }
73 
74  //
75  const StringPartition& lStringPartition = *itSlice;
76 
77  //
78  oStr << idx_sublist << ". " << lStringPartition;
79  }
80 
81  //
82  oStr << " ]";
83 
84  return oStr.str();
85  }
86 
87  // //////////////////////////////////////////////////////////////////////
88  void QuerySlices::toStream (std::ostream& ioOut) const {
89  ioOut << describe();
90  }
91 
92  // //////////////////////////////////////////////////////////////////////
93  void QuerySlices::fromStream (std::istream& ioIn) {
94  }
95 
107  // //////////////////////////////////////////////////////////////////////
108  static unsigned int calculateEditDistance (const TravelQuery_T& iPhrase) {
109  NbOfErrors_T oEditDistance = 2;
110 
111  const NbOfErrors_T lQueryStringSize = iPhrase.size();
112 
113  oEditDistance = lQueryStringSize / K_DEFAULT_SIZE_FOR_SPELLING_ERROR_UNIT;
114  return oEditDistance;
115  }
116 
120  // //////////////////////////////////////////////////////////////////////
121  bool doesMatch (const Xapian::Database& iDatabase,
122  const std::string& iWord1, const std::string& iWord2) {
123  bool oDoesMatch = false;
124 
125  //
126  std::ostringstream oStr;
127  oStr << iWord1 << " " << iWord2;
128  const std::string lQueryString (oStr.str());
129 
130  // Catch any Xapian::Error exceptions thrown
131  Xapian::MSet lMatchingSet;
132  try {
133 
134  // Build the query object
135  Xapian::QueryParser lQueryParser;
136  lQueryParser.set_database (iDatabase);
137 
143  // lQueryParser.set_default_op (Xapian::Query::OP_ADJ);
144  lQueryParser.set_default_op (Xapian::Query::OP_PHRASE);
145 
146  // DEBUG
147  /*
148  OPENTREP_LOG_DEBUG ("Query parser `" << lQueryParser.get_description()
149  << "'");
150  */
151 
152  // DEBUG
153  // OPENTREP_LOG_DEBUG (" --------");
154 
155  // Start an enquire session
156  Xapian::Enquire enquire (iDatabase);
157 
164  const Xapian::Query& lXapianQuery =
165  lQueryParser.parse_query (lQueryString,
166  Xapian::QueryParser::FLAG_BOOLEAN
167  | Xapian::QueryParser::FLAG_PHRASE
168  | Xapian::QueryParser::FLAG_LOVEHATE);
169 
170  // Give the query object to the enquire session
171  enquire.set_query (lXapianQuery);
172 
173  // Get the top 20 results of the query
174  lMatchingSet = enquire.get_mset (0, 20);
175 
176  // Display the results
177  int nbMatches = lMatchingSet.size();
178 
179  // DEBUG
180  /*
181  OPENTREP_LOG_DEBUG (" Query string: `" << lQueryString
182  << "', i.e.: `" << lXapianQuery.get_description()
183  << "' => " << nbMatches << " result(s) found");
184  */
185 
186  if (nbMatches != 0) {
187  // There has been a matching
188  oDoesMatch = true;
189 
190  // DEBUG
191  /*
192  OPENTREP_LOG_DEBUG (" Query string: `" << lQueryString
193  << "' provides " << nbMatches << " exact matches.");
194  */
195 
196  return oDoesMatch;
197  }
198  assert (lMatchingSet.empty() == true);
199 
205  const NbOfErrors_T& lAllowableEditDistance =
206  calculateEditDistance (lQueryString);
207 
208  // Let Xapian find a spelling correction (if any)
209  const std::string& lCorrectedString =
210  iDatabase.get_spelling_suggestion (lQueryString, lAllowableEditDistance);
211 
212  // If the correction is no better than the original string, there is
213  // no need to go further: there is no match.
214  if (lCorrectedString.empty() == true || lCorrectedString == lQueryString) {
215  // DEBUG
216  /*
217  OPENTREP_LOG_DEBUG (" Query string: `"
218  << lQueryString << "' provides no match, "
219  << "and there is no spelling suggestion, "
220  << "even with an edit distance of "
221  << lAllowableEditDistance);
222  */
223 
224  // No match
225  return oDoesMatch;
226  }
227  assert (lCorrectedString.empty() == false
228  && lCorrectedString != lQueryString);
229 
230  // Calculate the effective (Levenshtein) edit distance/error
231  const NbOfErrors_T& lEditDistance =
232  Levenshtein::getDistance (lQueryString, lCorrectedString);
233 
241  const Xapian::Query& lCorrectedXapianQuery =
242  lQueryParser.parse_query (lCorrectedString,
243  Xapian::QueryParser::FLAG_BOOLEAN
244  | Xapian::QueryParser::FLAG_PHRASE
245  | Xapian::QueryParser::FLAG_LOVEHATE);
246 
247  enquire.set_query (lCorrectedXapianQuery);
248  lMatchingSet = enquire.get_mset (0, 20);
249 
250  // Display the results
251  nbMatches = lMatchingSet.size();
252 
253  // DEBUG
254  /*
255  OPENTREP_LOG_DEBUG (" Corrected query string: `" << lCorrectedString
256  << "', i.e.: `"
257  << lCorrectedXapianQuery.get_description()
258  << "' => " << nbMatches << " result(s) found");
259  */
260 
261  if (nbMatches != 0) {
262  // DEBUG
263  /*
264  OPENTREP_LOG_DEBUG (" Query string: `"
265  << lQueryString << "', spelling suggestion: `"
266  << lCorrectedString
267  << "', with a Levenshtein edit distance of "
268  << lEditDistance
269  << " over an allowable edit distance of "
270  << lAllowableEditDistance << ", provides "
271  << nbMatches << " matches.");
272  */
273 
274  //
275  oDoesMatch = true;
276  return oDoesMatch;
277  }
278 
279  // Error
280  OPENTREP_LOG_ERROR (" Query string: `"
281  << lQueryString << "', spelling suggestion: `"
282  << lCorrectedString
283  << "', with a Levenshtein edit distance of "
284  << lEditDistance
285  << " over an allowable edit distance of "
286  << lAllowableEditDistance << ", provides no match, "
287  << "which is not consistent with the existence of "
288  << "the spelling correction.");
289  assert (false);
290 
291  } catch (const Xapian::Error& error) {
292  // Error
293  OPENTREP_LOG_ERROR ("Exception: " << error.get_msg());
294  throw XapianException (error.get_msg());
295  }
296 
297  return oDoesMatch;
298  }
299 
300  // //////////////////////////////////////////////////////////////////////
301  void QuerySlices::init() {
302  // 0. Initialisation
303  // 0.1. Initialisation of the tokenizer
304  WordList_T lWordList;
306  const unsigned short nbOfWords = lWordList.size();
307 
308  // When the query has a single word, stop here, as there is a single slice
309  if (nbOfWords <= 1) {
310  _slices.push_back (_queryString);
311  return;
312  }
313 
314  // 0.2. Re-create the initial phrase, without any (potential) seperator
315  const std::string lPhrase = createStringFromWordList (lWordList);
316 
317  // 1. Browse the words, two by two, and check whether their association
318  // matches with the Xapian index
319  WordList_T::const_iterator itWord = lWordList.begin();
320  WordList_T::const_iterator itNextWord = lWordList.begin(); ++itNextWord;
321  for (unsigned short idx = 1, idx_rel = 1; itNextWord != lWordList.end();
322  ++itWord, ++itNextWord, ++idx, ++idx_rel) {
323  const std::string& leftWord = *itWord;
324  const std::string& rightWord = *itNextWord;
325 
326  // Store the left word in the staging string
327  if (idx_rel >= 2) {
328  _itLeftWords += " ";
329  }
330  _itLeftWords += leftWord;
331 
332  // Check whether the juxtaposition of the two contiguous words matches
333  const bool lDoesMatch =
334  OPENTREP::doesMatch (_database, leftWord, rightWord);
335 
336  if (lDoesMatch == true) {
337  // When the two words give a match, do nothing now, as at the next turn,
338  // the right word will become the left word and thus be added to the
339  // staging string
340 
341  // DEBUG
342  /*
343  OPENTREP_LOG_DEBUG ("[" << idx << "][" << idx_rel
344  << "] Match - staging string: '"
345  << _itLeftWords << "'");
346  */
347 
348  } else {
349  // DEBUG
350  /*
351  OPENTREP_LOG_DEBUG ("[" << idx << "][" << idx_rel
352  << "] No match - staging string: '"
353  << _itLeftWords << "'");
354  */
355 
356  // When the two words give no match, add the content of the staging
357  // list to the list of slices. Then, empty the staging string.
358  _slices.push_back (_itLeftWords);
359  _itLeftWords = "";
360  idx_rel = 0;
361  }
362  }
363 
364  // 2.
365  const std::string& leftWord = *itWord;
366  if (_itLeftWords.empty() == false) {
367  _itLeftWords += " ";
368  }
369  _itLeftWords += leftWord;
370  _slices.push_back (_itLeftWords);
371 
372  // DEBUG
373  // OPENTREP_LOG_DEBUG ("Last staging string: '" << _itLeftWords << "'");
374  // OPENTREP_LOG_DEBUG ("Slices: " << *this);
375  }
376 
377 }
TravelQuery_T _queryString
std::vector< std::string > WordList_T
const NbOfErrors_T K_DEFAULT_SIZE_FOR_SPELLING_ERROR_UNIT
#define OPENTREP_LOG_ERROR(iToBeLogged)
Definition: Logger.hpp:23
std::string _itLeftWords
static int getDistance(const std::string &iSource, const std::string &iTarget)
Definition: Levenshtein.cpp:13
std::string createStringFromWordList(const WordList_T &iWordList, const unsigned short iSplitIdx, const bool iFromBeginningFlag)
Definition: Utilities.cpp:38
void push_back(const StringPartition &iStringPartition)
Definition: QuerySlices.cpp:30
size_t size() const
Definition: QuerySlices.cpp:37
std::string describeKey() const
Definition: QuerySlices.cpp:52
bool empty() const
Definition: QuerySlices.cpp:42
unsigned short NbOfErrors_T
std::string describe() const
Definition: QuerySlices.cpp:59
StringPartitionList_T _slices
void tokeniseStringIntoWordList(const std::string &iPhrase, WordList_T &ioWordList)
Definition: Utilities.cpp:16
const Xapian::Database & _database
QuerySlices(const Xapian::Database &, const TravelQuery_T &)
Definition: QuerySlices.cpp:19
std::string TravelQuery_T
void fromStream(std::istream &ioIn)
Definition: QuerySlices.cpp:93
bool doesMatch(const Xapian::Database &iDatabase, const std::string &iWord1, const std::string &iWord2)
Helper function to query for a Xapian-based full text match.
void toStream(std::ostream &ioOut) const
Definition: QuerySlices.cpp:88
static unsigned int calculateEditDistance(const TravelQuery_T &iPhrase)
Helper function.