OpenTREP Logo  0.6.0
C++ Open Travel Request Parsing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Result.cpp
Go to the documentation of this file.
1 // //////////////////////////////////////////////////////////////////////
2 // Import section
3 // //////////////////////////////////////////////////////////////////////
4 // STL
5 #include <cassert>
6 #include <sstream>
7 #include <algorithm>
8 // Boost
9 #include <boost/tokenizer.hpp>
10 // OpenTREP
11 #include <opentrep/LocationKey.hpp>
13 #include <opentrep/bom/Filter.hpp>
17 #include <opentrep/bom/Place.hpp>
18 #include <opentrep/bom/Result.hpp>
21 
22 namespace OPENTREP {
23 
24  // //////////////////////////////////////////////////////////////////////
25  Result::Result (const TravelQuery_T& iQueryString,
26  const Xapian::Database& iDatabase)
27  : _resultHolder (NULL), _database (iDatabase),
28  _queryString (iQueryString), _hasFullTextMatched (false),
29  _bestDocData (RawDataString_T ("")) {
30  init();
31  }
32 
33  // //////////////////////////////////////////////////////////////////////
34  Result::~Result() {
35  }
36 
37  // //////////////////////////////////////////////////////////////////////
38  void Result::init() {
39  }
40 
41  // //////////////////////////////////////////////////////////////////////
42  std::string Result::describeShortKey() const {
43  std::ostringstream oStr;
44  oStr << _queryString;
45  return oStr.str();
46  }
47 
48  // //////////////////////////////////////////////////////////////////////
49  std::string Result::describeKey() const {
50  std::ostringstream oStr;
51  oStr << "'" << describeShortKey() << "' ";
52  if (_correctedQueryString.empty() == false
53  && _correctedQueryString != _queryString) {
54  oStr << "(corrected into '" << _correctedQueryString
55  << "' with an edit distance/error of " << _editDistance
56  << " over an allowable distance of " << _allowableEditDistance
57  << ") - ";
58  } else {
59  oStr << "- ";
60  }
61  return oStr.str();
62  }
63 
64  // //////////////////////////////////////////////////////////////////////
65  std::string Result::toString() const {
66  std::ostringstream oStr;
67  oStr << describeKey();
68 
69  if (_documentList.empty() == true) {
70  oStr << "No match" << std::endl;
71  return oStr.str();
72  }
73  assert (_hasFullTextMatched == true);
74 
75  unsigned short idx = 0;
76  for (DocumentList_T::const_iterator itDoc = _documentList.begin();
77  itDoc != _documentList.end(); ++itDoc, ++idx) {
78  const XapianDocumentPair_T& lDocumentPair = *itDoc;
79 
80  const Xapian::Document& lXapianDoc = lDocumentPair.first;
81  const Xapian::docid& lDocID = lXapianDoc.get_docid();
82 
83  const ScoreBoard& lScoreBoard = lDocumentPair.second;
84 
85  if (idx != 0) {
86  oStr << ", ";
87  }
88  oStr << "Doc ID: " << lDocID << ", matching with ("
89  << lScoreBoard.describe() << "), containing: '"
90  << lXapianDoc.get_data() << "'";
91  }
92 
93  return oStr.str();
94  }
95 
96  // //////////////////////////////////////////////////////////////////////
97  void Result::toStream (std::ostream& ioOut) const {
98  ioOut << toString();
99  }
100 
101  // //////////////////////////////////////////////////////////////////////
102  void Result::fromStream (std::istream& ioIn) {
103  }
104 
105  // //////////////////////////////////////////////////////////////////////
107  getDocumentPair (const Xapian::docid& iDocID) const {
108  // Retrieve the Xapian document and associated ScoreBoard structure
109  // corresponding to the doc ID of the best matching document
110  DocumentMap_T::const_iterator itDoc = _documentMap.find (iDocID);
111 
112  if (itDoc == _documentMap.end()) {
113  OPENTREP_LOG_ERROR ("The Xapian document (ID = " << iDocID
114  << ") can not be found in the Result object "
115  << describeKey());
116  }
117  assert (itDoc != _documentMap.end());
118 
119  //
120  const XapianDocumentPair_T& oDocumentPair = itDoc->second;
121 
122  //
123  return oDocumentPair;
124  }
125 
126  // //////////////////////////////////////////////////////////////////////
127  const Xapian::Document& Result::
128  getDocument (const Xapian::docid& iDocID) const {
129  // First, retrieve the pair made of Xapian document and associated
130  // ScoreBoard structure
131  const XapianDocumentPair_T& lDocumentPair = getDocumentPair (iDocID);
132 
133  // Then, take the Xapian document (and leave the ScoreBoard out)
134  const Xapian::Document& oXapianDocument = lDocumentPair.first;
135 
136  //
137  return oXapianDocument;
138  }
139 
140  // //////////////////////////////////////////////////////////////////////
141  void Result::addDocument (const Xapian::Document& iDocument,
142  const Score_T& iScore) {
151  Score_T lCorrectedScore = iScore;
152  if (_editDistance > 0) {
153  lCorrectedScore = iScore / (_editDistance * _editDistance * _editDistance);
154  }
155 
156  // The document is created at the time of (Xapian-based) full-text matching
157  const ScoreType lXapianScoreType (ScoreType::XAPIAN_PCT);
158 
159  // Create a ScoreBoard structure
160  const ScoreBoard lScoreBoard (_queryString,
161  lXapianScoreType, lCorrectedScore);
162 
163  // Retrieve the ID of the Xapian document
164  const Xapian::docid& lDocID = iDocument.get_docid();
165 
175  // Create a (Xapian document, score board) pair, so as to store
176  // the document along with its corresponding score board
177  const XapianDocumentPair_T lDocumentPair (iDocument, lScoreBoard);
178 
179  // Insert the just created pair into the dedicated (STL) list
180  _documentList.push_back (lDocumentPair);
181 
182  // Insert the just created pair into the dedicated (STL) map
183  const bool hasInsertBeenSuccessful =
184  _documentMap.insert (DocumentMap_T::value_type (lDocID,
185  lDocumentPair)).second;
186  // Sanity check
187  assert (hasInsertBeenSuccessful == true);
188  }
189 
190  // //////////////////////////////////////////////////////////////////////
191  void Result::fillResult (const Xapian::MSet& iMatchingSet) {
196  for (Xapian::MSetIterator itDoc = iMatchingSet.begin();
197  itDoc != iMatchingSet.end(); ++itDoc) {
198  const Xapian::percent& lXapianPercentage = itDoc.get_percent();
199  const Xapian::Document& lDocument = itDoc.get_document();
200  addDocument (lDocument, lXapianPercentage);
201  }
202  }
203 
204  // //////////////////////////////////////////////////////////////////////
205  void Result::fillPlace (Place& ioPlace) const {
206  // Set the original and corrected/suggested keywords
207  ioPlace.setOriginalKeywords (_queryString);
208  ioPlace.setCorrectedKeywords (_correctedQueryString);
209 
210  // Set the effective (Levenshtein) edit distance/error, as
211  // well as the allowable edit distance/error
212  ioPlace.setEditDistance (_editDistance);
213  ioPlace.setAllowableEditDistance (_allowableEditDistance);
214 
215  // Set the Xapian document ID
216  ioPlace.setDocID (_bestDocID);
217 
218  // Set the matching percentage
219  ioPlace.setPercentage (_bestCombinedWeight);
220 
221  // Retrieve the parameters of the best matching document
222  const LocationKey& lKey = ioPlace.getKey();
223 
224  // DEBUG
225  OPENTREP_LOG_DEBUG ("Place key: " << lKey << " - Xapian ID " << _bestDocID
226  << ", " << _bestCombinedWeight << "% [" << _bestDocData
227  << "]");
228  }
229 
241  // //////////////////////////////////////////////////////////////////////
242  static unsigned int calculateEditDistance (const TravelQuery_T& iPhrase) {
243  NbOfErrors_T oEditDistance = 2;
244 
245  const NbOfErrors_T lQueryStringSize = iPhrase.size();
246 
247  oEditDistance = lQueryStringSize / K_DEFAULT_SIZE_FOR_SPELLING_ERROR_UNIT;
248  return oEditDistance;
249  }
250 
251  // //////////////////////////////////////////////////////////////////////
253  // Initialise the POR (point of reference) parser
254  PORStringParser lStringParser (iRawDataString);
255 
256  // Parse the raw data
257  const Location& oLocation = lStringParser.generateLocation();
258 
259  // DEBUG
260  //OPENTREP_LOG_DEBUG ("Location: " << oLocation);
261 
262  return oLocation;
263  }
264 
265  // //////////////////////////////////////////////////////////////////////
266  Location Result::retrieveLocation (const Xapian::Document& iDocument) {
267  // Retrieve the Xapian document data
268  const std::string& lDocumentDataStr = iDocument.get_data();
269  const RawDataString_T& lDocumentData = RawDataString_T (lDocumentDataStr);
270 
271  // Parse the POR details and create the corresponding Location structure
272  const Location& oLocation = retrieveLocation (lDocumentData);
273 
274  return oLocation;
275  }
276 
277  // //////////////////////////////////////////////////////////////////////
278  LocationKey Result::getPrimaryKey (const Xapian::Document& iDocument) {
279  // Parse the POR (point of reference) details held by the Xapian document
280  const Location& lLocation = retrieveLocation (iDocument);
281 
282  // Get the key (IATA and ICAO codes, GeonamesID)
283  const LocationKey& oLocationKey = lLocation.getKey();
284 
285  return oLocationKey;
286  }
287 
288  // //////////////////////////////////////////////////////////////////////
289  Score_T Result::getEnvelopeID (const Xapian::Document& iDocument) {
290  // Parse the POR (point of reference) details held by the Xapian document
291  const Location& lLocation = retrieveLocation (iDocument);
292 
293  // Get the envelope ID (it is an integer value in the Location structure)
294  const EnvelopeID_T& lEnvelopeIDInt = lLocation.getEnvelopeID();
295 
296  // Convert the envelope ID value, from an integer to a floating point one
297  const Score_T oEnvelopeID = static_cast<const Score_T> (lEnvelopeIDInt);
298 
299  return oEnvelopeID;
300  }
301 
302  // //////////////////////////////////////////////////////////////////////
303  PageRank_T Result::getPageRank (const Xapian::Document& iDocument) {
304  // Parse the POR (point of reference) details held by the Xapian document
305  const Location& lLocation = retrieveLocation (iDocument);
306 
307  // Get the PageRank value
308  const PageRank_T& oPageRank = lLocation.getPageRank();
309 
310  return oPageRank;
311  }
312 
313  // //////////////////////////////////////////////////////////////////////
314  std::string Result::fullTextMatch (const Xapian::Database& iDatabase,
315  const TravelQuery_T& iQueryString,
316  Xapian::MSet& ioMatchingSet) {
317  std::string oMatchedString;
318 
319  // Catch any Xapian::Error exceptions thrown
320  try {
321 
322  // Build the query object
323  Xapian::QueryParser lQueryParser;
324  lQueryParser.set_database (iDatabase);
325 
331  // lQueryParser.set_default_op (Xapian::Query::OP_ADJ);
332  lQueryParser.set_default_op (Xapian::Query::OP_PHRASE);
333 
334  // DEBUG
335  /*
336  OPENTREP_LOG_DEBUG ("Query parser `" << lQueryParser.get_description()
337  << "'");
338  */
339 
340  // DEBUG
341  OPENTREP_LOG_DEBUG (" --------");
342 
343  // Start an enquire session
344  Xapian::Enquire enquire (iDatabase);
345 
352  const Xapian::Query& lXapianQuery =
353  lQueryParser.parse_query (iQueryString,
354  Xapian::QueryParser::FLAG_BOOLEAN
355  | Xapian::QueryParser::FLAG_PHRASE
356  | Xapian::QueryParser::FLAG_LOVEHATE);
357 
358  // Give the query object to the enquire session
359  enquire.set_query (lXapianQuery);
360 
361  // Get the top K_DEFAULT_XAPIAN_MATCHING_SET_SIZE (normally, 30)
362  // results of the query
363  ioMatchingSet = enquire.get_mset (0, K_DEFAULT_XAPIAN_MATCHING_SET_SIZE);
364 
365  // Display the results
366  int nbMatches = ioMatchingSet.size();
367 
368  // DEBUG
369  OPENTREP_LOG_DEBUG (" Query string: `" << iQueryString
370  << "', i.e.: `" << lXapianQuery.get_description()
371  << "' => " << nbMatches << " result(s) found");
372 
373  if (nbMatches != 0) {
374  // Store the effective (Levenshtein) edit distance/error
375  const NbOfErrors_T lEditDistance = 0;
376  setEditDistance (lEditDistance);
377 
378  // Store the allowable edit distance/error
379  setAllowableEditDistance (lEditDistance);
380 
381  //
382  oMatchedString = iQueryString;
383 
384  // Store the fact that there has been a full-text match
385  setHasFullTextMatched (true);
386 
387  // Store the corrected string (the same as the given string, here,
388  // as that latter directly gave full-text matches).
389  setCorrectedQueryString (oMatchedString);
390 
391  // DEBUG
392  OPENTREP_LOG_DEBUG (" Query string: `" << iQueryString
393  << "' provides " << nbMatches << " exact matches.");
394 
395  return oMatchedString;
396  }
397  assert (ioMatchingSet.empty() == true);
398 
404  const NbOfErrors_T& lAllowableEditDistance =
405  calculateEditDistance (iQueryString);
406 
407  // Let Xapian find a spelling correction (if any)
408  const std::string& lCorrectedString =
409  iDatabase.get_spelling_suggestion (iQueryString, lAllowableEditDistance);
410 
411  // If the correction is no better than the original string, there is
412  // no need to go further: there is no match.
413  if (lCorrectedString.empty() == true || lCorrectedString == iQueryString) {
414  // DEBUG
415  OPENTREP_LOG_DEBUG (" Query string: `"
416  << iQueryString << "' provides no match, "
417  << "and there is no spelling suggestion, "
418  << "even with an edit distance of "
419  << lAllowableEditDistance);
420 
421  // Store the fact that there has not been any full-text match
422  setHasFullTextMatched (false);
423 
424  // Leave the string empty
425  return oMatchedString;
426  }
427  assert (lCorrectedString.empty() == false
428  && lCorrectedString != iQueryString);
429 
430  // Calculate the effective (Levenshtein) edit distance/error
431  const NbOfErrors_T& lEditDistance =
432  Levenshtein::getDistance (iQueryString, lCorrectedString);
433 
441  const Xapian::Query& lCorrectedXapianQuery =
442  lQueryParser.parse_query (lCorrectedString,
443  Xapian::QueryParser::FLAG_BOOLEAN
444  | Xapian::QueryParser::FLAG_PHRASE
445  | Xapian::QueryParser::FLAG_LOVEHATE);
446 
447  // Retrieve a maximum of K_DEFAULT_XAPIAN_MATCHING_SET_SIZE (normally,
448  // 30) entries
449  enquire.set_query (lCorrectedXapianQuery);
450  ioMatchingSet = enquire.get_mset (0, K_DEFAULT_XAPIAN_MATCHING_SET_SIZE);
451 
452  // Display the results
453  nbMatches = ioMatchingSet.size();
454 
455  // DEBUG
456  OPENTREP_LOG_DEBUG (" Corrected query string: `" << lCorrectedString
457  << "', i.e.: `"
458  << lCorrectedXapianQuery.get_description()
459  << "' => " << nbMatches << " result(s) found");
460 
461  if (nbMatches != 0) {
462  // Store the effective (Levenshtein) edit distance/error
463  setEditDistance (lEditDistance);
464 
465  // Store the allowable edit distance/error
466  setAllowableEditDistance (lAllowableEditDistance);
467 
468  //
469  oMatchedString = lCorrectedString;
470 
471  // Store the fact that there has been a full-text match
472  setHasFullTextMatched (true);
473 
474  // Store the corrected string
475  setCorrectedQueryString (oMatchedString);
476 
477  // DEBUG
478  OPENTREP_LOG_DEBUG (" Query string: `"
479  << iQueryString << "', spelling suggestion: `"
480  << lCorrectedString
481  << "', with a Levenshtein edit distance of "
482  << lEditDistance
483  << " over an allowable edit distance of "
484  << lAllowableEditDistance << ", provides "
485  << nbMatches << " matches.");
486 
487  //
488  return oMatchedString;
489  }
490 
491  // Error
492  OPENTREP_LOG_ERROR (" Query string: `"
493  << iQueryString << "', spelling suggestion: `"
494  << lCorrectedString
495  << "', with a Levenshtein edit distance of "
496  << lEditDistance
497  << " over an allowable edit distance of "
498  << lAllowableEditDistance << ", provides no match, "
499  << "which is not consistent with the existence of "
500  << "the spelling correction.");
501  assert (false);
502 
503  } catch (const Xapian::Error& error) {
504  OPENTREP_LOG_ERROR ("Exception: " << error.get_msg());
505  throw XapianException (error.get_msg());
506  }
507 
508  // Store the fact that there has not been any full-text match
509  setHasFullTextMatched (false);
510 
511  return oMatchedString;
512  }
513 
514  // //////////////////////////////////////////////////////////////////////
515  std::string Result::fullTextMatch (const Xapian::Database& iDatabase,
516  const TravelQuery_T& iQueryString) {
517  std::string oMatchedString;
518 
519  // Catch any Xapian::Error exceptions thrown
520  try {
521 
522  // DEBUG
523  OPENTREP_LOG_DEBUG (" ----------------");
524  OPENTREP_LOG_DEBUG (" Current query string: '"<< iQueryString << "'");
525 
526  // Check whether the string should be filtered out
527  const bool isToBeAdded = Filter::shouldKeep ("", iQueryString);
528  //const bool isToBeAdded = true;
529 
530  Xapian::MSet lMatchingSet;
531  if (isToBeAdded == true) {
532  oMatchedString = fullTextMatch (iDatabase, iQueryString, lMatchingSet);
533  }
534 
535  // Create the corresponding documents (from the Xapian MSet object)
536  fillResult (lMatchingSet);
537 
538  // DEBUG
539  if (isToBeAdded == false) {
540  OPENTREP_LOG_DEBUG (" No full text search performed as '"
541  << iQueryString
542  << "' is not made of searchable words");
543  }
544  OPENTREP_LOG_DEBUG (" ==> " << toString());
545  OPENTREP_LOG_DEBUG (" ----------------");
546 
547  } catch (const Xapian::Error& error) {
548  OPENTREP_LOG_ERROR ("Xapian-related error: " << error.get_msg());
549  throw XapianException (error.get_msg());
550  }
551 
552  return oMatchedString;
553  }
554 
555  // //////////////////////////////////////////////////////////////////////
557  // Browse the list of Xapian documents
558  for (DocumentList_T::const_iterator itDoc = _documentList.begin();
559  itDoc != _documentList.end(); ++itDoc) {
560  const XapianDocumentPair_T& lDocumentPair = *itDoc;
561 
562  // Retrieve the Xapian document
563  const Xapian::Document& lXapianDoc = lDocumentPair.first;
564 
565  // Extract the Xapian document ID
566  const Xapian::docid& lDocID = lXapianDoc.get_docid();
567 
568  // Extract the envelope ID from the document data
569  const LocationKey& lLocationKey = getPrimaryKey (lXapianDoc);
570 
571  // Retrieve the score board for that Xapian document
572  const ScoreBoard& lScoreBoard = lDocumentPair.second;
573 
574  // Extract the Xapian matching percentage
575  const Score_T& lXapianPct = lScoreBoard.getScore (ScoreType::XAPIAN_PCT);
576 
577  // DEBUG
578  OPENTREP_LOG_NOTIFICATION (" [xapian] '" << describeShortKey()
579  << "' with (" << lLocationKey << ", doc ID = "
580  << lDocID << ") matches at " << lXapianPct
581  << "%");
582  }
583  }
584 
585  // //////////////////////////////////////////////////////////////////////
586  void Result::setScoreOnDocMap (const Xapian::docid& iDocID,
587  const ScoreType& iType, const Score_T& iScore) {
588  // Retrieve the Xapian document and associated ScoreBoard structure
589  // corresponding to the given doc ID
590  DocumentMap_T::iterator itDoc = _documentMap.find (iDocID);
591 
592  if (itDoc == _documentMap.end()) {
593  OPENTREP_LOG_ERROR ("The Xapian document (ID = " << iDocID
594  << ") can not be found in the Result object "
595  << describeKey());
596  }
597  assert (itDoc != _documentMap.end());
598 
599  // Retrieve the associated ScoreBoard structure
600  XapianDocumentPair_T& lXapianDocPair = itDoc->second;
601  ScoreBoard& lScoreBoard = lXapianDocPair.second;
602 
603  // Update the score/weight
604  lScoreBoard.setScore (iType, iScore);
605  }
606 
607  // //////////////////////////////////////////////////////////////////////
609  // Browse the list of Xapian documents
610  for (DocumentList_T::iterator itDoc = _documentList.begin();
611  itDoc != _documentList.end(); ++itDoc) {
612  XapianDocumentPair_T& lDocumentPair = *itDoc;
613 
614  // Retrieve the Xapian document
615  const Xapian::Document& lXapianDoc = lDocumentPair.first;
616 
617  // Extract the Xapian document ID
618  const Xapian::docid& lDocID = lXapianDoc.get_docid();
619 
620  // Extract the primary key (location key) from the document data
621  const LocationKey& lLocationKey = getPrimaryKey (lXapianDoc);
622 
623  // Extract the envelope ID from the document data
624  const EnvelopeID_T& lEnvelopeIDInt = getEnvelopeID (lXapianDoc);
625 
626  // DEBUG
627  if (lEnvelopeIDInt != 0) {
629  << "] (" << lLocationKey << ", doc ID = "
630  << lDocID << ") has a non-null envelope ID ("
631  << lEnvelopeIDInt << ") => match of 0.10%");
632  }
633 
634  // Convert the envelope ID value, from an integer to a floating point one
635  const Score_T lEnvelopeID = static_cast<const Score_T> (lEnvelopeIDInt);
636 
637  // Retrieve the score board for that Xapian document
638  ScoreBoard& lScoreBoard = lDocumentPair.second;
639 
640  // Store the envelope-related weight
641  lScoreBoard.setScore (ScoreType::ENV_ID, lEnvelopeID);
642  setScoreOnDocMap (lDocID, ScoreType::ENV_ID, lEnvelopeID);
643  }
644  }
645 
646  // //////////////////////////////////////////////////////////////////////
648  // Browse the list of Xapian documents
649  for (DocumentList_T::iterator itDoc = _documentList.begin();
650  itDoc != _documentList.end(); ++itDoc) {
651  XapianDocumentPair_T& lDocumentPair = *itDoc;
652 
653  // Retrieve the Xapian document
654  const Xapian::Document& lXapianDoc = lDocumentPair.first;
655 
656  // Extract the Xapian document ID
657  const Xapian::docid& lDocID = lXapianDoc.get_docid();
658 
659  // Extract the primary key (location key) from the document data
660  const LocationKey& lLocationKey = getPrimaryKey (lXapianDoc);
661 
662  // Initialisation of the IATA/ICAO code full matching percentage
663  Score_T lCodeMatchPct = 0.0;
664  bool hasCodeFullyMatched = false;
665 
666  // Filter out "standard" words such as "airport", "international",
667  // "city", as well as words having a length strictly less than
668  // 3 letters.
669  std::string lFilteredString (_queryString);
670  const NbOfLetters_T kMinWordLength = 3;
671  Filter::trim (lFilteredString, kMinWordLength);
672 
673  // Check whether or not the filtered query string is made of
674  // a single word
675  WordList_T lFilteredQueryWordList;
677  lFilteredQueryWordList);
678  const NbOfWords_T nbOfFilteredQueryWords = lFilteredQueryWordList.size();
679 
680  //
681  if (_hasFullTextMatched == true) {
688  const size_t lNbOfLetters = lFilteredString.size();
689  if (nbOfFilteredQueryWords == 1
690  && lNbOfLetters >= 3 && lNbOfLetters <= 4
691  && _correctedQueryString == _queryString) {
692  // Convert the query string (made of one word of 3 or 4 letters)
693  // to uppercase letters
694  std::string lUpperQueryWord;
695  lUpperQueryWord.resize (lNbOfLetters);
696  std::transform (lFilteredString.begin(), lFilteredString.end(),
697  lUpperQueryWord.begin(), ::toupper);
698 
699  // Retrieve with the IATA code
700  const IATACode_T& lIataCode = lLocationKey.getIataCode();
701 
702  // Compare the 3/4-letter-word query string with the IATA
703  // and ICAO codes
704  if (lUpperQueryWord == lIataCode) {
708  lCodeMatchPct = 1.0;
709  hasCodeFullyMatched = true;
710  }
711  }
712 
713  if (hasCodeFullyMatched == true) {
714  // DEBUG
716  << "' matches the IATA/ICAO code ("
717  << lLocationKey << ", doc ID = "
718  << lDocID << ") => match of "
720  } else {
721  // DEBUG
723  << "' does not match with the IATA/ICAO "
724  << "code (" << lLocationKey << ", doc ID = "
725  << lDocID << ") => match of "
727  }
728  }
729 
730  // Retrieve the score board for that Xapian document
731  ScoreBoard& lScoreBoard = lDocumentPair.second;
732 
733  // Store the IATA/ICAO code match percentage/weight
734  lScoreBoard.setScore (ScoreType::CODE_FULL_MATCH, lCodeMatchPct);
735  setScoreOnDocMap (lDocID, ScoreType::CODE_FULL_MATCH, lCodeMatchPct);
736  }
737  }
738 
739  // //////////////////////////////////////////////////////////////////////
741  // Browse the list of Xapian documents
742  for (DocumentList_T::iterator itDoc = _documentList.begin();
743  itDoc != _documentList.end(); ++itDoc) {
744  XapianDocumentPair_T& lDocumentPair = *itDoc;
745 
746  // Retrieve the Xapian document
747  const Xapian::Document& lXapianDoc = lDocumentPair.first;
748 
749  // Extract the Xapian document ID
750  const Xapian::docid& lDocID = lXapianDoc.get_docid();
751 
752  // Extract the primary key (location key) from the document data
753  const LocationKey& lLocationKey = getPrimaryKey (lXapianDoc);
754 
755  // Extract the PageRank from the document data
756  const Score_T& lPageRank = getPageRank (lXapianDoc);
757 
758  // DEBUG
760  << "] (" << lLocationKey << ", doc ID = "
761  << lDocID << ") has a PageRank of "
762  << lPageRank << "%");
763 
764  // Retrieve the score board for that Xapian document
765  ScoreBoard& lScoreBoard = lDocumentPair.second;
766 
767  // Store the PageRank weight
768  lScoreBoard.setScore (ScoreType::PAGE_RANK, lPageRank);
769  setScoreOnDocMap (lDocID, ScoreType::PAGE_RANK, lPageRank);
770  }
771  }
772 
773  // //////////////////////////////////////////////////////////////////////
780  }
781 
782  // //////////////////////////////////////////////////////////////////////
784  Percentage_T lMaxPercentage = 0.0;
785  std::string lBestDocData;
786 
787  // Browse the list of Xapian documents
788  Xapian::docid lBestDocID = 0;
789  for (DocumentList_T::iterator itDoc = _documentList.begin();
790  itDoc != _documentList.end(); ++itDoc) {
791  XapianDocumentPair_T& lDocumentPair = *itDoc;
792 
793  // Retrieve the Xapian document ID
794  const Xapian::Document& lXapianDoc = lDocumentPair.first;
795  const Xapian::docid& lDocID = lXapianDoc.get_docid();
796  const std::string& lDocData = lXapianDoc.get_data();
797 
802  ScoreBoard& lScoreBoard = lDocumentPair.second;
803  const Percentage_T& lPercentage = lScoreBoard.calculateCombinedWeight();
804 
814  // Register the document, if it is the best matching until now
815  if (lPercentage > lMaxPercentage) {
816  lMaxPercentage = lPercentage;
817  lBestDocID = lDocID;
818  lBestDocData = lDocData;
819  }
820  }
821 
822  // Check whether or not the (original) query string is made of a single word
823  WordList_T lOriginalQueryWordList;
825  lOriginalQueryWordList);
826  const NbOfWords_T nbOfOriginalQueryWords = lOriginalQueryWordList.size();
827 
828  //
829  if (_hasFullTextMatched == true) {
830  // Retrieve the primary key (IATA, location type, Geonames ID) of
831  // the place corresponding to the document
832  const XapianDocumentPair_T& lXapianDocPair = getDocumentPair (lBestDocID);
833  const Xapian::Document& lXapianDoc = lXapianDocPair.first;
834  const ScoreBoard& lScoreBoard = lXapianDocPair.second;
835  const LocationKey& lLocationKey = getPrimaryKey (lXapianDoc);
836 
837  // DEBUG
838  OPENTREP_LOG_DEBUG (" [pct] '" << describeShortKey()
839  << "' matches at " << lMaxPercentage
840  << "% for " << lLocationKey << " (doc ID = "
841  << lBestDocID << "). Score calculation: "
842  << lScoreBoard.describe());
843 
844  } else {
849  const bool shouldBeKept = Filter::shouldKeep ("", _queryString);
850 
851  if (nbOfOriginalQueryWords == 1 && shouldBeKept == true) {
857  lMaxPercentage = 100.0;
858 
859  // DEBUG
860  OPENTREP_LOG_DEBUG (" [pct] '" << describeShortKey()
861  << "' does not match, but it is a non black-listed "
862  << "single-word string; hence, the weight is "
863  << lMaxPercentage << "%");
864 
865  } else {
874  lMaxPercentage = std::pow (10.0, -3*nbOfOriginalQueryWords);
875 
876  // DEBUG
877  OPENTREP_LOG_DEBUG(" [pct] '" << describeShortKey()
878  << "' does not match, and is either a multiple-word "
879  << "string or black-listed; hence, the weight is "
880  << lMaxPercentage << "%");
881  }
882  }
883 
884  // Store the doc ID of the best matching document
885  setBestDocID (lBestDocID);
886 
887  // Store the best weight
888  setBestCombinedWeight (lMaxPercentage);
889 
890  // Store all the details of the Xapian document
891  setBestDocData (lBestDocData);
892  }
893 
894 }
void fromStream(std::istream &ioIn)
Definition: Result.cpp:102
void setEditDistance(const NbOfErrors_T &iEditDistance)
Definition: Place.hpp:848
void setCorrectedQueryString(const TravelQuery_T &iCorrectedQueryString)
Definition: Result.hpp:156
Class modelling the primary key of a location/POR (point of reference).
Definition: LocationKey.hpp:21
Structure holding a board for all the types of score/matching having been performed.
Definition: ScoreBoard.hpp:22
const NbOfErrors_T K_DEFAULT_SIZE_FOR_SPELLING_ERROR_UNIT
std::pair< Xapian::Document, ScoreBoard > XapianDocumentPair_T
Definition: Result.hpp:23
#define OPENTREP_LOG_ERROR(iToBeLogged)
Definition: Logger.hpp:23
#define OPENTREP_LOG_DEBUG(iToBeLogged)
Definition: Logger.hpp:32
const XapianDocumentPair_T & getDocumentPair(const Xapian::docid &) const
Definition: Result.cpp:107
#define OPENTREP_LOG_NOTIFICATION(iToBeLogged)
Definition: Logger.hpp:26
Score_T getScore(const ScoreType &) const
Definition: ScoreBoard.cpp:40
double Percentage_T
static int getDistance(const std::string &iSource, const std::string &iTarget)
Definition: Levenshtein.cpp:13
double Score_T
void calculateCombinedWeights()
Definition: Result.cpp:783
static LocationKey getPrimaryKey(const Xapian::Document &)
Definition: Result.cpp:278
Structure modelling a (geographical) location.
Definition: Location.hpp:24
static Score_T getEnvelopeID(const Xapian::Document &)
Definition: Result.cpp:289
double PageRank_T
static PageRank_T getPageRank(const Xapian::Document &)
Definition: Result.cpp:303
unsigned int NbOfLetters_T
void setCorrectedKeywords(const std::string &iCorrectedKeywords)
Definition: Place.hpp:834
const Xapian::Document & getDocument(const Xapian::docid &) const
Definition: Result.cpp:128
static Location retrieveLocation(const Xapian::Document &)
Definition: Result.cpp:266
void addDocument(const Xapian::Document &, const Score_T &)
Definition: Result.cpp:141
std::string fullTextMatch(const Xapian::Database &, const TravelQuery_T &)
Definition: Result.cpp:515
void displayXapianPercentages() const
Definition: Result.cpp:556
static void trim(std::string &ioPhrase, const NbOfLetters_T &iMinWordLength=4)
Definition: Filter.cpp:131
std::list< Word_T > WordList_T
void setOriginalKeywords(const std::string &iOriginalKeywords)
Definition: Place.hpp:827
void setScoreOnDocMap(const Xapian::docid &, const ScoreType &, const Score_T &)
Definition: Result.cpp:586
void fillResult(const Xapian::MSet &iMatchingSet)
Definition: Result.cpp:191
Percentage_T calculateCombinedWeight()
Definition: ScoreBoard.cpp:186
void toStream(std::ostream &ioOut) const
Definition: Result.cpp:97
static void tokeniseStringIntoWordList(const TravelQuery_T &, WordList_T &)
Definition: WordHolder.cpp:37
const PageRank_T & getPageRank() const
Definition: Location.hpp:339
void setEditDistance(const NbOfErrors_T &iEditDistance)
Definition: Result.hpp:170
void setAllowableEditDistance(const NbOfErrors_T &iAllowableEditDistance)
Definition: Place.hpp:856
unsigned short NbOfErrors_T
const Percentage_T K_DEFAULT_MODIFIED_MATCHING_PCT
void setBestCombinedWeight(const Percentage_T &iPercentage)
Definition: Result.hpp:215
Class modelling a place/POR (point of reference).
Definition: Place.hpp:28
static bool shouldKeep(const std::string &iPhrase, const std::string &iWord)
Definition: Filter.cpp:144
std::string toString() const
Definition: Result.cpp:65
void setBestDocData(const std::string &iDocData)
Definition: Result.hpp:222
const Percentage_T K_DEFAULT_FULL_CODE_MATCH_PCT
const NbOfMatches_T K_DEFAULT_XAPIAN_MATCHING_SET_SIZE
unsigned short NbOfWords_T
Enumeration of score types.
Definition: ScoreType.hpp:25
void calculateHeuristicWeights()
Definition: Result.cpp:774
void fillPlace(Place &) const
Definition: Result.cpp:205
const LocationKey & getKey() const
Definition: Location.hpp:30
std::string describeKey() const
Definition: Result.cpp:49
void setDocID(const XapianDocID_T &iDocID)
Definition: Place.hpp:870
void calculatePageRanks()
Definition: Result.cpp:740
void setScore(const ScoreType &, const Score_T &)
Definition: ScoreBoard.cpp:54
std::string describe() const
Definition: ScoreBoard.cpp:157
void setBestDocID(const Xapian::docid &iDocID)
Definition: Result.hpp:207
std::string TravelQuery_T
void setHasFullTextMatched(const bool iHasFullTextMatched)
Definition: Result.hpp:163
const EnvelopeID_T & getEnvelopeID() const
Definition: Location.hpp:108
const LocationKey & getKey() const
Definition: Place.hpp:58
void setPercentage(const MatchingPercentage_T &iPercentage)
Definition: Place.hpp:841
const IATACode_T & getIataCode() const
Definition: LocationKey.hpp:27
std::string describeShortKey() const
Definition: Result.cpp:42
const Location & generateLocation()
void setAllowableEditDistance(const NbOfErrors_T &iAllowableEditDistance)
Definition: Result.hpp:178
void calculateEnvelopeWeights()
Definition: Result.cpp:608
void calculateCodeMatches()
Definition: Result.cpp:647
static unsigned int calculateEditDistance(const TravelQuery_T &iPhrase)
Helper function.