9 #include <boost/tokenizer.hpp>
26 const Xapian::Database& iDatabase)
27 : _resultHolder (NULL), _database (iDatabase),
28 _queryString (iQueryString), _hasFullTextMatched (false),
29 _bestDocData (RawDataString_T (
"")) {
43 std::ostringstream oStr;
50 std::ostringstream oStr;
52 if (_correctedQueryString.empty() ==
false
53 && _correctedQueryString != _queryString) {
54 oStr <<
"(corrected into '" << _correctedQueryString
55 <<
"' with an edit distance/error of " << _editDistance
56 <<
" over an allowable distance of " << _allowableEditDistance
66 std::ostringstream oStr;
69 if (_documentList.empty() ==
true) {
70 oStr <<
"No match" << std::endl;
73 assert (_hasFullTextMatched ==
true);
75 unsigned short idx = 0;
76 for (DocumentList_T::const_iterator itDoc = _documentList.begin();
77 itDoc != _documentList.end(); ++itDoc, ++idx) {
80 const Xapian::Document& lXapianDoc = lDocumentPair.first;
81 const Xapian::docid& lDocID = lXapianDoc.get_docid();
83 const ScoreBoard& lScoreBoard = lDocumentPair.second;
88 oStr <<
"Doc ID: " << lDocID <<
", matching with ("
89 << lScoreBoard.
describe() <<
"), containing: '"
90 << lXapianDoc.get_data() <<
"'";
110 DocumentMap_T::const_iterator itDoc = _documentMap.find (iDocID);
112 if (itDoc == _documentMap.end()) {
114 <<
") can not be found in the Result object "
117 assert (itDoc != _documentMap.end());
123 return oDocumentPair;
134 const Xapian::Document& oXapianDocument = lDocumentPair.first;
137 return oXapianDocument;
151 Score_T lCorrectedScore = iScore;
152 if (_editDistance > 0) {
153 lCorrectedScore = iScore / (_editDistance * _editDistance * _editDistance);
161 lXapianScoreType, lCorrectedScore);
164 const Xapian::docid& lDocID = iDocument.get_docid();
180 _documentList.push_back (lDocumentPair);
183 const bool hasInsertBeenSuccessful =
184 _documentMap.insert (DocumentMap_T::value_type (lDocID,
185 lDocumentPair)).second;
187 assert (hasInsertBeenSuccessful ==
true);
196 for (Xapian::MSetIterator itDoc = iMatchingSet.begin();
197 itDoc != iMatchingSet.end(); ++itDoc) {
198 const Xapian::percent& lXapianPercentage = itDoc.get_percent();
199 const Xapian::Document& lDocument = itDoc.get_document();
226 <<
", " << _bestCombinedWeight <<
"% [" << _bestDocData
248 return oEditDistance;
268 const std::string& lDocumentDataStr = iDocument.get_data();
297 const Score_T oEnvelopeID =
static_cast<const Score_T> (lEnvelopeIDInt);
316 Xapian::MSet& ioMatchingSet) {
317 std::string oMatchedString;
323 Xapian::QueryParser lQueryParser;
324 lQueryParser.set_database (iDatabase);
332 lQueryParser.set_default_op (Xapian::Query::OP_PHRASE);
344 Xapian::Enquire enquire (iDatabase);
352 const Xapian::Query& lXapianQuery =
353 lQueryParser.parse_query (iQueryString,
354 Xapian::QueryParser::FLAG_BOOLEAN
355 | Xapian::QueryParser::FLAG_PHRASE
356 | Xapian::QueryParser::FLAG_LOVEHATE);
359 enquire.set_query (lXapianQuery);
366 int nbMatches = ioMatchingSet.size();
370 <<
"', i.e.: `" << lXapianQuery.get_description()
371 <<
"' => " << nbMatches <<
" result(s) found");
373 if (nbMatches != 0) {
382 oMatchedString = iQueryString;
393 <<
"' provides " << nbMatches <<
" exact matches.");
395 return oMatchedString;
397 assert (ioMatchingSet.empty() ==
true);
408 const std::string& lCorrectedString =
409 iDatabase.get_spelling_suggestion (iQueryString, lAllowableEditDistance);
413 if (lCorrectedString.empty() ==
true || lCorrectedString == iQueryString) {
416 << iQueryString <<
"' provides no match, "
417 <<
"and there is no spelling suggestion, "
418 <<
"even with an edit distance of "
419 << lAllowableEditDistance);
425 return oMatchedString;
427 assert (lCorrectedString.empty() ==
false
428 && lCorrectedString != iQueryString);
441 const Xapian::Query& lCorrectedXapianQuery =
442 lQueryParser.parse_query (lCorrectedString,
443 Xapian::QueryParser::FLAG_BOOLEAN
444 | Xapian::QueryParser::FLAG_PHRASE
445 | Xapian::QueryParser::FLAG_LOVEHATE);
449 enquire.set_query (lCorrectedXapianQuery);
453 nbMatches = ioMatchingSet.size();
458 << lCorrectedXapianQuery.get_description()
459 <<
"' => " << nbMatches <<
" result(s) found");
461 if (nbMatches != 0) {
469 oMatchedString = lCorrectedString;
479 << iQueryString <<
"', spelling suggestion: `"
481 <<
"', with a Levenshtein edit distance of "
483 <<
" over an allowable edit distance of "
484 << lAllowableEditDistance <<
", provides "
485 << nbMatches <<
" matches.");
488 return oMatchedString;
493 << iQueryString <<
"', spelling suggestion: `"
495 <<
"', with a Levenshtein edit distance of "
497 <<
" over an allowable edit distance of "
498 << lAllowableEditDistance <<
", provides no match, "
499 <<
"which is not consistent with the existence of "
500 <<
"the spelling correction.");
503 }
catch (
const Xapian::Error& error) {
505 throw XapianException (error.get_msg());
511 return oMatchedString;
517 std::string oMatchedString;
530 Xapian::MSet lMatchingSet;
531 if (isToBeAdded ==
true) {
532 oMatchedString =
fullTextMatch (iDatabase, iQueryString, lMatchingSet);
539 if (isToBeAdded ==
false) {
542 <<
"' is not made of searchable words");
547 }
catch (
const Xapian::Error& error) {
552 return oMatchedString;
558 for (DocumentList_T::const_iterator itDoc = _documentList.begin();
559 itDoc != _documentList.end(); ++itDoc) {
563 const Xapian::Document& lXapianDoc = lDocumentPair.first;
566 const Xapian::docid& lDocID = lXapianDoc.get_docid();
572 const ScoreBoard& lScoreBoard = lDocumentPair.second;
579 <<
"' with (" << lLocationKey <<
", doc ID = "
580 << lDocID <<
") matches at " << lXapianPct
590 DocumentMap_T::iterator itDoc = _documentMap.find (iDocID);
592 if (itDoc == _documentMap.end()) {
594 <<
") can not be found in the Result object "
597 assert (itDoc != _documentMap.end());
601 ScoreBoard& lScoreBoard = lXapianDocPair.second;
604 lScoreBoard.
setScore (iType, iScore);
610 for (DocumentList_T::iterator itDoc = _documentList.begin();
611 itDoc != _documentList.end(); ++itDoc) {
615 const Xapian::Document& lXapianDoc = lDocumentPair.first;
618 const Xapian::docid& lDocID = lXapianDoc.get_docid();
627 if (lEnvelopeIDInt != 0) {
629 <<
"] (" << lLocationKey <<
", doc ID = "
630 << lDocID <<
") has a non-null envelope ID ("
631 << lEnvelopeIDInt <<
") => match of 0.10%");
635 const Score_T lEnvelopeID =
static_cast<const Score_T> (lEnvelopeIDInt);
638 ScoreBoard& lScoreBoard = lDocumentPair.second;
649 for (DocumentList_T::iterator itDoc = _documentList.begin();
650 itDoc != _documentList.end(); ++itDoc) {
654 const Xapian::Document& lXapianDoc = lDocumentPair.first;
657 const Xapian::docid& lDocID = lXapianDoc.get_docid();
664 bool hasCodeFullyMatched =
false;
669 std::string lFilteredString (_queryString);
677 lFilteredQueryWordList);
678 const NbOfWords_T nbOfFilteredQueryWords = lFilteredQueryWordList.size();
681 if (_hasFullTextMatched ==
true) {
688 const size_t lNbOfLetters = lFilteredString.size();
689 if (nbOfFilteredQueryWords == 1
690 && lNbOfLetters >= 3 && lNbOfLetters <= 4
691 && _correctedQueryString == _queryString) {
694 std::string lUpperQueryWord;
695 lUpperQueryWord.resize (lNbOfLetters);
696 std::transform (lFilteredString.begin(), lFilteredString.end(),
697 lUpperQueryWord.begin(), ::toupper);
704 if (lUpperQueryWord == lIataCode) {
709 hasCodeFullyMatched =
true;
713 if (hasCodeFullyMatched ==
true) {
716 <<
"' matches the IATA/ICAO code ("
717 << lLocationKey <<
", doc ID = "
718 << lDocID <<
") => match of "
723 <<
"' does not match with the IATA/ICAO "
724 <<
"code (" << lLocationKey <<
", doc ID = "
725 << lDocID <<
") => match of "
731 ScoreBoard& lScoreBoard = lDocumentPair.second;
742 for (DocumentList_T::iterator itDoc = _documentList.begin();
743 itDoc != _documentList.end(); ++itDoc) {
747 const Xapian::Document& lXapianDoc = lDocumentPair.first;
750 const Xapian::docid& lDocID = lXapianDoc.get_docid();
760 <<
"] (" << lLocationKey <<
", doc ID = "
761 << lDocID <<
") has a PageRank of "
762 << lPageRank <<
"%");
765 ScoreBoard& lScoreBoard = lDocumentPair.second;
785 std::string lBestDocData;
788 Xapian::docid lBestDocID = 0;
789 for (DocumentList_T::iterator itDoc = _documentList.begin();
790 itDoc != _documentList.end(); ++itDoc) {
794 const Xapian::Document& lXapianDoc = lDocumentPair.first;
795 const Xapian::docid& lDocID = lXapianDoc.get_docid();
796 const std::string& lDocData = lXapianDoc.get_data();
802 ScoreBoard& lScoreBoard = lDocumentPair.second;
815 if (lPercentage > lMaxPercentage) {
816 lMaxPercentage = lPercentage;
818 lBestDocData = lDocData;
825 lOriginalQueryWordList);
826 const NbOfWords_T nbOfOriginalQueryWords = lOriginalQueryWordList.size();
829 if (_hasFullTextMatched ==
true) {
833 const Xapian::Document& lXapianDoc = lXapianDocPair.first;
834 const ScoreBoard& lScoreBoard = lXapianDocPair.second;
839 <<
"' matches at " << lMaxPercentage
840 <<
"% for " << lLocationKey <<
" (doc ID = "
841 << lBestDocID <<
"). Score calculation: "
851 if (nbOfOriginalQueryWords == 1 && shouldBeKept ==
true) {
857 lMaxPercentage = 100.0;
861 <<
"' does not match, but it is a non black-listed "
862 <<
"single-word string; hence, the weight is "
863 << lMaxPercentage <<
"%");
874 lMaxPercentage = std::pow (10.0, -3*nbOfOriginalQueryWords);
878 <<
"' does not match, and is either a multiple-word "
879 <<
"string or black-listed; hence, the weight is "
880 << lMaxPercentage <<
"%");
void fromStream(std::istream &ioIn)
void setEditDistance(const NbOfErrors_T &iEditDistance)
void setCorrectedQueryString(const TravelQuery_T &iCorrectedQueryString)
Class modelling the primary key of a location/POR (point of reference).
Structure holding a board for all the types of score/matching having been performed.
const NbOfErrors_T K_DEFAULT_SIZE_FOR_SPELLING_ERROR_UNIT
std::pair< Xapian::Document, ScoreBoard > XapianDocumentPair_T
#define OPENTREP_LOG_ERROR(iToBeLogged)
#define OPENTREP_LOG_DEBUG(iToBeLogged)
const XapianDocumentPair_T & getDocumentPair(const Xapian::docid &) const
#define OPENTREP_LOG_NOTIFICATION(iToBeLogged)
Score_T getScore(const ScoreType &) const
static int getDistance(const std::string &iSource, const std::string &iTarget)
void calculateCombinedWeights()
static LocationKey getPrimaryKey(const Xapian::Document &)
Structure modelling a (geographical) location.
static Score_T getEnvelopeID(const Xapian::Document &)
static PageRank_T getPageRank(const Xapian::Document &)
unsigned int NbOfLetters_T
void setCorrectedKeywords(const std::string &iCorrectedKeywords)
const Xapian::Document & getDocument(const Xapian::docid &) const
static Location retrieveLocation(const Xapian::Document &)
void addDocument(const Xapian::Document &, const Score_T &)
std::string fullTextMatch(const Xapian::Database &, const TravelQuery_T &)
void displayXapianPercentages() const
static void trim(std::string &ioPhrase, const NbOfLetters_T &iMinWordLength=4)
std::list< Word_T > WordList_T
void setOriginalKeywords(const std::string &iOriginalKeywords)
void setScoreOnDocMap(const Xapian::docid &, const ScoreType &, const Score_T &)
void fillResult(const Xapian::MSet &iMatchingSet)
Percentage_T calculateCombinedWeight()
void toStream(std::ostream &ioOut) const
static void tokeniseStringIntoWordList(const TravelQuery_T &, WordList_T &)
const PageRank_T & getPageRank() const
void setEditDistance(const NbOfErrors_T &iEditDistance)
void setAllowableEditDistance(const NbOfErrors_T &iAllowableEditDistance)
unsigned short NbOfErrors_T
const Percentage_T K_DEFAULT_MODIFIED_MATCHING_PCT
void setBestCombinedWeight(const Percentage_T &iPercentage)
Class modelling a place/POR (point of reference).
static bool shouldKeep(const std::string &iPhrase, const std::string &iWord)
std::string toString() const
void setBestDocData(const std::string &iDocData)
const Percentage_T K_DEFAULT_FULL_CODE_MATCH_PCT
const NbOfMatches_T K_DEFAULT_XAPIAN_MATCHING_SET_SIZE
unsigned short NbOfWords_T
Enumeration of score types.
void calculateHeuristicWeights()
void fillPlace(Place &) const
const LocationKey & getKey() const
std::string describeKey() const
void setDocID(const XapianDocID_T &iDocID)
void calculatePageRanks()
void setScore(const ScoreType &, const Score_T &)
std::string describe() const
void setBestDocID(const Xapian::docid &iDocID)
std::string TravelQuery_T
void setHasFullTextMatched(const bool iHasFullTextMatched)
const EnvelopeID_T & getEnvelopeID() const
const LocationKey & getKey() const
void setPercentage(const MatchingPercentage_T &iPercentage)
const IATACode_T & getIataCode() const
std::string describeShortKey() const
const Location & generateLocation()
void setAllowableEditDistance(const NbOfErrors_T &iAllowableEditDistance)
void calculateEnvelopeWeights()
void calculateCodeMatches()
static unsigned int calculateEditDistance(const TravelQuery_T &iPhrase)
Helper function.