OpenTREP Logo  0.6.0
C++ Open Travel Request Parsing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RequestInterpreter.cpp
Go to the documentation of this file.
1 // //////////////////////////////////////////////////////////////////////
2 // Import section
3 // //////////////////////////////////////////////////////////////////////
4 // STL
5 #include <cassert>
6 #include <sstream>
7 #include <string>
8 #include <vector>
9 #include <exception>
10 // Boost
11 #include <boost/filesystem.hpp>
12 // OpenTrep
13 #include <opentrep/bom/Filter.hpp>
15 #include <opentrep/bom/Place.hpp>
18 #include <opentrep/bom/Result.hpp>
29 
30 namespace OPENTREP {
31 
43  // //////////////////////////////////////////////////////////////////////
44  void addUnmatchedWord (const TravelQuery_T& iQueryString,
45  WordList_T& ioWordList, WordSet_T& ioWordSet) {
46  // Token-ise the given string
47  WordList_T lQueryStringWordList;
49  lQueryStringWordList);
50  if (lQueryStringWordList.size() == 1) {
51  // Add the unmatched/unknown word, only when that latter has not
52  // already been stored, and when it is not black-listed.
53  const bool shouldBeKept = Filter::shouldKeep ("", iQueryString);
54  //const bool shouldBeKept = true;
55 
56  WordSet_T::const_iterator itWord = ioWordSet.find (iQueryString);
57  if (shouldBeKept == true && itWord == ioWordSet.end()) {
58  ioWordSet.insert (iQueryString);
59  ioWordList.push_back (iQueryString);
60  }
61  }
62  }
63 
64  // //////////////////////////////////////////////////////////////////////
65  void createPlaces (const ResultCombination& iResultCombination,
66  PlaceHolder& ioPlaceHolder) {
67 
68  // Retrieve the best matching ResultHolder object.
69  const ResultHolder& lResultHolder =
70  iResultCombination.getBestMatchingResultHolder();
71 
72  // Browse the list of result objects
73  const ResultList_T& lResultList = lResultHolder.getResultList();
74  for (ResultList_T::const_iterator itResult = lResultList.begin();
75  itResult != lResultList.end(); ++itResult) {
76  // Retrieve the result object
77  const Result* lResult_ptr = *itResult;
78  assert (lResult_ptr != NULL);
79 
84  const bool hasFullTextMatched = lResult_ptr->hasFullTextMatched();
85  if (hasFullTextMatched == false) {
86  continue;
87  }
88  assert (hasFullTextMatched == true);
89 
90  // Retrieve the Xapian document data (string)
91  const std::string& lDocDataStr = lResult_ptr->getBestDocData();
92  const RawDataString_T& lDocData = RawDataString_T (lDocDataStr);
93 
94  // Parse the POR details and create the corresponding Location structure
95  const Location& lLocation = Result::retrieveLocation (lDocData);
96 
97  // Instanciate an empty place object, which will be filled from the
98  // rows retrieved from the database.
99  Place& lPlace = FacPlace::instance().create (lLocation);
100 
101  // Insert the Place object within the PlaceHolder object
102  FacPlaceHolder::initLinkWithPlace (ioPlaceHolder, lPlace);
103 
104  // Fill the place with the remaining of the Result details.
105  lResult_ptr->fillPlace (lPlace);
106 
107  // DEBUG
108  OPENTREP_LOG_DEBUG ("Retrieved Document: " << lPlace.toString());
109  }
110  }
111 
124  // //////////////////////////////////////////////////////////////////////
125  void searchString (const StringPartition& iStringPartition,
126  const Xapian::Database& iDatabase,
127  ResultCombination& ioResultCombination,
128  WordList_T& ioWordList) {
129 
130  // Catch any thrown Xapian::Error exceptions
131  try {
132 
133  // Set of unknown words (just to eliminate the duplicates)
134  WordSet_T lWordSet;
135 
136  // Browse the partitions
137  for (StringPartition::StringPartition_T::const_iterator itSet =
138  iStringPartition._partition.begin();
139  itSet != iStringPartition._partition.end(); ++itSet) {
140  const StringSet& lStringSet = *itSet;
141 
142  // DEBUG
143  OPENTREP_LOG_DEBUG (" ==========");
144  OPENTREP_LOG_DEBUG (" String set: " << lStringSet);
145 
146  // Create a ResultHolder object.
147  ResultHolder& lResultHolder =
148  FacResultHolder::instance().create (lStringSet.describe(), iDatabase);
149 
150  // Add the ResultHolder object to the dedicated list.
152  lResultHolder);
153 
154  // Browse through all the word combinations of the partition
155  for (StringSet::StringSet_T::const_iterator itString =
156  lStringSet._set.begin();
157  itString != lStringSet._set.end(); ++itString) {
158  //
159  const std::string lQueryString (*itString);
160 
161  // DEBUG
162  OPENTREP_LOG_DEBUG (" --------");
163  OPENTREP_LOG_DEBUG (" Query string: '" << lQueryString << "'");
164 
165  // Create an empty Result object
166  Result& lResult = FacResult::instance().create (lQueryString,
167  iDatabase);
168 
169  // Add the Result object to the dedicated list.
170  FacResultHolder::initLinkWithResult (lResultHolder, lResult);
171 
172  // Perform the Xapian-based full-text match: the set of
173  // matching documents is filled.
174  const std::string& lMatchedString =
175  lResult.fullTextMatch (iDatabase, lQueryString);
176 
177  // When a single-word string is unmatched/unknown by/from Xapian,
178  // add it to the dedicated list (i.e., ioWordList).
179  if (lMatchedString.empty() == true) {
180  OPENTREP::addUnmatchedWord (lQueryString, ioWordList, lWordSet);
181  }
182  }
183 
184  // DEBUG
185  OPENTREP_LOG_DEBUG (std::endl
186  << "========================================="
187  << std::endl << "Result holder: "
188  << lResultHolder.toString() << std::endl
189  << "========================================="
190  << std::endl << std::endl);
191  }
192 
193  // DEBUG
194  OPENTREP_LOG_DEBUG ("*********************");
195 
196  } catch (const Xapian::Error& error) {
197  // Error
198  OPENTREP_LOG_ERROR ("Exception: " << error.get_msg());
199  throw XapianException (error.get_msg());
200  }
201  }
202 
219  // //////////////////////////////////////////////////////////////////////
220  void chooseBestMatchingResultHolder (ResultCombination& ioResultCombination) {
221 
222  // Calculate the weights for the full-text matches
223  const bool doesBestMatchingResultHolderExist =
224  ioResultCombination.chooseBestMatchingResultHolder();
225 
226  if (doesBestMatchingResultHolderExist == true) {
227  const ResultHolder& lBestMatchingResultHolder =
228  ioResultCombination.getBestMatchingResultHolder();
229 
230  // DEBUG
231  const StringSet& lCorrectedStringSet =
232  ioResultCombination.getCorrectedStringSet();
233  OPENTREP_LOG_DEBUG ("The best matching string partition for '"
234  << ioResultCombination.describeShortKey() << "' is "
235  << lBestMatchingResultHolder.describeShortKey()
236  << ", and has got a weight of "
237  << ioResultCombination.getBestMatchingWeight()
238  << "%. The corrected string set is: "
239  << lCorrectedStringSet);
240 
241  } else {
242  // DEBUG
243  OPENTREP_LOG_DEBUG ("There is no match for '"
244  << ioResultCombination.describeShortKey() << "'");
245  }
246  }
247 
248  // //////////////////////////////////////////////////////////////////////
249  NbOfMatches_T RequestInterpreter::
250  interpretTravelRequest (const TravelDBFilePath_T& iTravelDBFilePath,
251  const TravelQuery_T& iTravelQuery,
252  LocationList_T& ioLocationList,
253  WordList_T& ioWordList) {
254  NbOfMatches_T oNbOfMatches = 0;
255 
256  // Sanity check
257  assert (iTravelQuery.empty() == false);
258 
259  // Create a PlaceHolder object, to collect the matching Place objects
260  PlaceHolder& lPlaceHolder = FacPlaceHolder::instance().create();
261 
262  // Check whether the file-path to the Xapian database/index exists
263  // and is a directory.
264  boost::filesystem::path lTravelDBFilePath (iTravelDBFilePath.begin(),
265  iTravelDBFilePath.end());
266  if (!(boost::filesystem::exists (lTravelDBFilePath)
267  && boost::filesystem::is_directory (lTravelDBFilePath))) {
268  std::ostringstream oStr;
269  oStr << "The file-path to the Xapian database/index ('"
270  << iTravelDBFilePath << "') does not exist or is not a directory.";
271  OPENTREP_LOG_ERROR (oStr.str());
272  throw FileNotFoundException (oStr.str());
273  }
274 
275  // Open the Xapian database
276  Xapian::Database lXapianDatabase (iTravelDBFilePath);
277 
278  // DEBUG
279  OPENTREP_LOG_DEBUG (std::endl
280  << "=========================================");
281 
282  // First, cut the travel query in slices and calculate all the partitions
283  // for each of those query slices
284  QuerySlices lQuerySlices (lXapianDatabase, iTravelQuery);
285 
286  // DEBUG
287  OPENTREP_LOG_DEBUG ("+=+=+=+=+=+=+=+=+=+=+=+=+=+=+");
288  OPENTREP_LOG_DEBUG ("Travel query: `" << iTravelQuery << "'");
289  OPENTREP_LOG_DEBUG ("Query slices: `" << lQuerySlices << "'");
290 
291  // Browse the travel query slices
292  const StringPartitionList_T& lStringPartitionList =
293  lQuerySlices.getStringPartitionList();
294  for (StringPartitionList_T::const_iterator itSlice =
295  lStringPartitionList.begin();
296  itSlice != lStringPartitionList.end(); ++itSlice) {
297  StringPartition lStringPartition = *itSlice;
298  const std::string& lTravelQuerySlice = lStringPartition.getInitialString();
299 
305  ResultCombination& lResultCombination =
306  FacResultCombination::instance().create (lTravelQuerySlice);
307 
308  // DEBUG
309  // DEBUG
310  OPENTREP_LOG_DEBUG ("+++++++++++++++++++++");
311  OPENTREP_LOG_DEBUG ("Travel query slice: `" << lTravelQuerySlice << "'");
312  OPENTREP_LOG_DEBUG ("Partitions: " << lStringPartition);
313 
318  OPENTREP::searchString (lTravelQuerySlice, lXapianDatabase,
319  lResultCombination, ioWordList);
320 
324  lResultCombination.displayXapianPercentages();
325 
329  lResultCombination.calculateEnvelopeWeights();
330 
335  lResultCombination.calculateCodeMatches();
336 
340  lResultCombination.calculatePageRanks();
341 
345  lResultCombination.calculateHeuristicWeights();
346 
350  lResultCombination.calculateCombinedWeights();
351 
355  OPENTREP::chooseBestMatchingResultHolder (lResultCombination);
356 
362  createPlaces (lResultCombination, lPlaceHolder);
363 
364  // DEBUG
365  OPENTREP_LOG_DEBUG (std::endl
366  << "========================================="
367  << std::endl << "Summary:" << std::endl
368  << lPlaceHolder.toShortString() << std::endl
369  << "========================================="
370  << std::endl);
371  }
372 
378  lPlaceHolder.createLocations (ioLocationList);
379  oNbOfMatches = ioLocationList.size();
380 
381  return oNbOfMatches;
382  }
383 
384 }
const ResultList_T & getResultList() const
std::vector< std::string > WordList_T
ResultHolder & create(const TravelQuery_T &iQueryString, const Xapian::Database &iDatabase)
#define OPENTREP_LOG_ERROR(iToBeLogged)
Definition: Logger.hpp:23
#define OPENTREP_LOG_DEBUG(iToBeLogged)
Definition: Logger.hpp:32
static void initLinkWithResult(ResultHolder &, Result &)
void addUnmatchedWord(const TravelQuery_T &iQueryString, WordList_T &ioWordList, WordSet_T &ioWordSet)
unsigned short NbOfMatches_T
StringSet_T _set
Definition: StringSet.hpp:118
static FacPlace & instance()
Definition: FacPlace.cpp:29
const ResultHolder & getBestMatchingResultHolder() const
StringSet getCorrectedStringSet() const
Structure modelling a (geographical) location.
Definition: Location.hpp:24
static FacResultCombination & instance()
static Location retrieveLocation(const Xapian::Document &)
Definition: Result.cpp:266
std::string fullTextMatch(const Xapian::Database &, const TravelQuery_T &)
Definition: Result.cpp:515
static void initLinkWithResultHolder(ResultCombination &, ResultHolder &)
std::string describeShortKey() const
std::list< StringPartition > StringPartitionList_T
Result & create(const TravelQuery_T &, const Xapian::Database &)
Definition: FacResult.cpp:41
std::list< Word_T > WordList_T
std::string describeShortKey() const
bool hasFullTextMatched() const
Definition: Result.hpp:71
const Percentage_T & getBestMatchingWeight() const
static FacResult & instance()
Definition: FacResult.cpp:29
static void tokeniseStringIntoWordList(const TravelQuery_T &, WordList_T &)
Definition: WordHolder.cpp:37
std::set< std::string > WordSet_T
std::string describe() const
Definition: StringSet.cpp:88
Class wrapping functions on a list of Result objects.
std::list< Result * > ResultList_T
Definition: ResultList.hpp:13
Class modelling a place/POR (point of reference).
Definition: Place.hpp:28
static bool shouldKeep(const std::string &iPhrase, const std::string &iWord)
Definition: Filter.cpp:144
std::list< Location > LocationList_T
void searchString(const StringPartition &iStringPartition, const Xapian::Database &iDatabase, ResultCombination &ioResultCombination, WordList_T &ioWordList)
ResultCombination & create(const TravelQuery_T &iQueryString)
Class wrapping functions on a list of ResultHolder objects.
std::string toString() const
static FacResultHolder & instance()
void fillPlace(Place &) const
Definition: Result.cpp:205
static FacPlaceHolder & instance()
StringPartition_T _partition
static void initLinkWithPlace(PlaceHolder &, Place &)
std::string toString() const
Definition: Place.cpp:83
Class holding a set of strings, e.g., {"rio", "de", "janeiro"}.
Definition: StringSet.hpp:19
std::string TravelQuery_T
Place & create()
Definition: FacPlace.cpp:41
void createPlaces(const ResultCombination &iResultCombination, PlaceHolder &ioPlaceHolder)
void chooseBestMatchingResultHolder(ResultCombination &ioResultCombination)
Class wrapping a set of Xapian documents having matched a given query string.
Definition: Result.hpp:48
const RawDataString_T & getBestDocData() const
Definition: Result.hpp:132