OpenTREP Logo  0.6.0
C++ Open Travel Request Parsing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
opentrep-searcher.cpp
Go to the documentation of this file.
1 // STL
2 #include <cassert>
3 #include <iostream>
4 #include <sstream>
5 #include <fstream>
6 #include <vector>
7 #include <string>
8 // Boost (Extended STL)
9 #include <boost/date_time/posix_time/posix_time.hpp>
10 #include <boost/date_time/gregorian/gregorian.hpp>
11 #include <boost/tokenizer.hpp>
12 #include <boost/program_options.hpp>
13 // OpenTREP
16 #include <opentrep/Location.hpp>
17 #include <opentrep/config/opentrep-paths.hpp>
18 
19 
// //////// Type definitions ///////
/** List of words, each held as an STL string. */
typedef std::vector<std::string> WordList_T;


// //////// Constants //////
/** File name used for the logs when none is given on the command line. */
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME = "opentrep-searcher.log";

/**
 * Travel query used when the caller supplies an empty one.
 * NOTE(review): the place names look deliberately misspelt, presumably
 * to exercise the spelling-error tolerance -- confirm before "fixing".
 */
const std::string K_OPENTREP_DEFAULT_QUERY_STRING =
  "sna francicso rio de janero lso angles reykyavki";

/** Default type of search request (0 = full text, 1 = coordinates). */
const unsigned short K_OPENTREP_DEFAULT_SEARCH_TYPE (0);
44 
49 
50 
51 // //////////////////////////////////////////////////////////////////////
52 void tokeniseStringIntoWordList (const std::string& iPhrase,
53  WordList_T& ioWordList) {
54  // Empty the word list
55  ioWordList.clear();
56 
57  // Boost Tokeniser
58  typedef boost::tokenizer<boost::char_separator<char> > Tokeniser_T;
59 
60  // Define the separators
61  const boost::char_separator<char> lSepatorList(" .,;:|+-*/_=!@#$%`~^&(){}[]?'<>\"");
62 
63  // Initialise the phrase to be tokenised
64  Tokeniser_T lTokens (iPhrase, lSepatorList);
65  for (Tokeniser_T::const_iterator tok_iter = lTokens.begin();
66  tok_iter != lTokens.end(); ++tok_iter) {
67  const std::string& lTerm = *tok_iter;
68  ioWordList.push_back (lTerm);
69  }
70 }
71 
72 // //////////////////////////////////////////////////////////////////////
/**
 * Join the words of a list into a single string.
 *
 * The words are written in list order, separated by exactly one space;
 * there is neither a leading nor a trailing space.
 *
 * @param iWordList  Words to be joined (may be empty).
 * @return The joined string; empty when the list is empty.
 */
std::string createStringFromWordList (const WordList_T& iWordList) {
  std::ostringstream oStr;

  for (WordList_T::const_iterator itWord = iWordList.begin();
       itWord != iWordList.end(); ++itWord) {
    // The separator goes before every word but the first one. Testing
    // the iterator (rather than counting down from size() in an
    // unsigned short) stays correct for lists of 65536 words or more.
    if (itWord != iWordList.begin()) {
      oStr << " ";
    }
    oStr << *itWord;
  }

  return oStr.str();
}
88 
89 
90 // ///////// Parsing of Options & Configuration /////////
/**
 * Helper streaming a vector: every element is written to the given
 * stream, each one followed by a single space.
 *
 * @param os  Destination stream.
 * @param v   Elements to be written.
 * @return The destination stream, so that insertions can be chained.
 */
template<class T> std::ostream& operator<< (std::ostream& os,
                                            const std::vector<T>& v) {
  // Write to the stream handed in by the caller, not to std::cout
  std::copy (v.begin(), v.end(), std::ostream_iterator<T> (os, " "));
  return os;
}
97 
100 
102 int readConfiguration (int argc, char* argv[],
103  unsigned short& ioSpellingErrorDistance,
104  std::string& ioQueryString,
105  std::string& ioXapianDBFilepath,
106  std::string& ioSQLiteDBFilepath,
107  std::string& ioLogFilename,
108  unsigned short& ioSearchType) {
109 
110  // Initialise the travel query string, if that one is empty
111  if (ioQueryString.empty() == true) {
112  ioQueryString = K_OPENTREP_DEFAULT_QUERY_STRING;
113  }
114 
115  // Transform the query string into a list of words (STL strings)
116  WordList_T lWordList;
117  tokeniseStringIntoWordList (ioQueryString, lWordList);
118 
119  // Declare a group of options that will be allowed only on command line
120  boost::program_options::options_description generic ("Generic options");
121  generic.add_options()
122  ("prefix", "print installation prefix")
123  ("version,v", "print version string")
124  ("help,h", "produce help message");
125 
126  // Declare a group of options that will be allowed both on command
127  // line and in config file
128  boost::program_options::options_description config ("Configuration");
129  config.add_options()
130  ("error,e",
131  boost::program_options::value< unsigned short >(&ioSpellingErrorDistance)->default_value(K_OPENTREP_DEFAULT_SPELLING_ERROR_DISTANCE),
132  "Spelling error distance (e.g., 3)")
133  ("xapiandb,d",
134  boost::program_options::value< std::string >(&ioXapianDBFilepath)->default_value(OPENTREP::DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH),
135  "Xapian database filepath (e.g., /tmp/opentrep/traveldb)")
136  ("sqlite,s",
137  boost::program_options::value< std::string >(&ioSQLiteDBFilepath)->default_value(OPENTREP::DEFAULT_OPENTREP_SQLITE_DB_FILEPATH),
138  "SQLite3 database filepath (e.g., ~/tmp/opentrep/traveldb/ori_por_public.db)")
139  ("log,l",
140  boost::program_options::value< std::string >(&ioLogFilename)->default_value(K_OPENTREP_DEFAULT_LOG_FILENAME),
141  "Filepath for the logs")
142  ("type,t",
143  boost::program_options::value<unsigned short>(&ioSearchType)->default_value(K_OPENTREP_DEFAULT_SEARCH_TYPE),
144  "Type of search request (0 = full text, 1 = coordinates)")
145  ("query,q",
146  boost::program_options::value< WordList_T >(&lWordList)->multitoken(),
147  "Travel query word list (e.g. sna francicso rio de janero lso anglese reykyavki), which sould be located at the end of the command line (otherwise, the other options would be interpreted as part of that travel query word list)")
148  ;
149 
150  // Hidden options, will be allowed both on command line and
151  // in config file, but will not be shown to the user.
152  boost::program_options::options_description hidden ("Hidden options");
153  hidden.add_options()
154  ("copyright",
155  boost::program_options::value< std::vector<std::string> >(),
156  "Show the copyright (license)");
157 
158  boost::program_options::options_description cmdline_options;
159  cmdline_options.add(generic).add(config).add(hidden);
160 
161  boost::program_options::options_description config_file_options;
162  config_file_options.add(config).add(hidden);
163 
164  boost::program_options::options_description visible ("Allowed options");
165  visible.add(generic).add(config);
166 
167  boost::program_options::positional_options_description p;
168  p.add ("copyright", -1);
169 
170  boost::program_options::variables_map vm;
171  boost::program_options::
172  store (boost::program_options::command_line_parser (argc, argv).
173  options (cmdline_options).positional(p).run(), vm);
174 
175  std::ifstream ifs ("opentrep-searcher.cfg");
176  boost::program_options::store (parse_config_file (ifs, config_file_options),
177  vm);
178  boost::program_options::notify (vm);
179 
180  if (vm.count ("help")) {
181  std::cout << visible << std::endl;
183  }
184 
185  if (vm.count ("version")) {
186  std::cout << PACKAGE_NAME << ", version " << PACKAGE_VERSION << std::endl;
188  }
189 
190  if (vm.count ("prefix")) {
191  std::cout << "Installation prefix: " << PREFIXDIR << std::endl;
193  }
194 
195  if (vm.count ("xapiandb")) {
196  ioXapianDBFilepath = vm["xapiandb"].as< std::string >();
197  std::cout << "Xapian database filepath is: " << ioXapianDBFilepath
198  << std::endl;
199  }
200 
201  if (vm.count ("sqlitedb")) {
202  ioSQLiteDBFilepath = vm["sqlitedb"].as< std::string >();
203  std::cout << "SQLite3 database filepath is: " << ioSQLiteDBFilepath
204  << std::endl;
205  }
206 
207  if (vm.count ("log")) {
208  ioLogFilename = vm["log"].as< std::string >();
209  std::cout << "Log filename is: " << ioLogFilename << std::endl;
210  }
211 
212  std::cout << "The type of search is: " << ioSearchType << std::endl;
213 
214  std::cout << "The spelling error distance is: " << ioSpellingErrorDistance
215  << std::endl;
216 
217  ioQueryString = createStringFromWordList (lWordList);
218  std::cout << "The travel query string is: " << ioQueryString << std::endl;
219 
220  return 0;
221 }
222 
226 std::string parseQuery (OPENTREP::OPENTREP_Service& ioOpentrepService,
227  const OPENTREP::TravelQuery_T& iTravelQuery) {
228  std::ostringstream oStr;
229 
230  // Query the Xapian database (index)
231  OPENTREP::WordList_T lNonMatchedWordList;
232  OPENTREP::LocationList_T lLocationList;
233  const OPENTREP::NbOfMatches_T nbOfMatches =
234  ioOpentrepService.interpretTravelRequest (iTravelQuery, lLocationList,
235  lNonMatchedWordList);
236 
237  oStr << nbOfMatches << " (geographical) location(s) have been found "
238  << "matching your query (`" << iTravelQuery << "'). "
239  << lNonMatchedWordList.size() << " words were left unmatched."
240  << std::endl;
241 
242  if (nbOfMatches != 0) {
243  OPENTREP::NbOfMatches_T idx = 1;
244  for (OPENTREP::LocationList_T::const_iterator itLocation =
245  lLocationList.begin();
246  itLocation != lLocationList.end(); ++itLocation, ++idx) {
247  const OPENTREP::Location& lLocation = *itLocation;
248  oStr << " [" << idx << "]: " << lLocation << std::endl;
249  }
250  }
251 
252  if (lNonMatchedWordList.empty() == false) {
253  oStr << "List of unmatched words:" << std::endl;
254 
255  OPENTREP::NbOfMatches_T idx = 1;
256  for (OPENTREP::WordList_T::const_iterator itWord =
257  lNonMatchedWordList.begin();
258  itWord != lNonMatchedWordList.end(); ++itWord, ++idx) {
259  const OPENTREP::Word_T& lWord = *itWord;
260  oStr << " [" << idx << "]: " << lWord << std::endl;
261  }
262  }
263 
264  return oStr.str();
265 }
266 
267 // /////////////// M A I N /////////////////
268 int main (int argc, char* argv[]) {
269 
270  /*
271  const OPENTREP::NbOfLetters_T lScaleArray[5] = {3, 6, 9, 14, 19};
272 
273  const OPENTREP::DistanceErrorScaleArray_T lScaleBoostArray =
274  { {3, 6, 9, 14, 19} };
275 
276  OPENTREP::DistanceErrorRule lScale (5, lScaleArray);
277  OPENTREP::DistanceErrorRule lScaleBoost (lScaleBoostArray);
278 
279  std::cout << "Standard array: " << lScale << std::endl;
280  std::cout << "Boost array: " << lScaleBoost << std::endl;
281 
282  for (int idx = 0; idx != 20; ++idx) {
283  std::cout << "For " << idx << " letters => "
284  << lScale.getAllowedDistanceError(idx) << std::endl;
285  }
286 
287  return 0;
288  */
289 
290  // Travel query
291  OPENTREP::TravelQuery_T lTravelQuery;
292 
293  // Output log File
294  std::string lLogFilename;
295 
296  // Xapian database name (directory of the index)
297  std::string lXapianDBNameStr;
298 
299  // SQLite3 database file-path
300  std::string lSQLiteDBFilePathStr;
301 
302  // Type of search
303  unsigned short lSearchType;
304 
305  // Xapian spelling error distance
306  unsigned short lSpellingErrorDistance;
307 
308  // Call the command-line option parser
309  const int lOptionParserStatus =
310  readConfiguration (argc, argv, lSpellingErrorDistance, lTravelQuery,
311  lXapianDBNameStr, lSQLiteDBFilePathStr,
312  lLogFilename, lSearchType);
313 
314  if (lOptionParserStatus == K_OPENTREP_EARLY_RETURN_STATUS) {
315  return 0;
316  }
317 
318  // Set the log parameters
319  std::ofstream logOutputFile;
320  // open and clean the log outputfile
321  logOutputFile.open (lLogFilename.c_str());
322  logOutputFile.clear();
323 
324  if (lSearchType == 0) {
325  // Initialise the context
326  const OPENTREP::TravelDBFilePath_T lXapianDBName (lXapianDBNameStr);
327  const OPENTREP::SQLiteDBFilePath_T lSQLiteDBFilePath (lSQLiteDBFilePathStr);
328  OPENTREP::OPENTREP_Service opentrepService(logOutputFile,
329  lXapianDBName, lSQLiteDBFilePath);
330 
331  // Parse the query and retrieve the places from Xapian only
332  const std::string& lOutput = parseQuery (opentrepService, lTravelQuery);
333  std::cout << lOutput;
334 
335  } else {
336  std::cout << "Finding the airports closest to: " << lTravelQuery
337  << std::endl;
338  }
339 
340  // Close the Log outputFile
341  logOutputFile.close();
342 
343  return 0;
344 }
std::vector< std::string > WordList_T
std::string createStringFromWordList(const WordList_T &iWordList)
unsigned short NbOfMatches_T
int readConfiguration(int argc, char *argv[], unsigned short &ioSpellingErrorDistance, std::string &ioQueryString, std::string &ioXapianDBFilepath, std::string &ioSQLiteDBFilepath, std::string &ioLogFilename, unsigned short &ioSearchType)
Structure modelling a (geographical) location.
Definition: Location.hpp:24
std::string Word_T
const std::string DEFAULT_OPENTREP_SQLITE_DB_FILEPATH
int main(int argc, char *argv[])
Interface for the OPENTREP Services.
void tokeniseStringIntoWordList(const std::string &iPhrase, WordList_T &ioWordList)
std::vector< std::string > WordList_T
std::list< Word_T > WordList_T
const std::string K_OPENTREP_DEFAULT_LOG_FILENAME("opentrep-searcher.log")
const unsigned short K_OPENTREP_DEFAULT_SEARCH_TYPE
std::string parseQuery(OPENTREP::OPENTREP_Service &ioOpentrepService, const OPENTREP::TravelQuery_T &iTravelQuery)
std::list< Location > LocationList_T
const std::string K_OPENTREP_DEFAULT_QUERY_STRING("sna francicso rio de janero lso angles reykyavki")
const int K_OPENTREP_EARLY_RETURN_STATUS
NbOfMatches_T interpretTravelRequest(const std::string &iTravelQuery, LocationList_T &, WordList_T &)
const std::string DEFAULT_OPENTREP_XAPIAN_DB_FILEPATH
std::string TravelQuery_T
std::ostream & operator<<(std::ostream &os, const std::vector< T > &v)
const unsigned short K_OPENTREP_DEFAULT_SPELLING_ERROR_DISTANCE