OpenTREP Logo  0.6.0
C++ Open Travel Request Parsing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
XapianIndexManager.cpp
Go to the documentation of this file.
1 // //////////////////////////////////////////////////////////////////////
2 // Import section
3 // //////////////////////////////////////////////////////////////////////
4 // STL
5 #include <cassert>
6 #include <sstream>
7 #include <string>
8 #include <vector>
9 #include <exception>
10 // Boost
11 #include <boost/filesystem.hpp>
12 #include <boost/random/random_device.hpp>
13 #include <boost/random/uniform_int_distribution.hpp>
14 // Xapian
15 #include <xapian.h>
16 // OpenTrep
17 #include <opentrep/Location.hpp>
18 #include <opentrep/bom/Result.hpp>
21 
22 namespace OPENTREP {
23 
24  // //////////////////////////////////////////////////////////////////////
25  boost::filesystem::path
26  checkTravelDBFilePath (const TravelDBFilePath_T& iTravelDBFilePath) {
27  boost::filesystem::path oTravelDBFilePath (iTravelDBFilePath.begin(),
28  iTravelDBFilePath.end());
29  if (!(boost::filesystem::exists (oTravelDBFilePath)
30  && boost::filesystem::is_directory (oTravelDBFilePath))) {
31  std::ostringstream oStr;
32  oStr << "The file-path to the Xapian database/index ('"
33  << iTravelDBFilePath << "') does not exist or is not a directory.";
34  OPENTREP_LOG_ERROR (oStr.str());
35  throw FileNotFoundException (oStr.str());
36  }
37 
38  return oTravelDBFilePath;
39  }
40 
41  // //////////////////////////////////////////////////////////////////////
42  NbOfDBEntries_T XapianIndexManager::
43  getSize (const TravelDBFilePath_T& iTravelDBFilePath) {
44  NbOfDBEntries_T oNbOfDBEntries = 0;
45 
46  // Check whether the file-path to the Xapian database/index exists
47  // and is a directory.
48  checkTravelDBFilePath (iTravelDBFilePath);
49 
50  // Open the Xapian database
51  Xapian::Database lXapianDatabase (iTravelDBFilePath);
52 
53  // Retrieve the actual number of documents indexed by the Xapian database
54  const Xapian::doccount& lDocCount = lXapianDatabase.get_doccount();
55 
56  //
57  oNbOfDBEntries = static_cast<const NbOfDBEntries_T> (lDocCount);
58 
59  return oNbOfDBEntries;
60  }
61 
62  // //////////////////////////////////////////////////////////////////////
63  NbOfMatches_T XapianIndexManager::
64  drawRandomLocations (const TravelDBFilePath_T& iTravelDBFilePath,
65  const NbOfMatches_T& iNbOfDraws,
66  LocationList_T& ioLocationList) {
67  NbOfMatches_T oNbOfMatches = 0;
68 
69  // Check whether the file-path to the Xapian database/index exists
70  // and is a directory.
71  checkTravelDBFilePath (iTravelDBFilePath);
72 
73  // Open the Xapian database
74  Xapian::Database lXapianDatabase (iTravelDBFilePath);
75 
76  // Retrieve the number of documents indexed by the database
77  const NbOfDBEntries_T& lTotalNbOfDocs = getSize (iTravelDBFilePath);
78 
79  // No need to go further when the Xapian database (index) is empty
80  if (lTotalNbOfDocs == 0) {
81  //
82  OPENTREP_LOG_NOTIFICATION ("The Xapian database is empty");
83  return oNbOfMatches;
84  }
85 
86  // random_device is used as a source of entropy, since the generated
87  // locations are expected not to be reproducible.
88  boost::random::random_device lRandomDevice;
89  boost::random::uniform_int_distribution<> uniformDistrib (1, lTotalNbOfDocs);
90 
91  // Randomly generate document IDs. If they the corresponding documents
92  // do not exist in the Xapian database, generate another one.
93  for (NbOfMatches_T idx = 1; idx <= iNbOfDraws; ++idx) {
94  unsigned int lRandomNbInt = uniformDistrib (lRandomDevice);
95  Xapian::docid lDocID = static_cast<Xapian::docid> (lRandomNbInt);
96 
97  // Retrieve the document from the Xapian database/index
98  Xapian::Document::Internal* lDocPtr =
99  lXapianDatabase.get_document_lazily (lDocID);
100 
101  unsigned short currentNbOfIterations = 0;
102  while (lDocPtr == NULL && currentNbOfIterations <= 100) {
103  // DEBUG
104  OPENTREP_LOG_DEBUG ("[" << idx << "] The " << lDocID
105  << " document ID does not exist in the Xapian "
106  << "database. Another ID will be generated.");
107 
108  // Re-draw another random document ID
109  lRandomNbInt = uniformDistrib (lRandomDevice);
110  lDocID = static_cast<Xapian::docid> (lRandomNbInt);
111 
112  // Retrieve the document from the Xapian database/index
113  lDocPtr = lXapianDatabase.get_document_lazily (lDocID);
114  }
115 
116  // Bad luck: no document ID can be generated so that it corresponds to
117  // an actual document in the Xapian database/index
118  if (lDocPtr == NULL) {
119  //
120  OPENTREP_LOG_NOTIFICATION ("[" << idx << "] No document ID can be "
121  << "generated so that it corresponds to "
122  << "a document in the Xapian database.");
123 
124  } else {
125  // Retrieve the actual document.
126  const Xapian::Document lDoc (lDocPtr);
127  const std::string& lDocDataStr = lDoc.get_data();
128  const RawDataString_T& lDocData = RawDataString_T (lDocDataStr);
129 
130  // Parse the POR details and create the corresponding Location structure
131  const Location& lLocation = Result::retrieveLocation (lDocData);
132 
133  // Add the Location structure to the dedicated list
134  ioLocationList.push_back (lLocation);
135  }
136  }
137 
138  // Consistency check
139  oNbOfMatches = ioLocationList.size();
140  if (oNbOfMatches != iNbOfDraws) {
141  //
142  OPENTREP_LOG_NOTIFICATION (iNbOfDraws << " random draws were expected, "
143  << "but " << oNbOfMatches
144  << " have been generated.");
145  }
146 
147  //
148  return oNbOfMatches;
149  }
150 
151 }
#define OPENTREP_LOG_ERROR(iToBeLogged)
Definition: Logger.hpp:23
#define OPENTREP_LOG_DEBUG(iToBeLogged)
Definition: Logger.hpp:32
#define OPENTREP_LOG_NOTIFICATION(iToBeLogged)
Definition: Logger.hpp:26
unsigned short NbOfMatches_T
static Location retrieveLocation(const Xapian::Document &)
Definition: Result.cpp:266
unsigned int NbOfDBEntries_T
std::list< Location > LocationList_T
boost::filesystem::path checkTravelDBFilePath(const TravelDBFilePath_T &iTravelDBFilePath)