00001
00002
00003
00004 #ifndef AITOOLS_INVERTEDINDEX_INDEX_SEARCHER_HPP
00005 #define AITOOLS_INVERTEDINDEX_INDEX_SEARCHER_HPP
00006
00007 #include "Logging.hpp"
00008 #include "Searcher.hpp"
00009 #include "Checksum.hpp"
00010 #include "Postlist.hpp"
00011 #include "Converter.hpp"
00012 #include "Exception.hpp"
00013 #include "IndexBuilder.hpp"
00014 #include "StorageSearcher.hpp"
00015 #include <boost/algorithm/string.hpp>
00016 #include <boost/filesystem/fstream.hpp>
00017
00018 namespace aitools {
00019 namespace invertedindex {
00020
00030 template<typename Value>
00031 class IndexSearcher : public Searcher {
00032
00033 public:
00034
00035 static const std::string header_file;
00036
00037 public:
00038
00042 IndexSearcher();
00043
00047 ~IndexSearcher();
00048
00049 private:
00050
00051 static void check_config(const Configuration& config)
00052 throw (std::invalid_argument);
00053
00054 public:
00055
00056 void dashboard(std::ostream& os) const;
00057
00058 void init(const Configuration& config)
00059 throw (std::runtime_error, std::logic_error);
00060
00061 Iterator::SharedPointer
00062 generic_search(const std::string& key)
00063 throw (std::runtime_error);
00064
00065 Iterator::SharedPointer
00066 generic_search(const std::string& key, Quantile::Order order)
00067 throw (std::runtime_error);
00068
00069 Iterator::SharedPointer
00070 generic_search(const std::string& key, size_t length)
00071 throw (std::runtime_error);
00072
00073 Postlist<Value>
00074 search(const std::string& key) throw (std::runtime_error);
00075
00076 Postlist<Value>
00077 search(const std::string& key, Quantile::Order order)
00078 throw (std::runtime_error);
00079
00080 Postlist<Value>
00081 search(const std::string& key, size_t length)
00082 throw (std::runtime_error);
00083
00084 const Quantile& quantile(const std::string& key) const;
00085
00086 const Vocabulary& vocabulary() const;
00087
00088 const MPHashFunction& mphf() const;
00089
00090 const Header& header() const;
00091
00092 private:
00093
00094 StorageSearcher storage_;
00095 Header header_;
00096
00097 };
00098
00099
00100
00101 template<typename Value>
00102 const std::string
00103 IndexSearcher<Value>::header_file(IndexBuilder<Value>::header_file);
00104
00105 template<typename Value>
00106 IndexSearcher<Value>::IndexSearcher()
00107 {}
00108
00109 template<typename Value>
00110 IndexSearcher<Value>::~IndexSearcher()
00111 {}
00112
00113 template<typename Value>
00114 void
00115 IndexSearcher<Value>::check_config(const Configuration& config)
00116 throw (std::invalid_argument)
00117 {
00118 if (!bfs::exists(config.index_directory()))
00119 {
00120 std::string msg("Does not exist");
00121 Exception::throw_invalid_argument(msg, config.index_directory());
00122 }
00123 if (bfs::is_empty(config.index_directory()))
00124 {
00125 std::string msg("Is empty");
00126 Exception::throw_invalid_argument(msg, config.index_directory());
00127 }
00128 }
00129
00130 template<typename Value>
00131 void
00132 IndexSearcher<Value>::dashboard(std::ostream& os) const
00133 {
00134 os << header_.DebugString();
00135 }
00136
00137 template<typename Value>
00138 void
00139 IndexSearcher<Value>::init(const Configuration& config)
00140 throw (std::runtime_error, std::logic_error)
00141 {
00142 check_config(config);
00143
00144
00145 bfs::path file(bfs::path(config.index_directory()) / header_file);
00146 bfs::ifstream stream(file, std::ios::binary);
00147 if (!stream)
00148 {
00149 Exception::throw_invalid_argument("Cannot open", file);
00150 }
00151 else if (!header_.ParseFromIstream(&stream))
00152 {
00153 Exception::throw_runtime_error("Cannot deserialize header");
00154 }
00155 stream.close();
00156
00157
00158 if (header_.major_version() != INDEX_VERSION_MAJOR ||
00159 header_.minor_version() != INDEX_VERSION_MINOR)
00160 {
00161 std::ostringstream oss;
00162 oss << "You need to have aitools-invertedindex-"
00163 << INDEX_VERSION_MAJOR << '.' << INDEX_VERSION_MINOR
00164 << " to be installed.";
00165 Exception::throw_domain_error("Version conflict: " + oss.str());
00166 }
00167
00168
00169 if (header_.value_class_name() != Value::classname)
00170 {
00171 std::string msg("Has to be " + header_.value_class_name());
00172 Exception::throw_domain_error("Wrong value type: " + msg);
00173 }
00174
00175 storage_.open(config.index_directory());
00176 }
00177
00178 template<typename Value>
00179 Iterator::SharedPointer
00180 IndexSearcher<Value>::generic_search(const std::string& key)
00181 throw (std::runtime_error)
00182 {
00183 return generic_search(key, INT_MAX);
00184 }
00185
00186 template<typename Value>
00187 Iterator::SharedPointer
00188 IndexSearcher<Value>::generic_search
00189 (const std::string& key, Quantile::Order order) throw (std::runtime_error)
00190 {
00191 return generic_search(key, quantile(key).get(order));
00192 }
00193
00194 template<typename Value>
00195 Iterator::SharedPointer
00196 IndexSearcher<Value>::generic_search(const std::string& key, size_t length)
00197 throw (std::runtime_error)
00198 {
00199 Iterator::SharedPointer iterator(storage_.get(key, length));
00200 if (iterator->is_valid() &&
00201 iterator->header().checksum != Checksum::hash16(key))
00202 {
00203 std::ostringstream msg;
00204 msg << "key checksum = " << Checksum::hash16(key)
00205 << ", iterator checksum = " << iterator->header().checksum;
00206 Logging::debug("checksum mismatch", msg.str());
00207 iterator.reset(new Iterator);
00208 }
00209 return iterator;
00210 }
00211
00212 template<typename Value>
00213 Postlist<Value>
00214 IndexSearcher<Value>::search(const std::string& key) throw (std::runtime_error)
00215 {
00216 return Postlist<Value>(generic_search(key, INT_MAX));
00217 }
00218
00219 template<typename Value>
00220 Postlist<Value>
00221 IndexSearcher<Value>::search(const std::string& key, Quantile::Order order)
00222 throw (std::runtime_error)
00223 {
00224 return Postlist<Value>(generic_search(key, quantile(key).get(order)));
00225 }
00226
00227 template<typename Value>
00228 Postlist<Value>
00229 IndexSearcher<Value>::search(const std::string& key, size_t length)
00230 throw (std::runtime_error)
00231 {
00232 return Postlist<Value>(generic_search(key, length));
00233 }
00234
00235 template<typename Value>
00236 const Quantile&
00237 IndexSearcher<Value>::quantile(const std::string& key) const
00238 {
00239 return storage_.quantile(key);
00240 }
00241
00242 template<typename Value>
00243 const Vocabulary&
00244 IndexSearcher<Value>::vocabulary() const
00245 {
00246 return storage_.vocabulary();
00247 }
00248
00249 template<typename Value>
00250 const MPHashFunction&
00251 IndexSearcher<Value>::mphf() const
00252 {
00253 return storage_.mphf();
00254 }
00255
00256 template<typename Value>
00257 const Header&
00258 IndexSearcher<Value>::header() const
00259 {
00260 return header_;
00261 }
00262
00263 }
00264 }
00265
00266 #endif // AITOOLS_INVERTEDINDEX_INDEX_SEARCHER_HPP