00001 #include "StorageSearcher.hpp"
00002 #include "Converter.hpp"
00003 #include "Logging.hpp"
00004 #include "System.hpp"
00005
00006 namespace aitools {
00007 namespace invertedindex {
00008
00009 const Quantile
00010 StorageSearcher::empty_quantile;
00011
00012 StorageSearcher::StorageSearcher()
00013 {}
00014
00015 StorageSearcher::~StorageSearcher()
00016 {}
00017
00018 void
00019 StorageSearcher::close()
00020 {
00021 readers_.clear();
00022 }
00023
00024 bool
00025 StorageSearcher::is_open() const
00026 {
00027 return !readers_.empty();
00028 }
00029
00030 void
00031 StorageSearcher::open(const bfs::path& directory) throw (std::invalid_argument)
00032 {
00033
00034 hash_func_.load(directory / StorageBuilder::mphf_file);
00035 vocabulary_.load(directory / StorageBuilder::vocab_file);
00036 unsigned table_size(hash_func_.size());
00037
00038
00039 locations_.resize(table_size);
00040 FILE* storage(System::fopen(directory/StorageBuilder::storage_file, "rb"));
00041 System::fread(locations_.data(), sizeof(Location), table_size, storage);
00042 std::fclose(storage);
00043
00044
00045 bfs::path path(directory / StorageBuilder::quantile_file);
00046 if (bfs::exists(path))
00047 {
00048 FILE* quantile(System::fopen(path, "rb"));
00049 quantiles_.resize(table_size);
00050 System::fread(quantiles_.data(), Quantile::size, table_size, quantile);
00051 std::fclose(quantile);
00052 }
00053
00054
00055 std::string id;
00056 unsigned num(std::max_element(locations_.begin(), locations_.end())->first);
00057 for (unsigned i(0); i <= num; ++i)
00058 {
00059 id = Converter::ui32_to_str(i);
00060 readers_.push_back(Reader(new PostlistReader));
00061 readers_.back()->open(directory / (StorageBuilder::data_file + id));
00062 }
00063 }
00064
00065 Iterator::SharedPointer
00066 StorageSearcher::get(const std::string& key) throw (std::runtime_error)
00067 {
00068 return get(key, INT_MAX);
00069 }
00070
00071 Iterator::SharedPointer
00072 StorageSearcher::get(const std::string& key, size_t length)
00073 throw (std::runtime_error)
00074 {
00075 if (!vocabulary_.contains(key))
00076 {
00077 return Iterator::SharedPointer(new Iterator);
00078 }
00079 Location location(locations_[hash_func_.hash(key)]);
00080 boost::mutex::scoped_lock lock(mutex_);
00081 readers_[location.first]->seek(location.second);
00082 return readers_[location.first]->read(length);
00083
00084 }
00085
00086 Iterator::SharedPointer
00087 StorageSearcher::get(const std::string& key, Quantile::Order order)
00088 throw (std::runtime_error)
00089 {
00090 return get(key, quantile(key).get(order));
00091 }
00092
00093 const Quantile&
00094 StorageSearcher::quantile(const std::string& key) const
00095 {
00096 if (!vocabulary_.contains(key) || quantiles_.empty())
00097 {
00098 return empty_quantile;
00099 }
00100 return quantiles_[hash_func_.hash(key)];
00101 }
00102
00103 const Vocabulary&
00104 StorageSearcher::vocabulary() const
00105 {
00106 return vocabulary_;
00107 }
00108
00109 const MPHashFunction&
00110 StorageSearcher::mphf() const
00111 {
00112 return hash_func_;
00113 }
00114
00115 }
00116 }