#include "StorageBuilder.hpp"

#include <exception>

#include "MPHashFunction.hpp"
#include "Converter.hpp"
#include "Logging.hpp"
#include "System.hpp"
00006
00007 namespace aitools {
00008 namespace invertedindex {
00009
00010 const std::string
00011 StorageBuilder::data_file("data.");
00012
00013 const std::string
00014 StorageBuilder::mphf_file("mphf");
00015
00016 const std::string
00017 StorageBuilder::vocab_file("vocab");
00018
00019 const std::string
00020 StorageBuilder::storage_file("storage");
00021
00022 const std::string
00023 StorageBuilder::quantile_file("quantile");
00024
00025 StorageBuilder::StorageBuilder()
00026 {}
00027
00028 StorageBuilder::~StorageBuilder()
00029 {
00030 close();
00031 }
00032
00033 bool
00034 StorageBuilder::is_open() const
00035 {
00036 return !directory_.empty();
00037 }
00038
00039 const bfs::path&
00040 StorageBuilder::directory() const
00041 {
00042 return directory_;
00043 }
00044
00045 void
00046 StorageBuilder::close() throw (std::invalid_argument)
00047 {
00048 if (!is_open()) return;
00049
00050
00051 MPHashFunction mphf;
00052 mphf.create(vocabulary_);
00053 mphf.save(directory_ / mphf_file);
00054 vocabulary_.save(directory_ / vocab_file);
00055 writers_.back()->close();
00056
00057
00058 std::vector<Location> locations(mphf.size());
00059 LocationMap::const_iterator lend(locations_.end());
00060 for (LocationMap::const_iterator it(locations_.begin()); it != lend; ++it)
00061 {
00062 locations[mphf.hash(it->first)] = it->second;
00063 }
00064 FILE* storage(System::fopen(directory_ / storage_file, "wb"));
00065 System::fwrite(locations.data(), sizeof(Location), mphf.size(), storage);
00066 std::fclose(storage);
00067
00068
00069 if (!quantiles_.empty())
00070 {
00071 std::vector<Quantile> quantiles(mphf.size());
00072 QuantileMap::const_iterator qend(quantiles_.end());
00073 QuantileMap::const_iterator qit(quantiles_.begin());
00074 for (; qit != qend; ++qit)
00075 {
00076 quantiles[mphf.hash(qit->first)] = qit->second;
00077 }
00078 FILE* quantile(System::fopen(directory_ / quantile_file, "wb"));
00079 System::fwrite(quantiles.data(), Quantile::size, mphf.size(), quantile);
00080 std::fclose(quantile);
00081 }
00082 directory_ = bfs::path();
00083 }
00084
00085 void
00086 StorageBuilder::open(const bfs::path& directory) throw (std::invalid_argument)
00087 {
00088 if (is_open()) return;
00089 if (!bfs::exists(directory))
00090 {
00091 Exception::throw_invalid_argument("Does not exist", directory);
00092 }
00093 directory_ = directory;
00094 }
00095
00096 void
00097 StorageBuilder::put(const std::string& key, Iterator::SharedPointer iterator)
00098 throw (std::runtime_error)
00099 {
00100 if (!is_open())
00101 {
00102 Exception::throw_runtime_error("StorageBuilder is not open");
00103 }
00104 if (vocabulary_.contains(key))
00105 {
00106 Logging::log("Error: This key is not unique: " + key);
00107 Logging::log("Start indexing again with InputFormat = PSEUDO_INVERTED");
00108 Exception::throw_runtime_error("Invalid real inverted file");
00109 }
00110 if (writers_.empty())
00111 {
00112 writers_.push_back(Writer(new PostlistWriter));
00113 writers_.back()->open(directory_ / (data_file + "0"));
00114 }
00115 else if (writers_.back()->tell() + iterator->header().payload > max_file_size)
00116 {
00117 writers_.back()->close();
00118 std::string id(Converter::ui32_to_str(writers_.size()));
00119 writers_.push_back(Writer(new PostlistWriter));
00120 writers_.back()->open(directory_ / (data_file + id));
00121 }
00122 Location location(writers_.size()-1, writers_.back()->tell());
00123 locations_.insert(std::make_pair(key, location));
00124 vocabulary_.insert(key, iterator->header().value_count);
00125 writers_.back()->write(iterator);
00126 }
00127
00128 void
00129 StorageBuilder::put(const std::string& key, const Quantile& quantile)
00130 throw (std::runtime_error)
00131 {
00132 if (!is_open())
00133 {
00134 Exception::throw_runtime_error("StorageBuilder is not open");
00135 }
00136 if (!vocabulary_.contains(key))
00137 {
00138 Exception::throw_runtime_error("Unknown key for quantile");
00139 }
00140 quantiles_.insert(std::make_pair(key, quantile));
00141 }
00142
00143 const Vocabulary&
00144 StorageBuilder::vocabulary() const
00145 {
00146 return vocabulary_;
00147 }
00148
00149 }
00150 }