00001
00002
00003 #ifndef AITOOLS_BIGHASHMAP_CORE_BIG_HASH_MAP_HPP
00004 #define AITOOLS_BIGHASHMAP_CORE_BIG_HASH_MAP_HPP
00005
00006 #include <boost/algorithm/string.hpp>
00007
00008 #include "core/raw_big_hash_map.hpp"
00009 #include "util/memory.hpp"
00010 #include "value/pair_traits.hpp"
00011 #include "value/triple_traits.hpp"
00012 #include "value/quadruple_traits.hpp"
00013
00014 namespace aitools {
00015 namespace bighashmap {
00016 namespace core {
00017
00024 template <typename T>
00025 class big_hash_map : public raw_big_hash_map {
00026
00027 public:
00028
00029 typedef cmph_map<T> map_type;
00030 typedef typename map_type::key_type key_type;
00031 typedef typename map_type::value_type value_type;
00032 typedef typename map_type::checksum_type checksum_type;
00033
00034 public:
00035
00036 static const std::string index_file_name;
00037
00038 private:
00039
00040 big_hash_map()
00041 : raw_big_hash_map() {}
00042
00043 big_hash_map(const std::vector<map_type*>& sub_maps)
00044 : raw_big_hash_map(),
00045 maps_(sub_maps),
00046 size_(0)
00047 {
00048 for (auto it(maps_.begin()); it != maps_.end(); ++it)
00049 {
00050 size_ += (*it)->size();
00051 }
00052 }
00053
00054 static uint64_t
00055 table_size(const bfs::path& dir)
00056 {
00057 uint64_t size(0);
00058 if (bfs::exists(dir))
00059 {
00060 const bfs::directory_iterator end;
00061 for (bfs::directory_iterator it(dir); it != end; ++it)
00062 {
00063 if (!bfs::is_regular_file(it->path())) continue;
00064 if (boost::ends_with(it->path().string(), "dat") ||
00065 boost::ends_with(it->path().string(), "mph"))
00066 {
00067 size += bfs::file_size(it->path());
00068 }
00069 }
00070 }
00071 return size;
00072 }
00073
00074 public:
00075
00076 ~big_hash_map()
00077 {
00078 for (auto it(maps_.begin()); it != maps_.end(); ++it)
00079 {
00080 delete *it;
00081 }
00082 }
00083
00084 public:
00085
00086 static const bfs::path
00087 build(const bfs::path& input_dir, const bfs::path& output_dir,
00088 algorithm algo = algorithm::bdz)
00089 {
00090 return builder<value_type>::build(input_dir, output_dir, algo);
00091 }
00092
00093 static big_hash_map*
00094 open(const bfs::path& idx_file, util::memory_type mem)
00095 {
00096 bfs::ifstream ifs(idx_file);
00097 if (!ifs)
00098 {
00099 util::throw_runtime_error("Cannot open", idx_file);
00100 }
00101 std::string sub_idx_file;
00102 std::vector<map_type*> sub_maps;
00103 const uint64_t payload(table_size(idx_file.parent_path()));
00104 if (payload > util::to_bytes(mem))
00105 {
00106 while (std::getline(ifs, sub_idx_file))
00107 {
00108 sub_maps.push_back(external_map<T>::open(
00109 idx_file.parent_path() / sub_idx_file));
00110 }
00111 }
00112 else
00113 {
00114 while (std::getline(ifs, sub_idx_file))
00115 {
00116 sub_maps.push_back(internal_map<T>::open(
00117 idx_file.parent_path() / sub_idx_file));
00118 }
00119 }
00120 ifs.close();
00121 return new big_hash_map(sub_maps);
00122 }
00123
00124 public:
00125
00126 bool
00127 find(const key_type& key, value_type& value)
00128 {
00129 return maps_.empty() ? false
00130 : maps_[util::hash32(key) % maps_.size()]->find(key, value);
00131 }
00132
00133 bool
00134 find(const std::string& key, char* value_buffer)
00135 {
00136 value_type value;
00137 if (maps_[util::hash32(key) % maps_.size()]->find(key, value))
00138 {
00139 value::value_traits<value_type>::copy_to(value, value_buffer);
00140 return true;
00141 }
00142 return false;
00143 }
00144
00145 uint64_t
00146 size() const
00147 {
00148 return size_;
00149 }
00150
00151 private:
00152
00153 std::vector<map_type*> maps_;
00154 uint64_t size_;
00155
00156 };
00157
00158 template <typename T>
00159 const std::string
00160 big_hash_map<T>::index_file_name(builder<T>::index_file_name);
00161
00162 }
00163 }
00164
00165 using bighashmap::core::big_hash_map;
00166
00167 }
00168
00169 #endif // AITOOLS_BIGHASHMAP_CORE_BIG_HASH_MAP_HPP