00001 #include "MPHashFunction.hpp"
00002 #include "Exception.hpp"
00003 #include "System.hpp"
00004 #include <vector>
00005
00006 namespace aitools {
00007 namespace invertedindex {
00008
00009 MPHashFunction::MPHashFunction()
00010 : mphf_(NULL)
00011 {}
00012
00013 MPHashFunction::~MPHashFunction()
00014 {
00015 clear_();
00016 }
00017
00018 void
00019 MPHashFunction::load(const bfs::path& path) throw (std::invalid_argument)
00020 {
00021 FILE* file(System::fopen(path, "rb"));
00022 clear_();
00023 mphf_ = cmph_load(file);
00024 System::fclose(file);
00025 }
00026
00027 void
00028 MPHashFunction::create(const bfs::path& vocabulary, Algorithm algorithm)
00029 throw (std::invalid_argument, std::runtime_error)
00030 {
00031 FILE* file(System::fopen(vocabulary, "r"));
00032 cmph_io_adapter_t* source(cmph_io_nlfile_adapter(file));
00033 try
00034 {
00035 create_(algorithm, source);
00036 }
00037 catch (const std::runtime_error&)
00038 {
00039 cmph_io_nlfile_adapter_destroy(source);
00040 System::fclose(file);
00041 throw;
00042 }
00043 }
00044
00045 void
00046 MPHashFunction::create(const Vocabulary& vocabulary, Algorithm algorithm)
00047 throw (std::runtime_error)
00048 {
00049 std::vector<std::string> words;
00050 vocabulary.get_words(words);
00051
00052 size_t size(words.size());
00053 char** word_array(new char*[size]);
00054 for (unsigned i(0); i != size; ++i)
00055 {
00056 word_array[i] = (char*)words[i].c_str();
00057 }
00058 cmph_io_adapter_t* source(cmph_io_vector_adapter(word_array, size));
00059 try
00060 {
00061 create_(algorithm, source);
00062 }
00063 catch (const std::runtime_error&)
00064 {
00065 cmph_io_vector_adapter_destroy(source);
00066 delete[] word_array;
00067 throw;
00068 }
00069 }
00070
00071 uint32_t
00072 MPHashFunction::hash(const std::string& key) const
00073 {
00074 return mphf_ == NULL ? 0 : cmph_search(mphf_, key.c_str(), key.size());
00075 }
00076
00077 void
00078 MPHashFunction::save(const bfs::path& path) throw (std::invalid_argument)
00079 {
00080 if (mphf_ == NULL) return;
00081 FILE* file(System::fopen(path, "wb"));
00082 cmph_dump(mphf_, file);
00083 System::fclose(file);
00084 }
00085
00086 uint32_t
00087 MPHashFunction::size() const
00088 {
00089 return mphf_ == NULL ? 0 : cmph_size(mphf_);
00090 }
00091
00092 void
00093 MPHashFunction::clear_()
00094 {
00095 if (mphf_ == NULL) return;
00096 cmph_destroy(mphf_);
00097 mphf_ = NULL;
00098 }
00099
00100 void
00101 MPHashFunction::create_(Algorithm algorithm, cmph_io_adapter_t* source)
00102 throw (std::runtime_error)
00103 {
00104 assert(source != NULL);
00105 cmph_config_t* config(cmph_config_new(source));
00106 assert(config != NULL);
00107 switch (algorithm)
00108 {
00109 case BMZ:
00110 cmph_config_set_algo(config, CMPH_BMZ);
00111 break;
00112 case CHM:
00113 cmph_config_set_algo(config, CMPH_CHM);
00114 break;
00115 case BRZ:
00116 cmph_config_set_algo(config, CMPH_BRZ);
00117 break;
00118 case FCH:
00119 cmph_config_set_algo(config, CMPH_FCH);
00120 break;
00121 case BDZ:
00122 cmph_config_set_algo(config, CMPH_BDZ);
00123 break;
00124 case CHD:
00125 cmph_config_set_algo(config, CMPH_CHD);
00126 break;
00127 }
00128 clear_();
00129 mphf_ = cmph_new(config);
00130 cmph_config_destroy(config);
00131 if (mphf_ == NULL)
00132 {
00133 Exception::throw_runtime_error("Cannot create mphf");
00134 }
00135 }
00136
00137 }
00138 }