00001 /* Copyright (C) 2010 webis.de 00002 * All rights reserved. 00003 */ 00004 #ifndef AITOOLS_INVERTEDINDEX_NGRAM_INDEXING_HPP 00005 #define AITOOLS_INVERTEDINDEX_NGRAM_INDEXING_HPP 00006 00007 #include "Triple.hpp" 00008 #include "NGramFileReader.hpp" 00009 #include <boost/filesystem.hpp> 00010 #include <unistd.h> 00011 #include <stdio.h> 00012 00013 namespace bfs = boost::filesystem; 00014 00015 namespace aitools { 00016 namespace invertedindex { 00017 00018 typedef Triple<int64_t, int32_t, int8_t> value_type; 00019 00020 void 00021 build_ngram_index(int argc, char* argv[]) 00022 { 00023 int c; 00024 Configuration config; 00025 config.set_available_memory(1000); 00026 RecordReader<value_type>* reader(NULL); 00027 while ((c = getopt(argc, argv, "f:s:i:o:")) != -1) 00028 { 00029 switch (c) 00030 { 00031 case 'f': 00032 if (std::strcmp(optarg, "ngram") == 0) 00033 { 00034 reader = new NGramFileReader; 00035 config.set_input_format(Configuration::PSEUDO_INVERTED); 00036 } 00037 else if (std::strcmp(optarg, "realif") == 0) 00038 { 00039 reader = new InvertedFileReader<value_type>; 00040 config.set_input_format(Configuration::REAL_INVERTED); 00041 } 00042 break; 00043 case 's': 00044 config.set_postlist_sorting(std::strcmp(optarg, "true") == 0 ? 00045 Configuration::DESCENDING : Configuration::DISABLED); 00046 break; 00047 case 'i': 00048 config.set_input_directory(optarg); 00049 break; 00050 case 'o': 00051 config.set_index_directory(optarg); 00052 break; 00053 default: 00054 break; 00055 } 00056 } 00057 00058 try 00059 { 00060 IndexBuilder<value_type> builder(reader); 00061 builder.build(config); 00062 } 00063 catch (const std::exception& error) 00064 { 00065 Logging::error(std::string(error.what())); 00066 } 00067 } 00068 00069 } // namespace invertedindex 00070 } // namespace aitools 00071 00072 #endif // AITOOLS_INVERTEDINDEX_NGRAM_INDEXING_HPP