00001 #include "inverted_file_writing.hpp" 00002 #include "invertedindex.hpp" 00003 #include <cstdlib> 00004 #include <cmath> 00005 00006 namespace aitools { 00007 namespace invertedindex { 00008 00009 void 00010 write_inverted_file(const bfs::path& file) 00011 { 00012 /* (1) Instantiate a record with an appropriate entry type. 00013 * The entry parameter will be the value part of the record that will be 00014 * serialized in a human readable format to an inverted file. Decide which 00015 * entry type fits best to encode your information. You can find some 00016 * predefined types (typedefs) in ${project}/src/value/EntryTypes.hpp. 00017 */ 00018 Record<IntFloat> record; 00019 00020 try 00021 { 00022 /* (2) Create an InvertedFileWriter which serializes all your records. 00023 * The usage of this class guarantees the correctness of the text file. 00024 */ 00025 InvertedFileWriter<IntFloat> writer(file); 00026 00027 /* (3) Write 1 million random records to the inverted file. 00028 * According to your data source you can use just one record instance 00029 * by resetting its values in each loop and write its actual content to 00030 * the file. This is the common way if the record's key is constant over 00031 * a sequence of records. 00032 */ 00033 for (unsigned i(0); i != 1000000; ++i) 00034 { 00035 /* Set the record's key to a random number encoded as string. 00036 */ 00037 record.key() = Converter::ui32_to_str(std::rand() % INT16_MAX); 00038 00039 /* Set the record's value to random primitives. 00040 */ 00041 record.value().e1() = std::rand(); 00042 record.value().e2() = std::sqrt(std::rand()); 00043 00044 /* Write the entire record to the inverted file. 00045 */ 00046 writer.write(record); 00047 } 00048 writer.close(); 00049 00050 /* Note: This example constructs a pseudo-inverted file with just one 00051 * record per line. This format can be seen as a minimal pseudo-inverted 00052 * file that is equivalent to a list of key-value pairs. 00053 * 00054 * Nevertheless the keys in this file doesn't have to be unique, this 00055 * is what the word 'pseudo' stands for. The indexing process will 00056 * group all records sharing the same key together in one postlist. 00057 */ 00058 } 00059 catch (const std::exception& error) 00060 { 00061 Logging::error(std::string(error.what())); 00062 } 00063 } 00064 00065 } // namespace invertedindex 00066 } // namespace aitools