00001 #include "PostlistReader.hpp"
00002 #include "System.hpp"
00003
00004 namespace aitools {
00005 namespace invertedindex {
00006
00007 PostlistReader::PostlistReader()
00008 : file_(NULL)
00009 {}
00010
00011 PostlistReader::PostlistReader(const bfs::path& path)
00012 throw (std::invalid_argument)
00013 : file_(NULL)
00014 {
00015 open(path);
00016 }
00017
00018 PostlistReader::~PostlistReader()
00019 {
00020 close();
00021 }
00022
00023 void
00024 PostlistReader::close()
00025 {
00026 if (is_open())
00027 {
00028 path_ = bfs::path();
00029 System::fclose(file_);
00030 file_ = NULL;
00031 }
00032 }
00033
00034 void
00035 PostlistReader::open(const bfs::path& path)
00036 throw (std::runtime_error, std::invalid_argument)
00037 {
00038 if (is_open())
00039 {
00040 Exception::throw_runtime_error("File already open");
00041 }
00042 file_ = System::fopen(path, "rb");
00043 path_ = path;
00044 }
00045
00046 const bfs::path&
00047 PostlistReader::path() const
00048 {
00049 return path_;
00050 }
00051
00052 Iterator::SharedPointer
00053 PostlistReader::read(size_t length) throw (std::runtime_error)
00054 {
00055
00056 Iterator::Header header;
00057 System::fread(&header, sizeof(header), 1, file_);
00058 long end_of_postlist(System::ftell(file_) + header.payload);
00059 const size_t org_value_count(header.value_count);
00060 if (length < header.value_count)
00061 {
00062 header.value_count = length;
00063 header.payload = header.value_count * header.value_size;
00064 }
00065 Iterator::SizeVector* vsizes(new Iterator::SizeVector);
00066 if (header.value_count != 0 && header.value_size == 0)
00067 {
00068 header.payload = 0;
00069 vsizes->resize(header.value_count);
00070 long end_of_header(System::ftell(file_));
00071 end_of_postlist += org_value_count * sizeof_value_size_t;
00072 System::fread(vsizes->data(), sizeof_value_size_t, vsizes->size(), file_);
00073 for (unsigned i = 0; i != header.value_count; ++i)
00074 {
00075 header.payload += (*vsizes)[i];
00076 }
00077 long end_of_vsizes(end_of_header + org_value_count * sizeof_value_size_t);
00078 System::fseek(file_, end_of_vsizes, SEEK_SET);
00079 }
00080
00081
00082 Iterator::SharedPointer iterator;
00083 if (header.payload > Iterator::max_chunk_size)
00084 {
00085 FILE* file(System::fopen(path_, "rb"));
00086 System::fseek(file, System::ftell(file_), SEEK_SET);
00087 iterator.reset(new Iterator(header, vsizes, file));
00088 }
00089 else
00090 {
00091 ByteBuffer* buffer(new ByteBuffer(header.payload));
00092 System::fread(buffer->data(), 1, header.payload, file_);
00093 iterator.reset(new Iterator(header, vsizes, buffer));
00094 }
00095 System::fseek(file_, end_of_postlist, SEEK_SET);
00096 return iterator;
00097 }
00098
00099 void
00100 PostlistReader::rewind()
00101 {
00102 if (is_open()) std::rewind(file_);
00103 }
00104
00105 void
00106 PostlistReader::seek(size_t offset)
00107 {
00108 if (is_open()) System::fseek(file_, offset, SEEK_SET);
00109 }
00110
00111 size_t
00112 PostlistReader::tell()
00113 {
00114 return is_open() ? System::ftell(file_) : 0;
00115 }
00116
00117 }
00118 }