00001 #include "Iterator.hpp"
00002 #include "System.hpp"
00003
00004 namespace aitools {
00005 namespace invertedindex {
00006
00007 Iterator::Iterator()
00008 : vsizes_(new SizeVector),
00009 index_(0)
00010 {}
00011
00012 Iterator::Iterator(const Header& header, FILE* page)
00013 : header_(header),
00014 vsizes_(new SizeVector),
00015 page_(page),
00016 index_(0)
00017 {
00018 if (header.value_size == 0)
00019 {
00020 assert(vsizes_->size() == length());
00021 }
00022 swap();
00023 }
00024
00025 Iterator::Iterator(const Header& header, ByteBuffer* buffer)
00026 : header_(header),
00027 vsizes_(new SizeVector),
00028 chunk_(buffer),
00029 index_(0)
00030 {
00031 if (header.value_size == 0)
00032 {
00033 assert(vsizes_->size() == length());
00034 }
00035 chunk_.end = header_.value_count;
00036 }
00037
00038 Iterator::Iterator(const Header& header, SizeVector* vsizes, FILE* page)
00039 : header_(header),
00040 vsizes_(vsizes),
00041 page_(page),
00042 index_(0)
00043 {
00044 assert(vsizes_ != NULL);
00045 if (header.value_size == 0)
00046 {
00047 assert(vsizes_->size() == length());
00048 }
00049 swap();
00050 }
00051
00052 Iterator::Iterator(const Header& header, SizeVector* vsizes, ByteBuffer* buffer)
00053 : header_(header),
00054 vsizes_(vsizes),
00055 chunk_(buffer),
00056 index_(0)
00057 {
00058 assert(vsizes_ != NULL);
00059 if (header.value_size == 0)
00060 {
00061 assert(vsizes_->size() == length());
00062 }
00063 chunk_.end = header_.value_count;
00064 }
00065
00066 Iterator::~Iterator()
00067 {
00068 delete vsizes_;
00069 }
00070
00071 void
00072 Iterator::advance()
00073 {
00074 chunk_.offset += size();
00075 if (++index_ == chunk_.end)
00076 {
00077 swap();
00078 }
00079 }
00080
00081 uint16_t
00082 Iterator::checksum() const
00083 {
00084 return header_.checksum;
00085 }
00086
00087 const Iterator::Chunk&
00088 Iterator::chunk() const
00089 {
00090 return chunk_;
00091 }
00092
00093 const Iterator::Header&
00094 Iterator::header() const
00095 {
00096 return header_;
00097 }
00098
00099 const Iterator::SizeVector&
00100 Iterator::value_sizes() const
00101 {
00102 return *vsizes_;
00103 }
00104
00105 size_t
00106 Iterator::length() const
00107 {
00108 return header_.value_count;
00109 }
00110
00111 size_t
00112 Iterator::payload() const
00113 {
00114 return header_.payload;
00115 }
00116
00117 void
00118 Iterator::rewind()
00119 {
00120 index_ = 0;
00121 chunk_.offset = 0;
00122 if (is_paged())
00123 {
00124 System::fseek(page_.file, page_.offset, SEEK_SET);
00125 chunk_.begin = 0;
00126 chunk_.end = 0;
00127 swap();
00128 }
00129 }
00130
00131 bool
00132 Iterator::swap() throw (std::runtime_error)
00133 {
00134 size_t remaining_value_count(header_.value_count - chunk_.end);
00135 if (remaining_value_count == 0) return false;
00136
00137 size_t chunk_size(0);
00138 size_t chunk_value_count(0);
00139 if (header_.value_size)
00140 {
00141 chunk_value_count = max_chunk_size / header_.value_size;
00142 if (chunk_value_count > remaining_value_count)
00143 {
00144 chunk_value_count = remaining_value_count;
00145 }
00146 chunk_size = chunk_value_count * header_.value_size;
00147 }
00148 else
00149 {
00150 for (unsigned i = chunk_.end; i != header_.value_count; ++i)
00151 {
00152 if (chunk_size > max_chunk_size) break;
00153 chunk_size += (*vsizes_)[i];
00154 ++chunk_value_count;
00155 }
00156 }
00157 index_ = chunk_.end;
00158 chunk_.offset = 0;
00159 chunk_.begin = chunk_.end;
00160 chunk_.end += chunk_value_count;
00161 chunk_.buffer->resize(chunk_size);
00162 System::fread(chunk_.buffer->data(), 1, chunk_size, page_.file);
00163 return true;
00164 }
00165
00166 }
00167 }