00001
00002
00003
00004 #ifndef AITOOLS_INVERTEDINDEX_INTERNAL_INTRO_SORT_HPP
00005 #define AITOOLS_INVERTEDINDEX_INTERNAL_INTRO_SORT_HPP
00006
00007 #include "IndexMessages.pb.h"
00008 #include "PostlistBuilder.hpp"
00009 #include "Postlist.hpp"
00010 #include "Quantile.hpp"
00011 #include <functional>
00012 #include <algorithm>
00013 #include <vector>
00014
00015 namespace aitools {
00016 namespace invertedindex {
00017
00027 class InternalIntroSort {
00028
00029 private:
00030
00031 typedef Configuration::Sorting Sorting;
00032
00033 private:
00034
00035 InternalIntroSort();
00036
00037 public:
00038
00039 template<typename Value>
00040 static void
00041 sort(Postlist<Value>& postlist, Quantile& quantile, Sorting mode);
00042
00043 template<typename Value>
00044 static void
00045 sort(std::vector<Value>& values, Quantile& quantile, Sorting mode);
00046
00047 private:
00048
00049 template<typename Value>
00050 static void
00051 comp_quantile(const std::vector<Value>& values, Quantile& quantile);
00052
00053 };
00054
00055
00056
00057 template<typename Value>
00058 void InternalIntroSort::sort
00059 (Postlist<Value>& postlist, Quantile& quantile, Sorting mode)
00060 {
00061 if (mode == Configuration::DISABLED) return;
00062
00063
00064 Value value;
00065 std::vector<Value> values;
00066 values.reserve(postlist.length());
00067 while (postlist.next(value))
00068 {
00069 values.push_back(value);
00070 }
00071
00072 sort(values, quantile, mode);
00073
00074 ByteBuffer buffer;
00075 PostlistBuilder builder;
00076 builder.set_checksum(postlist.iterator()->header().checksum);
00077 typename std::vector<Value>::const_iterator end(values.end());
00078 typename std::vector<Value>::const_iterator it(values.begin());
00079 for (; it != end; ++it)
00080 {
00081 it->to_bytes(buffer);
00082 builder.append(buffer);
00083 }
00084 postlist = builder.build();
00085 }
00086
00087 template<typename Value>
00088 void InternalIntroSort::sort
00089 (std::vector<Value>& values, Quantile& quantile, Sorting mode)
00090 {
00091 if (mode == Configuration::ASCENDING)
00092 {
00093 std::sort(values.begin(), values.end(), std::less<Value>());
00094 }
00095 else if (mode == Configuration::DESCENDING)
00096 {
00097 std::sort(values.begin(), values.end(), std::greater<Value>());
00098 }
00099 else
00100 {
00101 return;
00102 }
00103 comp_quantile(values, quantile);
00104 }
00105
00106 template<typename Value>
00107 void InternalIntroSort::comp_quantile
00108 (const std::vector<Value>& values, Quantile& quantile)
00109 {
00110
00111 double score(0);
00112 double integrals[Quantile::COUNT] = {0};
00113 typename std::vector<Value>::const_iterator end(values.end());
00114 typename std::vector<Value>::const_iterator it(values.begin());
00115 for (; it != end; ++it)
00116 {
00117 score += it->score();
00118 }
00119 integrals[Quantile::P100] = score;
00120
00121 for (unsigned q(Quantile::P10); q != Quantile::P91; ++q)
00122 {
00123 integrals[q] = (q+1) * 0.1 * score;
00124 }
00125
00126 for (unsigned q(Quantile::P91); q != Quantile::P100; ++q)
00127 {
00128 integrals[q] = (0.9 + (q-Quantile::P91+1) * (0.01)) * score;
00129 }
00130
00131 quantile.clear();
00132 double integral(0);
00133 Quantile::Order order(Quantile::P10);
00134 for (unsigned i(0); i != values.size(); ++i)
00135 {
00136 integral += values[i].score();
00137 if (integral >= integrals[order])
00138 {
00139 quantile.at(order) = i+1;
00140 if (order < Quantile::P100)
00141 {
00142 order = Quantile::Order(order+1);
00143 }
00144 }
00145 }
00146
00147 for (unsigned q(order); q <= Quantile::P100; ++q)
00148 {
00149 quantile.at((Quantile::Order)q) = values.size();
00150 }
00151 }
00152
00153 }
00154 }
00155
00156 #endif // AITOOLS_INVERTEDINDEX_INTERNAL_INTRO_SORT_HPP