#include <Vocabulary.hpp>
Public Types | |
typedef std::map< std::string, uint64_t > | tree_map_t |
typedef std::tr1::unordered_map < std::string, uint64_t > | hash_map_t |
typedef hash_map_t::const_iterator | const_iterator |
typedef hash_map_t::iterator | iterator |
Public Member Functions | |
Vocabulary () | |
Vocabulary (const bfs::path &path) throw (std::invalid_argument) | |
Vocabulary (const Vocabulary &vocabulary) | |
~Vocabulary () | |
void | accumulate (const std::string &word, uint64_t frequency=1) |
const_iterator | begin () const |
iterator | begin () |
void | clear () |
bool | contains (const std::string &word) const |
const_iterator | end () const |
iterator | end () |
const_iterator | find (const std::string &word) const |
iterator | find (const std::string &word) |
uint64_t | frequency (const std::string &word) const |
void | get_words (std::vector< std::string > &words) const |
bool | insert (const std::string &word, uint64_t frequency=0) |
bool | is_empty () const |
void | load (const bfs::path &file) throw (std::invalid_argument) |
void | save (const bfs::path &file) throw (std::invalid_argument) |
size_t | size () const |
Static Public Attributes | |
static const char | delim = '\t' |
static const char | newline = '\n' |
Private Attributes | |
hash_map_t | hash_map_ |
A class that represents a vocabulary. The vocabulary is case sensitive.
Created on Jun 22, 2008
Definition at line 28 of file Vocabulary.hpp.
typedef hash_map_t::const_iterator aitools::invertedindex::Vocabulary::const_iterator |
Defines the iterator type to browse the vocabulary read-only.
Definition at line 51 of file Vocabulary.hpp.
typedef std::tr1::unordered_map<std::string, uint64_t> aitools::invertedindex::Vocabulary::hash_map_t |
Defines the internal hash-map-based vocabulary type. This map is unsorted; it is used to store the internal vocabulary permanently and to provide fast access to it.
Definition at line 46 of file Vocabulary.hpp.
typedef hash_map_t::iterator aitools::invertedindex::Vocabulary::iterator |
Defines the iterator type to browse the vocabulary.
Definition at line 56 of file Vocabulary.hpp.
typedef std::map<std::string, uint64_t> aitools::invertedindex::Vocabulary::tree_map_t |
Defines the internal tree-map-based vocabulary type. This map is sorted; it is used temporarily when the order of words is relevant.
Definition at line 39 of file Vocabulary.hpp.
aitools::invertedindex::Vocabulary::Vocabulary | ( | ) |
The constructor.
Definition at line 4 of file Vocabulary.cpp.
Referenced by Vocabulary().
aitools::invertedindex::Vocabulary::Vocabulary | ( | const bfs::path & | path | ) | throw (std::invalid_argument) |
The explicit constructor. Loads a vocabulary from file.
Definition at line 7 of file Vocabulary.cpp.
References Vocabulary().
aitools::invertedindex::Vocabulary::Vocabulary | ( | const Vocabulary & | vocabulary | ) |
The copy constructor.
Definition at line 12 of file Vocabulary.cpp.
aitools::invertedindex::Vocabulary::~Vocabulary | ( | ) |
The destructor.
Definition at line 16 of file Vocabulary.cpp.
void aitools::invertedindex::Vocabulary::accumulate | ( | const std::string & | word, | |
uint64_t | frequency = 1 | |||
) |
Adds the given frequency to the stored frequency of the given word, or inserts the word with the given frequency if it does not exist yet.
Definition at line 20 of file Vocabulary.cpp.
Referenced by BOOST_AUTO_TEST_CASE().
Vocabulary::iterator aitools::invertedindex::Vocabulary::begin | ( | ) |
Returns an iterator pointing to the first word in the vocabulary, or the end iterator, if the vocabulary is empty.
Definition at line 37 of file Vocabulary.cpp.
References hash_map_.
Vocabulary::const_iterator aitools::invertedindex::Vocabulary::begin | ( | ) | const |
Returns a const iterator pointing to the first word in the vocabulary, or the end iterator if the vocabulary is empty.
Definition at line 31 of file Vocabulary.cpp.
void aitools::invertedindex::Vocabulary::clear | ( | ) |
Deletes all words from the vocabulary.
Definition at line 43 of file Vocabulary.cpp.
References hash_map_.
Referenced by BOOST_AUTO_TEST_CASE().
bool aitools::invertedindex::Vocabulary::contains | ( | const std::string & | word | ) | const |
Checks if the vocabulary contains a given word.
word | the word to look for. |
Definition at line 49 of file Vocabulary.cpp.
References hash_map_.
Referenced by BOOST_AUTO_TEST_CASE(), and aitools::invertedindex::StorageSearcher::quantile().
Vocabulary::iterator aitools::invertedindex::Vocabulary::end | ( | ) |
Returns an iterator pointing to the end of the vocabulary. Since this iterator points to a position one behind the last element, it must not be dereferenced.
Definition at line 61 of file Vocabulary.cpp.
References hash_map_.
Vocabulary::const_iterator aitools::invertedindex::Vocabulary::end | ( | ) | const |
Returns a const iterator pointing to the end of the vocabulary. Since this iterator points to a position one behind the last element, it must not be dereferenced.
Definition at line 55 of file Vocabulary.cpp.
References hash_map_.
Referenced by BOOST_AUTO_TEST_CASE(), and Java_de_aitools_aq_invertedindex_core_IndexSearcher_natGetFrequency().
Vocabulary::iterator aitools::invertedindex::Vocabulary::find | ( | const std::string & | word | ) |
Returns an iterator pointing to the entry of the given word.
Definition at line 73 of file Vocabulary.cpp.
References hash_map_.
Vocabulary::const_iterator aitools::invertedindex::Vocabulary::find | ( | const std::string & | word | ) | const |
Returns a const iterator pointing to the entry of the given word.
Definition at line 67 of file Vocabulary.cpp.
References hash_map_.
Referenced by BOOST_AUTO_TEST_CASE(), and Java_de_aitools_aq_invertedindex_core_IndexSearcher_natGetFrequency().
uint64_t aitools::invertedindex::Vocabulary::frequency | ( | const std::string & | word | ) | const |
Returns the frequency of the given word.
word |
Definition at line 79 of file Vocabulary.cpp.
References hash_map_.
void aitools::invertedindex::Vocabulary::get_words | ( | std::vector< std::string > & | words | ) | const |
Copies all words into the given vector. The set of words is always sorted.
Definition at line 86 of file Vocabulary.cpp.
Referenced by BOOST_AUTO_TEST_CASE().
bool aitools::invertedindex::Vocabulary::insert | ( | const std::string & | word, | |
uint64_t | frequency = 0 | |||
) |
Inserts a word into the vocabulary.
word | the word to insert into the vocabulary. |
Definition at line 98 of file Vocabulary.cpp.
Referenced by BOOST_AUTO_TEST_CASE().
bool aitools::invertedindex::Vocabulary::is_empty | ( | ) | const |
Checks if the vocabulary is empty.
Definition at line 104 of file Vocabulary.cpp.
References hash_map_.
Referenced by BOOST_AUTO_TEST_CASE().
void aitools::invertedindex::Vocabulary::load | ( | const bfs::path & | file | ) | throw (std::invalid_argument) [virtual] |
Loads a vocabulary from an external file.
file | path and name of the input file. |
Implements aitools::invertedindex::Externalizable.
Definition at line 110 of file Vocabulary.cpp.
References hash_map_.
Referenced by BOOST_AUTO_TEST_CASE().
void aitools::invertedindex::Vocabulary::save | ( | const bfs::path & | file | ) | throw (std::invalid_argument) [virtual] |
Saves a vocabulary to an external file. The vocabulary file is always sorted.
file | path and name of the output file. |
Implements aitools::invertedindex::Externalizable.
Definition at line 135 of file Vocabulary.cpp.
Referenced by BOOST_AUTO_TEST_CASE(), and aitools::invertedindex::StorageBuilder::close().
size_t aitools::invertedindex::Vocabulary::size | ( | ) | const |
Gets the size of the vocabulary.
Definition at line 152 of file Vocabulary.cpp.
Referenced by BOOST_AUTO_TEST_CASE().
const char aitools::invertedindex::Vocabulary::delim = '\t' [static] |
Definition at line 60 of file Vocabulary.hpp.
hash_map_t aitools::invertedindex::Vocabulary::hash_map_ [private] |
Definition at line 193 of file Vocabulary.hpp.
Referenced by begin(), clear(), contains(), end(), find(), frequency(), is_empty(), and load().
const char aitools::invertedindex::Vocabulary::newline = '\n' [static] |
Definition at line 61 of file Vocabulary.hpp.