diff --git a/DESIGN b/DESIGN index b6ce4261..7881a81d 100644 --- a/DESIGN +++ b/DESIGN @@ -66,40 +66,6 @@ Callbacks: -Filter: - Filtering is used to generate a subset of songs displayed by the UI to - that users can choose from. The inverted index is generated at startup - so there is no need for a remove() function, since it will be wiped - the next time the application starts. - -- Parsing: - - Scan over the input text to create a list of words using the following - characters as delimiters: \/,;()_-~+" - - While scanning, convert the string to lowercase and strip out any - other special characters. - -- API: - std::string filter :: add(const std::string &key, unsigned int track_id); - Parse the key into words following the "Parsing" section above. - Generate substrings for each word and add each (substring, - track_id) pair to the index. Return the lowercased text to the - caller. - - To generate substrings, iterate over the word starting from - the front. For example: "goron" would contain the substrings - {g, go, gor, goro, goron}. - - std::string filter :: lowercase(const std::string &text); - Parse the text into lowercased words following the "Parsing" - section above. Return the lowercased string to the caller. - - void filter :: search(const std::string &text, std::set &res); - This function finds all track_ids matching the input text. - Parse the string into substrings and take the intersection of - all sets returned by the index for each substring. - - - Idle queue: The idle queue is used to schedule function calls that run at a later time. diff --git a/TODO b/TODO index 1471b0c7..7c07132c 100644 --- a/TODO +++ b/TODO @@ -86,3 +86,7 @@ Future work: - Show gcov graph - Run cppcheck - Read tags with gstreamer? 
+ - Move search functions from filter to index code + - Move tag index into tagdb + - Rename filter.cpp -> text.cpp (or something to reflect + that it is used for text processing) diff --git a/core/filter.cpp b/core/filter.cpp index 8a4edc4a..37689362 100644 --- a/core/filter.cpp +++ b/core/filter.cpp @@ -58,12 +58,12 @@ static void parse_text(const std::string &text, std::list &ret) ret.push_back(word); } -static void add_substrings(const std::string &text, unsigned int track_id) +static void add_substrings(const std::string &text, unsigned int index) { std::string substr; for (unsigned int i = 1; i <= text.size(); i++) { substr = text.substr(0, i); - filter_index.insert(substr, track_id); + filter_index.insert(substr, index); } } @@ -81,14 +81,14 @@ static std::string reassemble_text(std::list text) return res; } -std::string filter :: add(const std::string &text, unsigned int track_id) +std::string filter :: add(const std::string &text, unsigned int index) { std::list parsed; std::list::iterator it; parse_text(text, parsed); for (it = parsed.begin(); it != parsed.end(); it++) - add_substrings(*it, track_id); + add_substrings(*it, index); return reassemble_text(parsed); } diff --git a/include/core/filter.h b/include/core/filter.h index 8fdc0b47..3a195758 100644 --- a/include/core/filter.h +++ b/include/core/filter.h @@ -9,31 +9,50 @@ #include /** - * Namespace for searching the tag database. + * The filter layer is used to find a subset of songs based on an input + * string. Since this layer does text processing, it also provides + * functions for converting strings to lowercase. + * + * The text processing is mostly interested in alphanumeric characters, so + * any special characters included in the input text will be stripped out. + * Tabs, spaces and the following characters are used to delimit words: + * + * \/,;()_-~+" */ namespace filter { /** - * Break a string into words and add each word to an index. 
+ * Convert the text to lowercase and break into individual words. + * Generate substrings for each word and add each (substring, index) + * pair to the Index. * - * @param text The text to parse. - * @param id An identifier paired with the text. + * To generate substrings: iterate over the word starting from the + * first character, and append a character for every iteration. For + * example, the word "goron" would contain the substrings: + * { g, go, gor, goro, goron }. + * + * @param text The text to parse. + * @param index The track index to pair with the text. * @return The lowercase form of the input text. */ std::string add(const std::string &, unsigned int); /** - * Search the entire index for identifiers paired with the input text. + * Break the input text into lowercase words and search the Index + * for matches. The results set should be filled out with the + * intersection of the IndexEntry for each word. If any word does + * not appear in the index, then the result set should be empty. * * @param text The text to search for. - * @param res The results set will be filled in with the ids found. + * @param res The results set to fill in with matching indexes. */ void search(const std::string &, std::set &); /** - * Returns the lowercase version of the input text. + * Converts the input text to lowercase and returns the result. * - * @param text The text to be converted into lowercase. + * @param text The text to be converted. + * @return The lowercase form of the input text. */ std::string lowercase(const std::string &);