Filter: Add more detailed documentation
Also remove the corresponding section of the DESIGN file.

Signed-off-by: Anna Schumaker <Anna@OcarinaProject.net>
This commit is contained in:
parent
e81f17360e
commit
2a65fe8db0
34
DESIGN
34
DESIGN
|
@ -66,40 +66,6 @@ Callbacks:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Filter:
|
|
||||||
Filtering is used to generate a subset of songs displayed by the UI
|
|
||||||
that users can choose from. The inverted index is generated at startup
|
|
||||||
so there is no need for a remove() function, since it will be wiped
|
|
||||||
the next time the application starts.
|
|
||||||
|
|
||||||
- Parsing:
|
|
||||||
- Scan over the input text to create a list of words using the following
|
|
||||||
characters as delimiters: \/,;()_-~+"
|
|
||||||
- While scanning, convert the string to lowercase and strip out any
|
|
||||||
other special characters.
|
|
||||||
|
|
||||||
- API:
|
|
||||||
std::string filter :: add(const std::string &key, unsigned int track_id);
|
|
||||||
Parse the key into words following the "Parsing" section above.
|
|
||||||
Generate substrings for each word and add each (substring,
|
|
||||||
track_id) pair to the index. Return the lowercased text to the
|
|
||||||
caller.
|
|
||||||
|
|
||||||
To generate substrings, iterate over the word starting from
|
|
||||||
the front. For example: "goron" would contain the substrings
|
|
||||||
{g, go, gor, goro, goron}.
|
|
||||||
|
|
||||||
std::string filter :: lowercase(const std::string &text);
|
|
||||||
Parse the text into lowercased words following the "Parsing"
|
|
||||||
section above. Return the lowercased string to the caller.
|
|
||||||
|
|
||||||
void filter :: search(const std::string &text, std::set<track_id> &res);
|
|
||||||
This function finds all track_ids matching the input text.
|
|
||||||
Parse the string into substrings and take the intersection of
|
|
||||||
all sets returned by the index for each substring.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Idle queue:
|
Idle queue:
|
||||||
The idle queue is used to schedule function calls that run at a later
|
The idle queue is used to schedule function calls that run at a later
|
||||||
time.
|
time.
|
||||||
|
|
4
TODO
4
TODO
|
@ -86,3 +86,7 @@ Future work:
|
||||||
- Show gcov graph
|
- Show gcov graph
|
||||||
- Run cppcheck
|
- Run cppcheck
|
||||||
- Read tags with gstreamer?
|
- Read tags with gstreamer?
|
||||||
|
- Move search functions from filter to index code
|
||||||
|
- Move tag index into tagdb
|
||||||
|
- Rename filter.cpp -> text.cpp (or something to reflect
|
||||||
|
that it is used for text processing)
|
||||||
|
|
|
@ -58,12 +58,12 @@ static void parse_text(const std::string &text, std::list<std::string> &ret)
|
||||||
ret.push_back(word);
|
ret.push_back(word);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void add_substrings(const std::string &text, unsigned int track_id)
|
static void add_substrings(const std::string &text, unsigned int index)
|
||||||
{
|
{
|
||||||
std::string substr;
|
std::string substr;
|
||||||
for (unsigned int i = 1; i <= text.size(); i++) {
|
for (unsigned int i = 1; i <= text.size(); i++) {
|
||||||
substr = text.substr(0, i);
|
substr = text.substr(0, i);
|
||||||
filter_index.insert(substr, track_id);
|
filter_index.insert(substr, index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -81,14 +81,14 @@ static std::string reassemble_text(std::list<std::string> text)
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string filter :: add(const std::string &text, unsigned int track_id)
|
std::string filter :: add(const std::string &text, unsigned int index)
|
||||||
{
|
{
|
||||||
std::list<std::string> parsed;
|
std::list<std::string> parsed;
|
||||||
std::list<std::string>::iterator it;
|
std::list<std::string>::iterator it;
|
||||||
|
|
||||||
parse_text(text, parsed);
|
parse_text(text, parsed);
|
||||||
for (it = parsed.begin(); it != parsed.end(); it++)
|
for (it = parsed.begin(); it != parsed.end(); it++)
|
||||||
add_substrings(*it, track_id);
|
add_substrings(*it, index);
|
||||||
return reassemble_text(parsed);
|
return reassemble_text(parsed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,31 +9,50 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Namespace for searching the tag database.
|
* The filter layer is used to find a subset of songs based on an input
|
||||||
|
* string. Since this layer does text processing, it also provides a
|
||||||
|
* function for converting strings to lowercase.
|
||||||
|
*
|
||||||
|
* The text processing is mostly interested in alphanumeric characters, so
|
||||||
|
* any special characters included in the input text will be stripped out.
|
||||||
|
* Tabs, spaces and the following characters are used to delimit words:
|
||||||
|
*
|
||||||
|
* \/,;()_-~+"
|
||||||
*/
|
*/
|
||||||
namespace filter {
|
namespace filter {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Break a string into words and add each word to an index.
|
* Convert the key to lowercase and break into individual words.
|
||||||
|
* Generate substrings for each word and add each (substring, index)
|
||||||
|
* pair to the Index.
|
||||||
*
|
*
|
||||||
* @param text The text to parse.
|
* To generate substrings: iterate over the word starting from the
|
||||||
* @param id An identifier paired with the text.
|
* first character, and append a character for every iteration. For
|
||||||
|
* example, the word "goron" would contain the substrings:
|
||||||
|
* { g, go, gor, goro, goron }.
|
||||||
|
*
|
||||||
|
* @param text The text to parse.
|
||||||
|
* @param index The track index to pair with the text.
|
||||||
* @return The lowercase form of the input text.
|
* @return The lowercase form of the input text.
|
||||||
*/
|
*/
|
||||||
std::string add(const std::string &, unsigned int);
|
std::string add(const std::string &, unsigned int);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Search the entire index for identifiers paired with the input text.
|
* Break the input text into lowercase words and search the Index
|
||||||
|
* for matches. The results set should be filled out with the
|
||||||
|
* intersection of the IndexEntry for each word. If any word does
|
||||||
|
* not appear in the index, then the result set should be empty.
|
||||||
*
|
*
|
||||||
* @param text The text to search for.
|
* @param text The text to search for.
|
||||||
* @param res The results set will be filled in with the ids found.
|
* @param res The results set to fill in with matching indexes.
|
||||||
*/
|
*/
|
||||||
void search(const std::string &, std::set<unsigned int> &);
|
void search(const std::string &, std::set<unsigned int> &);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the lowercase version of the input text.
|
* Converts the input text to lowercase and returns the result.
|
||||||
*
|
*
|
||||||
* @param text The text to be converted into lowercase.
|
* @param text The text to be converted.
|
||||||
|
* @return The lowercase form of the input text.
|
||||||
*/
|
*/
|
||||||
std::string lowercase(const std::string &);
|
std::string lowercase(const std::string &);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue