使用标准库:文本查询程序
#include "my_TextQuery.h"
#include "make_plural.h"
#include <cstddef>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <iostream>
#include <fstream>
#include <cctype>
#include <cstring>
#include <utility>
using std::size_t;
using std::shared_ptr;
using std::istringstream;
using std::string;
using std::getline;
using std::vector;
using std::map;
using std::set;
using std::cerr;
using std::cout;
using std::cin;
using std::ostream;
using std::endl;
using std::ifstream;
using std::ispunct;
using std::tolower;
using std::strlen;
using std::pair;
// Read the input stream line by line, storing every line in `file` and
// recording, for each word, the set of line numbers on which it appears.
//
// Parameters:
//   is - input file stream; read with getline until extraction fails.
//
// Every whitespace-separated token is passed through cleanup_str before it
// is used as a key in `wm`.
TextQuery::TextQuery(ifstream &is): file(new vector<string>)
{
    string text;
    while (getline(is, text)) {        // for each line in the file
        file.push_back(text);          // remember this line of text
        // Keep the index in line_no (the unsigned size_type used by the
        // line sets) instead of narrowing it to int.
        const line_no n = file.size() - 1;   // the current line number
        istringstream line(text);      // separate the line into words
        string word;
        while (line >> word) {         // for each word in that line
            word = cleanup_str(word);
            // if word isn't already in wm, subscripting adds a new entry
            auto &lines = wm[word];    // lines is a shared_ptr
            if (!lines)                // null the first time we see word
                lines.reset(new set<line_no>);   // allocate a new set
            lines->insert(n);          // insert this line number
        }
    }
}
// Not covered in the book -- cleanup_str drops punctuation characters and
// lower-cases everything else so that queries operate in a case
// insensitive manner.
//
// Parameters:
//   word - the token to normalize.
// Returns:
//   a copy of word without punctuation, converted to lowercase.
string TextQuery::cleanup_str(const string &word)
{
    string ret;
    for (const char c : word) {
        // The <cctype> classification functions require an argument that is
        // representable as unsigned char (or EOF); passing a negative plain
        // char (any byte >= 0x80 where char is signed) is undefined behavior.
        const unsigned char uc = static_cast<unsigned char>(c);
        if (!ispunct(uc))
            ret += static_cast<char>(tolower(uc));
    }
    return ret;
}
// Look up `sought` (normalized by cleanup_str) in the word map and return a
// QueryResult built from the original search string, the matching set of
// line numbers (a shared empty set when the word is absent) and the file.
QueryResult
TextQuery::query(const string &sought) const
{
    // shared empty set handed back whenever nothing matches
    static shared_ptr<set<line_no>> nodata(new set<line_no>);
    // find, unlike the subscript operator, never adds a key to wm
    const auto hit = wm.find(cleanup_str(sought));
    if (hit != wm.end())
        return QueryResult(sought, hit->second, file);
    return QueryResult(sought, nodata, file);   // not found
}
// Write a report for `qr` to `os`: first a summary line with the searched
// word and its occurrence count, then one line per matching text line,
// numbered starting from 1. Returns `os`.
ostream &print(ostream & os, const QueryResult &qr)
{
    // summary: the word, how many lines it is on, and "time"/"times"
    const auto count = qr.lines->size();
    os << qr.sought << " occurs " << count << " "
       << make_plural(count, "time", "s") << endl;

    // walk the set of line numbers in order
    for (auto it = qr.lines->cbegin(); it != qr.lines->cend(); ++it) {
        const auto num = *it;
        // stored indices are zero-based; show them to the user as 1-based
        os << "\t(line " << num + 1 << ") "
           << qr.file.begin().deref(num) << endl;
    }
    return os;
}
// Debugging routine, not covered in the book: prints every word in the map
// followed by a brace-enclosed, comma-separated list of the (zero-based)
// line numbers stored for it, then ends with a flushed newline on cout.
void TextQuery::display_map()
{
    // for each word in the map
    for (const auto &entry : wm) {
        cout << "word: " << entry.first << " {";
        // bind by const reference so the shared_ptr itself is not copied
        const auto &text_locs = entry.second;
        // print all line numbers for this word, separated by ", "
        bool first = true;
        for (const auto loc : *text_locs) {
            if (!first)
                cout << ", ";
            cout << loc;
            first = false;
        }
        cout << "}\n";   // end list of output for this word
    }
    cout << endl;        // finished printing entire map
}
#ifndef TEXTQUERY_H
#define TEXTQUERY_H
#include <memory>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <fstream>
#include "my_QueryResult.h"
/* this version of the query classes includes two
* members not covered in the book:
* cleanup_str: which removes punctuation and
* converts all text to lowercase
* display_map: a debugging routine that will print the contents
* of the lookup map
*/
class QueryResult; // declaration needed for return type in the query function
// Indexes the words of an input file by the lines they appear on and
// answers single-word queries against that index.
class TextQuery {
public:
    typedef std::vector<std::string>::size_type line_no;

    // builds the index from the given input file stream
    TextQuery(std::ifstream &is);

    // returns the query result for one word
    QueryResult query(const std::string &sought) const;

    // debugging aid: print the map
    void display_map();

private:
    // canonicalizes text: removes punctuation and makes everything lower case
    static std::string cleanup_str(const std::string &word);

    // input file
    StrBlob file;

    // maps each word to the set of the lines in which that word appears
    std::map<std::string, std::shared_ptr<std::set<line_no>>> wm;
};
#endif
#include <cstddef>
using std::size_t;
#include <string>
using std::string;
#include <iostream>
using std::cout; using std::endl;
#ifndef MAKE_PLURAL_H
#define MAKE_PLURAL_H
// Returns `word` unchanged when ctr is 0 or 1, and `word` with `ending`
// appended when ctr is greater than 1.
inline
string make_plural(size_t ctr, const string &word,
                   const string &ending)
{
    if (ctr <= 1)
        return word;
    return word + ending;
}
#endif
#ifndef QUERYRESULT_H
#define QUERYRESULT_H
#include <memory>
#include <string>
#include <vector>
#include <set>
#include <iostream>
#include "my_StrBlob.h"
// Result of a single-word query: the word that was searched for, the set
// of line numbers it was found on, and the file contents those numbers
// index into. The free function print is a friend and formats the result.
class QueryResult {
    friend std::ostream& print(std::ostream&, const QueryResult&);
public:
    typedef std::vector<std::string>::size_type line_no;
    typedef std::set<line_no>::const_iterator line_it;

    // s - the word queried, p - the lines it occurs on, f - the input file
    QueryResult(std::string s,
                std::shared_ptr<std::set<line_no>> p,
                StrBlob f):
        sought(s), lines(p), file(f) { }

    // number of lines on which the word occurs
    std::set<line_no>::size_type size() const { return lines->size(); }

    // iterators over the matching line numbers
    line_it begin() const { return lines->cbegin(); }
    line_it end() const { return lines->cend(); }

    // Returns a copy of the StrBlob holding the file. Const-qualified so it
    // can also be called through a const QueryResult (for example from a
    // function taking `const QueryResult&`); it does not modify the object.
    StrBlob get_file() const { return file; }
private:
    std::string sought;                         // word this query represents
    std::shared_ptr<std::set<line_no>> lines;   // lines it's on
    StrBlob file;                               // input file
};
std::ostream &print(std::ostream&, const QueryResult&);
#endif
#ifndef MY_STRBLOB_H
#define MY_STRBLOB_H
#include <vector>
#include <string>
#include <initializer_list>
#include <memory>
#include <stdexcept>
using namespace std;
// Forward declaration, needed by the friend declaration and by the
// begin()/end() return types inside StrBlob.
class StrBlobPtr;

// A container of strings whose storage is a vector<string> held through a
// shared_ptr, so copies of a StrBlob refer to the same vector.
class StrBlob
{
    friend class StrBlobPtr;

public:
    using size_type = std::vector<std::string>::size_type;

    StrBlob();
    StrBlob(std::initializer_list<std::string> il);
    StrBlob(std::vector<std::string> *p);

    size_type size() const { return data->size(); }
    bool empty() const { return data->empty(); }

    // add and remove elements
    void push_back(const std::string &t) { data->push_back(t); }
    void pop_back();

    // element access
    std::string& front();
    const std::string& front() const;
    std::string& back();
    const std::string& back() const;

    // interface for StrBlobPtr; these two can only be defined after
    // StrBlobPtr itself has been defined
    StrBlobPtr begin();
    StrBlobPtr end();

    // const versions
    StrBlobPtr begin() const;
    StrBlobPtr end() const;

private:
    std::shared_ptr<std::vector<std::string>> data;

    // throws an exception if data[i] is not valid
    void check(size_type i, const std::string &msg) const;
};
// Default constructor: data refers to a newly allocated empty vector.
inline StrBlob::StrBlob()
    : data(make_shared<vector<string>>())
{
}

// Initializer-list constructor: data refers to a new vector built from il.
inline StrBlob::StrBlob(initializer_list<string> il)
    : data(make_shared<vector<string>>(il))
{
}

// Pointer constructor: data is initialized directly from the raw pointer p.
inline StrBlob::StrBlob(vector<string> *p)
    : data(p)
{
}
// Throws out_of_range carrying msg when i is not a valid index into data;
// otherwise does nothing.
inline void StrBlob::check(size_type i, const string &msg) const
{
    const bool in_range = i < data->size();
    if (!in_range)
        throw out_of_range(msg);
}
// Returns a reference to the first element; throws out_of_range (via
// check) when the vector is empty.
inline string& StrBlob::front()
{
    // check throws if the vector is empty
    check(0, "front on empty StrBlob");
    vector<string> &elems = *data;
    return elems.front();
}
// const version of front: returns a const reference to the first element;
// throws out_of_range (via check) when the vector is empty.
inline const string& StrBlob::front() const
{
    check(0, "front on empty StrBlob");
    const vector<string> &elems = *data;
    return elems.front();
}
// Returns a reference to the last element; throws out_of_range (via check)
// when the vector is empty.
inline string& StrBlob::back()
{
    check(0, "back on empty StrBlob");
    vector<string> &elems = *data;
    return elems.back();
}
// const version of back: returns a const reference to the last element;
// throws out_of_range (via check) when the vector is empty.
inline const string& StrBlob::back() const
{
    check(0, "back on empty StrBlob");
    const vector<string> &elems = *data;
    return elems.back();
}
// Removes the last element; throws out_of_range (via check) when the
// vector is empty.
inline void StrBlob::pop_back()
{
    const char *const why = "pop_back on empty StrBlob";
    check(0, why);
    data->pop_back();
}
// Pointer-like companion to StrBlob: holds a weak_ptr to the blob's vector
// plus a position, and throws an exception on an attempt to access an
// element that does not exist.
class StrBlobPtr
{
    friend bool eq(const StrBlobPtr&, const StrBlobPtr&);

public:
    StrBlobPtr()
        : curr(0) { }
    StrBlobPtr(StrBlob &a, size_t sz = 0)
        : wptr(a.data), curr(sz) { }
    StrBlobPtr(const StrBlob &a, size_t sz = 0)
        : wptr(a.data), curr(sz) { }

    std::string& deref() const;
    std::string& deref(int off) const;

    StrBlobPtr& incr();   // prefix increment
    StrBlobPtr& decr();   // prefix decrement

private:
    // when the check succeeds, returns a shared_ptr to the vector
    std::shared_ptr<std::vector<std::string>>
    check(size_t, const std::string&) const;

    // a weak_ptr is stored, so the underlying vector may already be destroyed
    std::weak_ptr<std::vector<std::string>> wptr;

    // current position within the vector
    size_t curr;
};
// Locks the weak_ptr and validates index i against the vector's size.
// Throws runtime_error when the vector no longer exists and out_of_range
// (carrying msg) when i is not a valid index; otherwise returns a
// shared_ptr to the vector.
inline shared_ptr<vector<string>> StrBlobPtr::check(size_t i, const string &msg) const
{
    shared_ptr<vector<string>> target = wptr.lock();   // is the vector still alive?
    if (target == nullptr)
        throw runtime_error("unbound StrBlobPtr");
    if (!(i < target->size()))
        throw out_of_range(msg);
    return target;
}
// Returns a reference to the element at the current position; check throws
// if the vector is gone or the position is not a valid index.
inline string& StrBlobPtr::deref() const
{
    const shared_ptr<vector<string>> vec = check(curr, "dereference past end");
    vector<string> &elems = *vec;   // the vector this object points into
    return elems[curr];
}
// Returns a reference to the element `off` positions away from the current
// one; check throws if the vector is gone or that position is not a valid
// index.
inline string& StrBlobPtr::deref(int off) const
{
    const size_t pos = curr + off;
    const shared_ptr<vector<string>> vec = check(pos, "dereference past end");
    vector<string> &elems = *vec;   // the vector this object points into
    return elems[pos];
}
// Prefix increment: returns a reference to the incremented object.
inline StrBlobPtr& StrBlobPtr::incr()
{
    // curr cannot be advanced once it already denotes the off-the-end
    // position; check throws in that case
    check(curr, "increment past end of StrBlobPtr");
    curr += 1;   // advance the current position
    return *this;
}
// Prefix decrement: moves the position back by one element and returns a
// reference to the decremented object.
//
// Throws (from check):
//   runtime_error - the underlying vector no longer exists.
//   out_of_range  - curr is already 0, or the preceding position is not a
//                   valid index.
inline StrBlobPtr& StrBlobPtr::decr()
{
    // Validate the target position before touching curr, so a failed
    // decrement leaves the object unchanged. When curr is 0, curr - 1 wraps
    // to the largest size_t value, which check rejects. The earlier code
    // passed the literal -1 to check, which converts to that same largest
    // value regardless of curr, so every call threw.
    check(curr - 1, "decrement past begin of StrBlobPtr");
    --curr;   // move the current position back
    return *this;
}
// Definitions of StrBlob's begin and end members.
// begin: a StrBlobPtr bound to this blob at the constructor's default
// position (0).
inline StrBlobPtr StrBlob::begin()
{
    StrBlobPtr first(*this);
    return first;
}
// end: a StrBlobPtr bound to this blob, positioned at data->size().
inline StrBlobPtr StrBlob::end()
{
    return StrBlobPtr(*this, data->size());
}
// const versions
// begin: a StrBlobPtr bound to this const blob at the constructor's
// default position (0).
inline StrBlobPtr StrBlob::begin() const
{
    StrBlobPtr first(*this);
    return first;
}
// end (const): a StrBlobPtr bound to this const blob, positioned at
// data->size().
inline StrBlobPtr StrBlob::end() const
{
    return StrBlobPtr(*this, data->size());
}
// StrBlobPtr的比较操作
inline bool eq(const StrBlobPtr &lhs, const StrBlobPtr &rhs)
{
auto l = lhs.wptr.lock(), r = rhs.wptr.lock();
// 若底层的vector是同一个
if (l == r)
{
// 则两个指针都是空,或者指向相同元素时,它们相等
return (!r || lhs.curr == rhs.curr);
}
else
{
return false; // 若指向不同vector,则不可能相等
}
}
// Inequality for StrBlobPtr: the logical negation of eq.
inline bool neq(const StrBlobPtr &lhs, const StrBlobPtr &rhs)
{
    const bool same = eq(lhs, rhs);
    return !same;
}
#endif