libept  0.5.25
textsearch.h
Go to the documentation of this file.
1 #ifndef EPT_TEXTSEARCH_TEXTSEARCH_H
2 #define EPT_TEXTSEARCH_TEXTSEARCH_H
3 
9 /*
10  * Copyright (C) 2007 Enrico Zini <enrico@debian.org>
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25  */
26 
27 #include <xapian.h>
28 #include <vector>
29 #include <string>
30 
31 namespace ept {
32 namespace apt {
33 class Apt;
34 class PackageRecord;
35 }
36 namespace debtags {
37 class Debtags;
38 }
39 namespace textsearch {
40 
41 // Allocate value indexes for known values
// Each constant is a Xapian::valueno, the same type taken as `val_id` by
// TextSearch::getDoubleValue() and TextSearch::getIntValue().
// NOTE(review): what each slot holds is only implied by its name (apt sizes,
// popcon, "iterating" ratings) — confirm against the code that writes the index.
// The numbering leaves gaps between the groups (1-2, 10, 20-27).
42 const Xapian::valueno VAL_APT_INSTALLED_SIZE = 1;
43 const Xapian::valueno VAL_APT_PACKAGE_SIZE = 2;
44 const Xapian::valueno VAL_POPCON = 10;
45 const Xapian::valueno VAL_ITERATING_RATING = 20;
46 const Xapian::valueno VAL_ITERATING_FUNCTIONALITY = 21;
47 const Xapian::valueno VAL_ITERATING_USABILITY = 22;
48 const Xapian::valueno VAL_ITERATING_SECURITY = 23;
49 const Xapian::valueno VAL_ITERATING_PERFORMANCE = 24;
50 const Xapian::valueno VAL_ITERATING_QUALITY = 25;
51 const Xapian::valueno VAL_ITERATING_SUPPORT = 26;
52 const Xapian::valueno VAL_ITERATING_ADOPTION = 27;
53 // If you need to index a value of your own and cannot edit this file, feel
54 // free to use any value index from 1000000 upwards
55 
56 
57 /*
58 Fallback on apt scan searches when index is not present
59 
60 Explicitly decide at instantiation (or at any other time) if a rebuild should
61 be performed. Just adding a 'rebuildIfNeeded' method would be enough.
62 
63 17:14 #xapian < enrico> Hello. I'm finally in a position of writing a library to maintain
64  a xapian index with Debian package descriptions in a Debian system
65 17:14 #xapian < enrico> I have a question, though
66 17:14 #xapian < enrico> The descriptions change regularly as people run 'apt-get update'
67 17:15 #xapian < enrico> I'd need to have a way to update the description index after
68  apt-get update, without rebuilding it from scratch
69 17:15 #xapian < enrico> Is there some documentation on how to do that? I can't exactly
70  tell Xapian "the new description for package foo is this" because
71  I'd need the xapian id
72 19:11 #xapian < omega> you can add a unique term with a boolean prefix?
73 19:11 #xapian < omega> like Qpackage-name
74 19:11 #xapian < omega> then you search for it and replace_document
75 19:24 #xapian < richardb> Or indeed, you use the "replace_document()" form which takes a
76  unique_id term.
77 19:25 #xapian < richardb> Xapian::docid replace_document(const std::string &
78  unique_term,
79 19:25 #xapian < richardb> const Xapian::Document &
80  document);
81 19:43 #xapian < enrico> unique term
82 19:43 #xapian < enrico> nice!
83 19:44 #xapian < enrico> can I use a non-alpha prefix, like :package-name ?
84 19:45 #xapian < enrico> or pkg:package-name
85 19:45 #xapian < enrico> I suppose I can
86 */
87 
103 {
104 protected:
105  time_t m_timestamp;
106  Xapian::Database m_db;
107  Xapian::Stem m_stem;
108 
110  static std::string toLower(const std::string& str);
111 
118  void normalize_and_add(Xapian::Document& doc, const std::string& term, int& pos) const;
119 
120 public:
122  {
123  virtual ~ExtraIndexer() {}
124  virtual void operator()(Xapian::Document& doc, const apt::PackageRecord& rec) const = 0;
125  };
126 
127  TextSearch();
128 
// Access the Xapian database.
// Returns a mutable reference to the m_db member.
130  Xapian::Database& db() { return m_db; }
131 
// Access the Xapian database.
// Const overload: returns a const reference to the m_db member.
133  const Xapian::Database& db() const { return m_db; }
134 
// Timestamp of when the Xapian database was last updated.
// Returns the stored m_timestamp value; hasData() treats a value <= 0 as
// "no data".
136  time_t timestamp() const { return m_timestamp; }
137 
// Returns true if the index has data.
// "Has data" is decided purely by the timestamp: true when m_timestamp > 0.
139  bool hasData() const { return m_timestamp > 0; }
140 
142  bool needsRebuild(apt::Apt& apt);
143 
153  bool rebuildIfNeeded(
154  apt::Apt& apt,
155  const std::vector<const ExtraIndexer*>& extraIndexers = std::vector<const ExtraIndexer*>());
156 
160  Xapian::docid docidByName(const std::string& pkgname) const;
161 
165  Xapian::Query makeORQuery(const std::string& keywords) const;
166 
173  Xapian::Query makePartialORQuery(const std::string& keywords) const;
174 
// Build a query with the given keywords, specified as iterators of strings.
//
// begin, end: iterator range of keywords. Each element is passed to
//             toLower(const std::string&), so it must be usable as a
//             std::string.
// Returns a Xapian::Query that ORs (Xapian::Query::OP_OR) all collected
// terms: for every keyword its lowercased form, plus its stem prefixed with
// "Z" when the stem differs from the lowercased form.
178  template<typename ITER>
179  Xapian::Query makeORQuery(const ITER& begin, const ITER& end) const
180  {
181  std::vector<std::string> terms;
182  // Insert both the lowercased and the stemmed lowercased query terms
183  for (ITER i = begin; i != end; ++i)
184  {
185  std::string t = toLower(*i);  // lowercased keyword
186  std::string s = m_stem(t);  // stem of the lowercased keyword
187  terms.push_back(t);
188  if (s != t)  // add the stemmed form only when stemming changed the word
189  terms.push_back("Z" + s);  // NOTE(review): "Z" presumably matches the prefix used for stemmed terms at index time — confirm
190  }
191  return Xapian::Query(Xapian::Query::OP_OR, terms.begin(), terms.end());
192  }
193 
195  std::vector<std::string> expand(Xapian::Enquire& enq) const;
196 
197 // std::vector<std::string> similar(const std::string& pkg);
198 
202  Xapian::Query makeRelatedQuery(const std::string& pkgname) const;
203 
207  double getDoubleValue(const std::string& pkgname, Xapian::valueno val_id) const;
208 
212  int getIntValue(const std::string& pkgname, Xapian::valueno val_id) const;
213 };
214 
215 }
216 }
217 
218 // vim:set ts=4 sw=4:
219 #endif
const Xapian::valueno VAL_ITERATING_RATING
Definition: textsearch.h:45
time_t timestamp() const
Timestamp of when the Xapian database was last updated.
Definition: textsearch.h:136
const Xapian::valueno VAL_ITERATING_SECURITY
Definition: textsearch.h:48
Xapian::Database m_db
Definition: textsearch.h:106
const Xapian::valueno VAL_ITERATING_FUNCTIONALITY
Definition: textsearch.h:46
const Xapian::valueno VAL_ITERATING_USABILITY
Definition: textsearch.h:47
const Xapian::valueno VAL_APT_PACKAGE_SIZE
Definition: textsearch.h:43
Xapian::Stem m_stem
Definition: textsearch.h:107
Xapian::Query makeORQuery(const ITER &begin, const ITER &end) const
Build a query with the given keywords, specified as iterators of strings.
Definition: textsearch.h:179
const Xapian::valueno VAL_ITERATING_QUALITY
Definition: textsearch.h:50
ept::textsearch::TextSearch::ExtraIndexer
Definition: textsearch.h:121
virtual ~ExtraIndexer()
Definition: textsearch.h:123
const Xapian::Database & db() const
Access the Xapian database.
Definition: textsearch.h:133
const Xapian::valueno VAL_ITERATING_PERFORMANCE
Definition: textsearch.h:49
-*- C++ -*- (c) 2006, 2007 Petr Rockai me@mornfall.net
Definition: apt.cc:43
ept::apt::PackageRecord
RecordParser specialised with access methods for common Debian package information.
Definition: packagerecord.h:36
const Xapian::valueno VAL_ITERATING_ADOPTION
Definition: textsearch.h:52
ept::apt::Apt
High-level access to the Apt cache, as a data provider for the ept framework.
Definition: apt/apt.h:60
bool hasData() const
Returns true if the index has data.
Definition: textsearch.h:139
ept::textsearch::TextSearch
Maintains and accesses a Xapian index of package descriptions.
Definition: textsearch.h:102
const Xapian::valueno VAL_POPCON
Definition: textsearch.h:44
time_t m_timestamp
Definition: textsearch.h:105
const Xapian::valueno VAL_ITERATING_SUPPORT
Definition: textsearch.h:51
Xapian::Database & db()
Access the Xapian database.
Definition: textsearch.h:130
const Xapian::valueno VAL_APT_INSTALLED_SIZE
Definition: textsearch.h:42