libept  0.5.25
vocabulary.h
Go to the documentation of this file.
1 #ifndef EPT_DEBTAGS_VOCABULARY_H
2 #define EPT_DEBTAGS_VOCABULARY_H
3 
9 /*
10  * Copyright (C) 2003,2004,2005,2006,2007 Enrico Zini <enrico@debian.org>
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25  */
26 
27 #include <ept/debtags/tag.h>
28 #include <tagcoll/diskindex/mmap.h>
29 
30 #include <string>
31 #include <vector>
32 #include <map>
33 
34 namespace ept {
35 namespace debtags {
36 
38 {
39 public:
40  class FacetIndex : public tagcoll::diskindex::MMap
41  {
42  protected:
43  // Layout of the data in the index
44  struct Item {
45  int offset;
46  int size;
47  int firsttag;
48  int lasttag;
49  const char name[];
50  };
51  inline Item* item(int id) const
52  {
53  if (id >= 0 && (unsigned)id < size())
54  return (Item*)(m_buf + ((int*)m_buf)[id]);
55  return NULL;
56  }
57 
58  public:
59  FacetIndex() : tagcoll::diskindex::MMap() {}
60  FacetIndex(const tagcoll::diskindex::MasterMMap& master, size_t idx)
61  : tagcoll::diskindex::MMap(master, idx) {}
62 
64  size_t size() const { return m_size == 0 ? 0 : *(int*)m_buf / sizeof(int); }
66  size_t offset(int id) const { Item* i = item(id); return i == NULL ? 0 : i->offset; }
68  size_t size(int id) const { Item* i = item(id); return i == NULL ? 0 : i->size; }
70  int firsttag(int id) const { Item* i = item(id); return i == NULL ? -1 : i->firsttag; }
72  int lasttag(int id) const { Item* i = item(id); return i == NULL ? -1 : i->lasttag; }
74  const char* name(int id) const { Item* i = item(id); return i == NULL ? "" : i->name; }
76  int id(const char* name) const;
77  int id(const std::string& name) const { return id(name.c_str()); }
78  };
79 
80  class TagIndex : public tagcoll::diskindex::MMap
81  {
82  protected:
83  // Layout of the data in the index
84  struct Item {
85  int offset;
86  int size;
87  int facet;
88  const char name[];
89  };
90  inline Item* item(int id) const
91  {
92  if (id >= 0 && (unsigned)id < size())
93  return (Item*)(m_buf + ((int*)m_buf)[id]);
94  return NULL;
95  }
96 
97  public:
98  TagIndex() : tagcoll::diskindex::MMap() {}
99  TagIndex(const tagcoll::diskindex::MasterMMap& master, size_t idx)
100  : tagcoll::diskindex::MMap(master, idx) {}
101 
103  size_t size() const { return m_size == 0 ? 0 : *(int*)m_buf / sizeof(int); }
105  size_t offset(int id) const { Item* i = item(id); return i == NULL ? 0 : i->offset; }
107  size_t size(int id) const { Item* i = item(id); return i == NULL ? 0 : i->size; }
109  int facet(int id) const { Item* i = item(id); return i == NULL ? -1 : i->facet; }
111  const char* name(int id) const { Item* i = item(id); return i == NULL ? "" : i->name; }
113  int id(const char* name) const;
114  int id(const std::string& name) const { return id(name.c_str()); }
115  };
116 
117 protected:
118  // Master MMap index container
119  tagcoll::diskindex::MasterMMap mastermmap;
120 
121  time_t m_timestamp;
122 
123  // Mmapped vocabulary file
124  std::string voc_fname;
125  int voc_fd;
126  size_t voc_size;
127  const char* voc_buf;
128 
129  // Facet and tag indexes
132 
133  // Cached parsed facet and tag records
134  mutable std::vector< std::map<std::string, std::string> > m_facetData;
135  mutable std::vector< std::map<std::string, std::string> > m_tagData;
136  // Empty parsed data to return when data is asked for IDs == -1
137  std::map<std::string, std::string> emptyData;
138 
139  void parseVocBuf(std::map<std::string, std::string>& res, size_t ofs, size_t len) const;
140 
141 public:
142  Vocabulary();
143  ~Vocabulary();
144 
146  time_t timestamp() const { return m_timestamp; }
147 
149  bool hasData() const { return m_timestamp != 0; }
150 
151  const FacetIndex& facetIndex() const { return findex; }
152  const TagIndex& tagIndex() const { return tindex; }
153 
157  bool hasFacet(const std::string& name) const
158  {
159  return findex.id(name.c_str()) != -1;
160  }
161 
165  bool hasTag(const std::string& fullname) const
166  {
167  return tindex.id(fullname.c_str()) != -1;
168  }
169 
173  Facet facetByID(int id) const;
174 
178  Tag tagByID(int id) const;
179 
180  template<typename IDS>
181  std::set<Tag> tagsByID(const IDS& ids) const
182  {
183  std::set<Tag> res;
184  for (typename IDS::const_iterator i = ids.begin();
185  i != ids.end(); ++i)
186  res.insert(tagByID(*i));
187  return res;
188  }
189 
193  Facet facetByTag(int id) const { return facetByID(tindex.facet(id)); }
194 
198  Facet facetByName(const std::string& name) const { return facetByID(findex.id(name)); }
199 
203  Tag tagByName(const std::string& fullname) const { return tagByID(tindex.id(fullname)); }
204 
208  std::set< Facet > facets() const
209  {
210  std::set< Facet > res;
211  for (size_t i = 0; i < findex.size(); i++)
212  res.insert(facetByID(i));
213  return res;
214  }
215 
219  std::set< Tag > tags() const
220  {
221  std::set< Tag > res;
222  for (size_t i = 0; i < tindex.size(); i++)
223  res.insert(tagByID(i));
224  return res;
225  }
226 
230  std::set< Tag > tags(int facet) const
231  {
232  std::set< Tag > res;
233  for (int i = findex.firsttag(facet); i != -1 && i <= findex.lasttag(facet); i++)
234  res.insert(tagByID(i));
235  return res;
236  }
237 
238  std::set< Tag > tags(const std::string& facetName) const
239  {
240  return tags(findex.id(facetName));
241  }
242 
243  std::set< Tag > tags(const Facet& facet) const
244  {
245  return tags(facet.id());
246  }
247 
248 #if 0
249  const DerivedTagList& getEquations() const throw () { return equations; }
251 
253  FacetSet facets(const FacetMatcher& filter) const throw () { return getFiltered(filter); }
254 #endif
255 
256 #if 0
257  // These functions are here just to be used by Facet and Tag. I'm not
258  // making them private because I don't want Facet and Tag to access other
259  // Vocabulary member, and other classes can't use these anyway as Facet::Data and
260  // Tag::Data are protected
261  const Facet::Data& facetData(int idx) { return m_facets[idx]; }
262  const Tag::Data& tagData(int idx) { return m_tags[idx]; }
263 #endif
264 
266  std::string facetName(int id) const { return findex.name(id); }
267 
269  std::string tagName(int id) const { return tindex.name(id); }
270 
272  std::string tagShortName(int id) const;
273 
274  const std::map<std::string, std::string>& facetData(int id) const;
275  const std::map<std::string, std::string>& tagData(int id) const;
276 };
277 
278 }
279 }
280 
281 // vim:set ts=4 sw=4:
282 #endif
int facet(int id) const
Get the id of the facet for this tag.
Definition: vocabulary.h:109
const char * name(int id) const
Get the name of this tag.
Definition: vocabulary.h:111
std::map< std::string, std::string > emptyData
Definition: vocabulary.h:137
int size
Definition: vocabulary.h:86
Debtags facets and tags.
const std::map< std::string, std::string > & tagData(int id) const
Definition: vocabulary.cc:214
int id(const std::string &name) const
Definition: vocabulary.h:114
size_t size(int id) const
Get the size of the tag data in the vocabulary for this tag.
Definition: vocabulary.h:107
std::vector< std::map< std::string, std::string > > m_facetData
Definition: vocabulary.h:134
int id(const std::string &name) const
Definition: vocabulary.h:77
const char * voc_buf
Definition: vocabulary.h:127
Definition: vocabulary.h:40
std::string facetName(int id) const
Get the facet name given the facet id.
Definition: vocabulary.h:266
int facet
Definition: vocabulary.h:87
const char * name(int id) const
Get the name of this facet.
Definition: vocabulary.h:74
Item * item(int id) const
Definition: vocabulary.h:51
std::string tagShortName(int id) const
Get the short name of the tag given the tag id.
Definition: vocabulary.cc:190
Tag tagByID(int id) const
Return the tag with the given ID.
Definition: vocabulary.cc:164
std::set< Tag > tags(int facet) const
Return the tags in the given facet.
Definition: vocabulary.h:230
std::set< Tag > tags(const Facet &facet) const
Definition: vocabulary.h:243
void parseVocBuf(std::map< std::string, std::string > &res, size_t ofs, size_t len) const
Definition: vocabulary.cc:169
int size
Definition: vocabulary.h:46
size_t size(int id) const
Get the size of the facet data in the vocabulary for this facet.
Definition: vocabulary.h:68
TagIndex tindex
Definition: vocabulary.h:131
size_t size() const
Get the number of tags in the index.
Definition: vocabulary.h:103
FacetIndex(const tagcoll::diskindex::MasterMMap &master, size_t idx)
Definition: vocabulary.h:60
int voc_fd
Definition: vocabulary.h:125
std::set< Tag > tags() const
Return all the tags in the vocabulary.
Definition: vocabulary.h:219
Definition: vocabulary.h:84
Facet facetByID(int id) const
Return the facet with the given ID.
Definition: vocabulary.cc:159
int id() const
Return the ID of this facet.
Definition: tag.h:135
std::set< Facet > facets() const
Return all the facets in the vocabulary.
Definition: vocabulary.h:208
std::string tagName(int id) const
Get the tag name given the tag id.
Definition: vocabulary.h:269
const char name[]
Definition: vocabulary.h:88
const char name[]
Definition: vocabulary.h:49
FacetIndex findex
Definition: vocabulary.h:130
Representation of a tag.
Definition: tag.h:163
Definition: vocabulary.h:80
const std::map< std::string, std::string > & facetData(int id) const
Definition: vocabulary.cc:200
Representation of a facet.
Definition: tag.h:60
int id(const char *name) const
Get the ID of the facet with this name.
Definition: vocabulary.cc:42
const TagIndex & tagIndex() const
Definition: vocabulary.h:152
int lasttag(int id) const
Get the id of the last tag for this facet.
Definition: vocabulary.h:72
bool hasTag(const std::string &fullname) const
Check if the vocabulary contains the tag `fullname'.
Definition: vocabulary.h:165
Definition: vocabulary.h:37
std::vector< std::map< std::string, std::string > > m_tagData
Definition: vocabulary.h:135
int offset
Definition: vocabulary.h:85
size_t size() const
Get the number of facets in the index.
Definition: vocabulary.h:64
int firsttag
Definition: vocabulary.h:47
const FacetIndex & facetIndex() const
Definition: vocabulary.h:151
TagIndex(const tagcoll::diskindex::MasterMMap &master, size_t idx)
Definition: vocabulary.h:99
int id(const char *name) const
Get the ID of the tag with this name.
Definition: vocabulary.cc:90
int firsttag(int id) const
Get the id of the first tag for this facet.
Definition: vocabulary.h:70
Vocabulary()
Definition: vocabulary.cc:113
Facet facetByTag(int id) const
Return the facet for the tag with the given ID.
Definition: vocabulary.h:193
bool hasFacet(const std::string &name) const
Check if the vocabulary contains the facet `name'.
Definition: vocabulary.h:157
size_t voc_size
Definition: vocabulary.h:126
Tag tagByName(const std::string &fullname) const
Return the tag with the given full name.
Definition: vocabulary.h:203
std::set< Tag > tags(const std::string &facetName) const
Definition: vocabulary.h:238
int offset
Definition: vocabulary.h:45
bool hasData() const
Return true if this data source has data, false if it's empty.
Definition: vocabulary.h:149
size_t offset(int id) const
Get the offset of the tag data in the vocabulary for this tag.
Definition: vocabulary.h:105
std::set< Tag > tagsByID(const IDS &ids) const
Definition: vocabulary.h:181
~Vocabulary()
Definition: vocabulary.cc:150
size_t offset(int id) const
Get the offset of the facet data in the vocabulary for this facet.
Definition: vocabulary.h:66
time_t m_timestamp
Definition: vocabulary.h:121
Item * item(int id) const
Definition: vocabulary.h:90
Facet facetByName(const std::string &name) const
Return the facet with the given name.
Definition: vocabulary.h:198
FacetIndex()
Definition: vocabulary.h:59
TagIndex()
Definition: vocabulary.h:98
tagcoll::diskindex::MasterMMap mastermmap
Definition: vocabulary.h:119
std::string voc_fname
Definition: vocabulary.h:124
time_t timestamp() const
Get the timestamp of when the index was last updated.
Definition: vocabulary.h:146
int lasttag
Definition: vocabulary.h:48