libept  0.5.25
vocabularymerger.h
Go to the documentation of this file.
1 /*
2  * Merge different vocabularies together and create the tag and facet indexes
3  *
4  * Copyright (C) 2003-2007 Enrico Zini <enrico@debian.org>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  */
20 
21 #include <tagcoll/diskindex/mmap.h>
22 #include <tagcoll/input/base.h>
23 #include <string>
24 #include <map>
25 #include <set>
26 
27 #ifndef EPT_DEBTAGS_VOCABULARYMERGER_H
28 #define EPT_DEBTAGS_VOCABULARYMERGER_H
29 
30 namespace ept {
31 namespace debtags {
32 
34 {
35 protected:
// Indexer derived from tagcoll::diskindex::MMapIndexer. The enclosing class
// holds one instance (`findexer`) and hands it out via facetIndexer().
// NOTE(review): listing lines 39 and 41 are absent from this listing; the
// cross-reference index at the end of the page places `VocabularyMerger& vm`
// at line 39 and the constructor `FacetIndexer(VocabularyMerger& vm)` at
// line 41.
36  class FacetIndexer : public tagcoll::diskindex::MMapIndexer
37  {
38  protected:
40  public:
42  virtual ~FacetIndexer() {}
// Declared here only; the index lists the definitions in vocabularymerger.cc.
43  virtual int encodedSize() const;
44  virtual void encode(char* buf) const;
45  };
// Indexer derived from tagcoll::diskindex::MMapIndexer. The enclosing class
// holds one instance (`tindexer`) and hands it out via tagIndexer().
// NOTE(review): listing lines 49 and 51 are absent from this listing; the
// cross-reference index at the end of the page places `VocabularyMerger& vm`
// at line 49 and the constructor `TagIndexer(VocabularyMerger& vm)` at
// line 51.
46  class TagIndexer : public tagcoll::diskindex::MMapIndexer
47  {
48  protected:
50  public:
52  virtual ~TagIndexer() {}
// Declared here only; the index lists the definitions in vocabularymerger.cc.
53  virtual int encodedSize() const;
54  virtual void encode(char* buf) const;
55  };
/**
 * Per-tag record.
 *
 * The class is itself a string-to-string map (inherited from std::map) and
 * adds the tag name plus bookkeeping fields used when indexing.
 */
class TagData : public std::map<std::string, std::string>
{
public:
    std::string name;
    // Offset in the last written file (used for indexing)
    long ofs;
    // NOTE(review): presumably the length of the record found at `ofs` -- confirm.
    int len;
    // Numeric ID of the tag. Starts at -1, the same value tagID() is
    // documented to return for "not found", so a record that has not been
    // given an ID yet never exposes an indeterminate value.
    int id;

    /// Build an empty record: offset and length zeroed, no ID assigned.
    TagData() : ofs(0), len(0), id(-1) {}
};
67  class FacetData : public std::map<std::string, std::string>
68  {
69  public:
70  std::string name;
71  std::map<std::string, TagData> tags;
72  // Offset in the last written file (used for indexing)
73  long ofs;
74  int len;
75  int id;
76 
77  FacetData() : ofs(0), len(0) {}
78 
79  TagData& obtainTag(const std::string& fullname);
80  };
// Facet records keyed by string; hasFacet() looks a facet name up in this map.
81  std::map<std::string, FacetData> facets;
// Set to 0 by the constructor.
82  int tagCount;
// NOTE(review): listing lines 83 and 84 are absent from this listing; the
// cross-reference index at the end of the page places `FacetIndexer findexer`
// at line 83 and `TagIndexer tindexer` at line 84.
85 
// Both are declared here only; the index lists their definitions in
// vocabularymerger.cc.
86  FacetData& obtainFacet(const std::string& name);
87  TagData& obtainTag(const std::string& fullname);
88 
89 public:
/** Create a merger with tagCount at 0; both indexers are constructed from *this. */
90  VocabularyMerger() : tagCount(0), findexer(*this), tindexer(*this) {}
91 
/**
 * Check if there is any data in the merged vocabulary.
 *
 * @return true when `facets` holds no entries, false otherwise.
 */
95  bool empty() const { return facets.empty(); }
96 
/**
 * Parse and import the vocabulary from `input`, merging the data with the
 * previously imported ones.
 */
101  void read(tagcoll::input::Input& input);
102 
/** Write the vocabulary data to the given file. */
106  void write(const std::string& fname);
107 
/**
 * Overload of write() taking a C stdio stream instead of a file name.
 *
 * NOTE(review): FILE is used here but none of this header's visible includes
 * is <cstdio>/<stdio.h>; it presumably arrives through the tagcoll headers --
 * confirm, or include it directly.
 */
111  void write(FILE* out);
112 
/**
 * Get the facet indexer.
 *
 * @return `findexer`, as a const reference to its MMapIndexer base.
 */
119  const tagcoll::diskindex::MMapIndexer& facetIndexer() const { return findexer; }
120 
/**
 * Get the tag indexer.
 *
 * @return `tindexer`, as a const reference to its MMapIndexer base.
 */
127  const tagcoll::diskindex::MMapIndexer& tagIndexer() const { return tindexer; }
128 
/**
 * Check if the vocabulary contains the facet `name`.
 *
 * @param name key to look up in `facets`.
 * @return true when `facets` has an entry for `name`.
 */
132  bool hasFacet(const std::string& name) const
133  {
// Lookup only: find() does not insert a missing key.
134  return facets.find(name) != facets.end();
135  }
136 
/** Check if the vocabulary contains the tag `fullname`. */
140  bool hasTag(const std::string& fullname) const;
141 
/** Return the ID for the given tag (or -1 if not found). */
145  int tagID(const std::string& fullname) const;
146 
/** Return a set with all tag names. */
150  std::set<std::string> tagNames() const;
151 };
152 
153 }
154 }
155 
156 // vim:set ts=4 sw=4:
157 #endif
VocabularyMerger & vm
Definition: vocabularymerger.h:49
FacetIndexer findexer
Definition: vocabularymerger.h:83
TagIndexer tindexer
Definition: vocabularymerger.h:84
TagIndexer(VocabularyMerger &vm)
Definition: vocabularymerger.h:51
FacetData & obtainFacet(const std::string &name)
Definition: vocabularymerger.cc:88
FacetData()
Definition: vocabularymerger.h:77
Definition: vocabularymerger.h:56
void read(tagcoll::input::Input &input)
Parse and import the vocabulary from `input', merging the data with the previously imported ones...
Definition: vocabularymerger.cc:115
VocabularyMerger()
Definition: vocabularymerger.h:90
int len
Definition: vocabularymerger.h:62
void write(const std::string &fname)
Write the vocabulary data to the given file.
Definition: vocabularymerger.cc:211
Definition: vocabularymerger.h:67
std::map< std::string, TagData > tags
Definition: vocabularymerger.h:71
bool empty() const
Check if there is any data in the merged vocabulary.
Definition: vocabularymerger.h:95
virtual ~TagIndexer()
Definition: vocabularymerger.h:52
const tagcoll::diskindex::MMapIndexer & tagIndexer() const
Get the tag indexer.
Definition: vocabularymerger.h:127
int id
Definition: vocabularymerger.h:63
int tagCount
Definition: vocabularymerger.h:82
long ofs
Definition: vocabularymerger.h:73
int id
Definition: vocabularymerger.h:75
int len
Definition: vocabularymerger.h:74
std::string name
Definition: vocabularymerger.h:59
virtual ~FacetIndexer()
Definition: vocabularymerger.h:42
virtual int encodedSize() const
Definition: vocabularymerger.cc:325
bool hasTag(const std::string &fullname) const
Check if the vocabulary contains the tag `fullname'.
Definition: vocabularymerger.cc:158
virtual int encodedSize() const
Definition: vocabularymerger.cc:258
const tagcoll::diskindex::MMapIndexer & facetIndexer() const
Get the facet indexer.
Definition: vocabularymerger.h:119
bool hasFacet(const std::string &name) const
Check if the vocabulary contains the facet `name'.
Definition: vocabularymerger.h:132
Definition: vocabularymerger.h:46
TagData & obtainTag(const std::string &fullname)
Definition: vocabularymerger.cc:75
Definition: vocabularymerger.h:33
std::map< std::string, FacetData > facets
Definition: vocabularymerger.h:81
int tagID(const std::string &fullname) const
Return the ID for the given tag (or -1 if not found)
Definition: vocabularymerger.cc:178
virtual void encode(char *buf) const
Definition: vocabularymerger.cc:349
virtual void encode(char *buf) const
Definition: vocabularymerger.cc:280
long ofs
Definition: vocabularymerger.h:61
TagData()
Definition: vocabularymerger.h:65
FacetIndexer(VocabularyMerger &vm)
Definition: vocabularymerger.h:41
std::set< std::string > tagNames() const
Return a set with all tag names.
Definition: vocabularymerger.cc:201
std::string name
Definition: vocabularymerger.h:70
TagData & obtainTag(const std::string &fullname)
Definition: vocabularymerger.cc:101
Definition: vocabularymerger.h:36
VocabularyMerger & vm
Definition: vocabularymerger.h:39