1 : /* -*- C++ -*-
2 : * Tag vocabulary access
3 : *
4 : * Copyright (C) 2003--2007 Enrico Zini <enrico@debian.org>
5 : *
6 : * This program is free software; you can redistribute it and/or modify
7 : * it under the terms of the GNU General Public License as published by
8 : * the Free Software Foundation; either version 2 of the License, or
9 : * (at your option) any later version.
10 : *
11 : * This program is distributed in the hope that it will be useful,
12 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : * GNU General Public License for more details.
15 : *
16 : * You should have received a copy of the GNU General Public License
17 : * along with this program; if not, write to the Free Software
18 : * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 : */
20 :
21 : #include <ept/debtags/vocabulary.h>
22 : #include <ept/debtags/maint/vocabularyindexer.h>
23 : #include <ept/debtags/maint/debdbparser.h>
24 : #include <ept/debtags/maint/path.h>
25 :
26 : #include <tagcoll/input/memory.h>
27 :
28 : #include <cstring>
29 : #include <sstream>
30 :
31 : #include <sys/types.h>
32 : #include <sys/stat.h>
33 : #include <fcntl.h>
34 : #include <sys/mman.h>
35 :
36 : using namespace tagcoll;
37 :
38 : namespace ept {
39 : namespace debtags {
40 :
41 9 : int Vocabulary::FacetIndex::id(const char* name) const
42 : {
43 9 : if (size() == 0) return -1;
44 : int begin, end;
45 :
46 : /* Binary search */
47 9 : begin = -1, end = size();
48 62 : while (end - begin > 1)
49 : {
50 44 : int cur = (end + begin) / 2;
51 44 : if (strcmp(item(cur)->name, name) > 0)
52 16 : end = cur;
53 : else
54 28 : begin = cur;
55 : }
56 :
57 9 : if (begin == -1 || strcmp(item(begin)->name, name) != 0)
58 : //throw NotFoundException(string("looking for the ID of string ") + str);
59 3 : return -1;
60 : else
61 6 : return begin;
62 : }
63 :
/**
 * Three-way comparison of two full tag names of the form "facet::tag".
 *
 * If either name contains no "::" separator, the two strings are compared
 * as a whole with strcmp().  Otherwise the facet parts are compared first;
 * when one facet is a strict prefix of the other, the shorter facet sorts
 * first; when the facets are identical, the parts after "::" decide.
 *
 * @return a negative value, zero or a positive value if tag1 sorts
 *         respectively before, equal to or after tag2
 */
int tagcmp(const char* tag1, const char* tag2)
{
    const char* sep1 = strstr(tag1, "::");
    if (!sep1)
        return strcmp(tag1, tag2);
    const char* sep2 = strstr(tag2, "::");
    if (!sep2)
        return strcmp(tag1, tag2);

    // Lengths of the two facet names, and of the part they can share
    const int facetLen1 = sep1 - tag1;
    const int facetLen2 = sep2 - tag2;
    const int shared = facetLen1 < facetLen2 ? facetLen1 : facetLen2;

    const int facetOrder = strncmp(tag1, tag2, shared);
    if (facetOrder != 0)
        // The facets already differ within their common length
        return facetOrder;

    if (facetLen1 != facetLen2)
        // One facet is a prefix of the other: the shorter one comes first
        return facetLen1 < facetLen2 ? -1 : 1;

    // Same facet: order by the tag part that follows the separator
    return strcmp(sep1 + 2, sep2 + 2);
}
88 :
89 106377 : int Vocabulary::TagIndex::id(const char* name) const
90 : {
91 106377 : if (size() == 0) return -1;
92 : int begin, end;
93 :
94 : /* Binary search */
95 106377 : begin = -1, end = size();
96 1212130 : while (end - begin > 1)
97 : {
98 999376 : int cur = (end + begin) / 2;
99 999376 : if (tagcmp(item(cur)->name, name) > 0)
100 432389 : end = cur;
101 : else
102 566987 : begin = cur;
103 : }
104 :
105 106377 : if (begin == -1 || tagcmp(item(begin)->name, name) != 0)
106 : //throw NotFoundException(string("looking for the ID of string ") + str);
107 132 : return -1;
108 : else
109 106245 : return begin;
110 : }
111 :
112 31 : Vocabulary::Vocabulary()
113 31 : : voc_fd(-1), voc_size(0), voc_buf(0)
114 : {
115 31 : std::string vocfname;
116 31 : std::string idxfname;
117 :
118 31 : if (!VocabularyIndexer::obtainWorkingVocabulary(vocfname, idxfname))
119 : {
120 2 : m_timestamp = 0;
121 2 : return;
122 : }
123 :
124 29 : m_timestamp = Path::timestamp(idxfname);
125 :
126 29 : mastermmap.init(idxfname);
127 :
128 : // Initialize the facet and tag indexes
129 29 : findex.init(mastermmap, 0);
130 29 : tindex.init(mastermmap, 1);
131 :
132 : // MMap the vocabulary
133 :
134 : // Open the file
135 29 : voc_fname = vocfname;
136 29 : if ((voc_fd = open(voc_fname.c_str(), O_RDONLY)) == -1)
137 0 : throw wibble::exception::File(voc_fname, "opening vocabulary file");
138 :
139 29 : off_t size = lseek(voc_fd, 0, SEEK_END);
140 29 : if (size == (off_t)-1)
141 0 : throw wibble::exception::File(voc_fname, "reading the size of vocabulary file");
142 29 : voc_size = size;
143 :
144 : // Map the file into memory
145 29 : if ((voc_buf = (const char*)mmap(0, voc_size, PROT_READ, MAP_PRIVATE, voc_fd, 0)) == MAP_FAILED)
146 0 : throw wibble::exception::File(voc_fname, "mmapping vocabulary file");
147 0 : }
148 :
149 31 : Vocabulary::~Vocabulary()
150 : {
151 : // Unmap and close the file
152 31 : if (voc_buf)
153 29 : munmap((void*)voc_buf, voc_size);
154 31 : if (voc_fd != -1)
155 29 : close(voc_fd);
156 31 : }
157 :
158 120 : Facet Vocabulary::facetByID(int id) const
159 : {
160 120 : return Facet(this, id);
161 : }
162 :
163 745056 : Tag Vocabulary::tagByID(int id) const
164 : {
165 745056 : return Tag(this, id);
166 : }
167 :
168 1301 : void Vocabulary::parseVocBuf(std::map<std::string, std::string>& res, size_t ofs, size_t len) const
169 : {
170 : // Access the right part of the mmapped buffer
171 1301 : std::stringstream name;
172 1301 : name << voc_fname << '+' << ofs << '-' << len;
173 1301 : input::Memory in(name.str(), voc_buf + ofs, len);
174 1301 : DebDBParser parser(in);
175 : // Parse the raw string data and store it in the cache vector
176 1301 : parser.nextRecord(res);
177 :
178 1301 : std::string desc = res["Description"];
179 2602 : if (!desc.empty())
180 : {
181 1298 : size_t pos = desc.find('\n');
182 1298 : if (pos == std::string::npos)
183 849 : res["_SD_"] = desc;
184 : else
185 449 : res["_SD_"] = desc.substr(0, pos);
186 1301 : }
187 1301 : }
188 :
189 1863 : std::string Vocabulary::tagShortName(int id) const
190 : {
191 1863 : const char* fullname = tindex.name(id);
192 1863 : char* sub = strstr(fullname, "::");
193 1863 : if (sub != NULL)
194 1863 : return sub + 2;
195 : else
196 0 : return fullname;
197 : }
198 :
199 176 : const std::map<std::string, std::string>& Vocabulary::facetData(int id) const
200 : {
201 176 : if (id < 0) return emptyData;
202 :
203 : // Enlarge the cache vector if needed
204 176 : if ((unsigned)id >= m_facetData.size())
205 59 : m_facetData.resize(id + 1);
206 :
207 176 : if (m_facetData[id].empty())
208 59 : parseVocBuf(m_facetData[id], findex.offset(id), findex.size(id));
209 :
210 176 : return m_facetData[id];
211 : }
212 :
213 3722 : const std::map<std::string, std::string>& Vocabulary::tagData(int id) const
214 : {
215 3722 : if (id < 0) return emptyData;
216 :
217 : // Enlarge the cache vector if needed
218 3722 : if ((unsigned)id >= m_tagData.size())
219 1242 : m_tagData.resize(id + 1);
220 :
221 3722 : if (m_tagData[id].empty())
222 1242 : parseVocBuf(m_tagData[id], tindex.offset(id), tindex.size(id));
223 :
224 3722 : return m_tagData[id];
225 : }
226 :
227 : }
228 6 : }
229 :
230 : // vim:set ts=4 sw=4:
|