1 : /** \file document.h
2 : * \brief API for working with documents
3 : */
4 : /* Copyright 1999,2000,2001 BrightStation PLC
5 : * Copyright 2002 Ananova Ltd
6 : * Copyright 2002,2003,2004,2006,2007 Olly Betts
7 : *
8 : * This program is free software; you can redistribute it and/or
9 : * modify it under the terms of the GNU General Public License as
10 : * published by the Free Software Foundation; either version 2 of the
11 : * License, or (at your option) any later version.
12 : *
13 : * This program is distributed in the hope that it will be useful,
14 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 : * GNU General Public License for more details.
17 : *
18 : * You should have received a copy of the GNU General Public License
19 : * along with this program; if not, write to the Free Software
20 : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21 : * USA
22 : */
23 :
24 : #ifndef XAPIAN_INCLUDED_DOCUMENT_H
25 : #define XAPIAN_INCLUDED_DOCUMENT_H
26 :
27 : #include <string>
28 :
29 : #include <xapian/base.h>
30 : #include <xapian/types.h>
31 : #include <xapian/termiterator.h>
32 : #include <xapian/visibility.h>
33 :
34 : namespace Xapian {
35 :
36 : class ValueIterator; // Forward declaration; it is the return type of values_begin()/values_end() below.
37 :
38 : /// A document in the database - holds data, values, terms, and postings. Copying is cheap as the internals are reference counted.
39 : class XAPIAN_VISIBILITY_DEFAULT Document {
40 : public:
41 : class Internal; // Opaque implementation class; only forward declared in this header.
42 : /// @private @internal Reference counted internals.
43 : Xapian::Internal::RefCntPtr<Internal> internal;
44 :
45 : /** @private @internal Constructor is only used by internal classes.
46 : *
47 : * @param internal_ Pointer to the internal opaque class.
48 : */
49 : explicit Document(Internal *internal_);
50 :
51 : /** Copying is allowed. The internals are reference counted, so
52 : * copying is cheap.
53 : */
54 : Document(const Document &other);
55 :
56 : /** Assignment is allowed. The internals are reference counted,
57 : * so assignment is cheap. Returns void, so assignments cannot be chained.
58 : */
59 : void operator=(const Document &other);
60 :
61 : /// Make a new empty Document.
62 : Document();
63 :
64 : /// Destructor.
65 : ~Document();
66 :
67 : /** Get value by number.
68 : *
69 : * Returns an empty string if no value with the given number is present
70 : * in the document.
71 : *
72 : * @param valueno The number of the value.
73 : */
74 : std::string get_value(Xapian::valueno valueno) const;
75 :
76 : /** Add a new value, stored under the number valueno. It will replace
77 : * any existing value with the same number.
78 : */
79 : void add_value(Xapian::valueno valueno, const std::string &value);
80 :
81 : /// Remove any value with the given number (valueno).
82 : void remove_value(Xapian::valueno valueno);
83 :
84 : /// Remove all values associated with the document.
85 : void clear_values();
86 :
87 : /** Get data stored in the document.
88 : * This is a potentially expensive operation, and shouldn't normally
89 : * be used in a match decider functor. Put data for use by match
90 : * deciders in a value instead.
91 : */
92 : std::string get_data() const;
93 :
94 : /// Set the data stored in the document.
95 : void set_data(const std::string &data);
96 :
97 : /** Add an occurrence of a term at a particular position.
98 : *
99 : * Multiple occurrences of the term at the same position are
100 : * represented only once in the positional information, but do
101 : * increase the wdf.
102 : *
103 : * If the term is not already in the document, it will be added to
104 : * it.
105 : *
106 : * @param tname The name of the term.
107 : * @param tpos The position of the term.
108 : * @param wdfinc The increment that will be applied to the wdf
109 : * for this term (defaults to 1).
110 : */
111 : void add_posting(const std::string & tname,
112 : Xapian::termpos tpos,
113 : Xapian::termcount wdfinc = 1);
114 :
115 : /** Add a term to the document, without positional information.
116 : *
117 : * Any existing positional information for the term will be left
118 : * unmodified.
119 : *
120 : * @param tname The name of the term.
121 : * @param wdfinc The increment that will be applied to the wdf
122 : * for this term (defaults to 1).
123 : */
124 : void add_term(const std::string & tname, Xapian::termcount wdfinc = 1);
125 :
126 : /** Remove a posting of a term from the document.
127 : *
128 : * Note that the term will still index the document even if all
129 : * occurrences are removed. To remove a term from a document
130 : * completely, use remove_term().
131 : *
132 : * @param tname The name of the term.
133 : * @param tpos The position of the term.
134 : * @param wdfdec The decrement that will be applied to the wdf
135 : * when removing this posting (defaults to 1). The wdf will not go
136 : * below the value of 0.
137 : *
138 : * @exception Xapian::InvalidArgumentError will be thrown if the term
139 : * is not at the position specified in the position list for this term
140 : * in this document.
141 : *
142 : * @exception Xapian::InvalidArgumentError will be thrown if the term
143 : * is not in the document.
144 : */
145 : void remove_posting(const std::string & tname,
146 : Xapian::termpos tpos,
147 : Xapian::termcount wdfdec = 1);
148 :
149 : /** Remove a term and all postings associated with it.
150 : *
151 : * @param tname The name of the term.
152 : *
153 : * @exception Xapian::InvalidArgumentError will be thrown if the term
154 : * is not in the document.
155 : */
156 : void remove_term(const std::string & tname);
157 :
158 : /// Remove all terms (and postings) from the document.
159 : void clear_terms();
160 :
161 : /** The length of the termlist - i.e. the number of different terms
162 : * which index this document.
163 : */
164 : Xapian::termcount termlist_count() const;
165 :
166 : /// Iterator for the terms in this document.
167 : TermIterator termlist_begin() const;
168 :
169 : /// Equivalent end iterator for termlist_begin().
170 0 : TermIterator termlist_end() const {
171 0 : return TermIterator(NULL); // The end iterator is built from a NULL pointer.
172 : }
173 :
174 : /// Count the values in this document.
175 : Xapian::termcount values_count() const;
176 :
177 : /// Iterator for the values in this document.
178 : ValueIterator values_begin() const;
179 :
180 : /// Equivalent end iterator for values_begin().
181 : ValueIterator values_end() const;
182 :
183 : /** Get the document id which is associated with this document (if any).
184 : *
185 : * NB If multiple databases are being searched together, then this
186 : * will be the document id in the individual database, not the merged
187 : * database!
188 : *
189 : * @return If this document came from a database, return the document
190 : * id in that database. Otherwise, return 0.
191 : */
192 : docid get_docid() const;
193 :
194 : /// Return a string describing this object.
195 : std::string get_description() const;
196 : };
197 :
198 : } // namespace Xapian
199 :
200 : #endif // XAPIAN_INCLUDED_DOCUMENT_H
|