1 from lucene import Term, IndexReader
2
4 """
5 Wrap L{lucene} term enumerator in Python iterator returning only term text,
6 and ensure that we don't iterate past the end of the given field.
7
8 @arg reader: Lucene reader
9 @type reader: L{IndexReader}
10
11 @arg field: field name in index
12 @type field: String
13
14 @arg value: value to start enumerating with, defaults to '' (enumerate all
15 values for this field)
16 @type value: String
17 """
18 for text, count in termIteratorCount(reader, field, value, count_needed = False):
19 yield text
20
22 """
23 Wrap L{lucene} term enumerator in Python iterator returning (term
24 text, count), and ensure that we don't iterate past the end of the
25 given field.
26
27 @arg reader: Lucene reader
28 @type reader: L{IndexReader}
29
30 @arg field: field name in index
31 @type field: String
32
33 @arg value: value to start enumerating with, defaults to '' (enumerate all
34 values for this field)
35 @type value: String
36
37 @arg count_needed: whether we need an accurate count, or whether we can
38 return as soon as we see a single valid document.
39 @type count_needed: Boolean
40 """
41 assert IndexReader.instance_(reader)
42
43 term_enumerator = reader.terms(Term(field, value))
44 termDocs = reader.termDocs()
45 while True:
46 term = term_enumerator.term()
47 if term is None or term.field() != field:
48 break
49 termDocs.seek(term_enumerator)
50 count = 0
51 while termDocs.next():
52 value = term.text()
53 count += 1
54 if not count_needed:
55 break
56 if count:
57 yield term.text(), count
58 if not term_enumerator.next():
59 break
60 termDocs.close()
61