grassyknoll.backend.lucene.TermIterator

1 from lucene import Term, IndexReader 2

def termIterator(reader, field, value = ''):
    """
    Iterate over the term texts of one field of a L{lucene} index.

    Thin wrapper around L{termIteratorCount}: the per-term count is
    discarded and only the term text is yielded.  Because the count is not
    used, C{count_needed = False} is passed so the underlying iterator can
    stop counting after the first document it sees for each term.

    @arg reader: Lucene reader
    @type reader: L{IndexReader}

    @arg field: field name in index
    @type field: String

    @arg value: value to start enumerating with, defaults to '' (enumerate all
                values for this field)
    @type value: String
    """
    text_and_count = termIteratorCount(reader, field, value,
                                       count_needed = False)
    for term_text, _ignored_count in text_and_count:
        yield term_text

20

def termIteratorCount(reader, field, value = '', count_needed = True):
    """
    Wrap L{lucene} term enumerator in Python iterator returning (term
    text, count), and ensure that we don't iterate past the end of the
    given field.

    Terms for which the document enumerator reports no documents are
    skipped.  Both Lucene enumerators are closed when the generator is
    exhausted, closed early by the caller, or interrupted by an exception.

    @arg reader: Lucene reader
    @type reader: L{IndexReader}

    @arg field: field name in index
    @type field: String

    @arg value: value to start enumerating with, defaults to '' (enumerate all
                values for this field)
    @type value: String

    @arg count_needed: whether we need an accurate count, or whether we can
                       return as soon as we see a single valid document.
                       When false, every yielded count is 1.
    @type count_needed: Boolean

    @return: generator of C{(term text, count)} tuples
    """
    assert IndexReader.instance_(reader)

    term_enumerator = reader.terms(Term(field, value))
    try:
        termDocs = reader.termDocs()
        try:
            while True:
                term = term_enumerator.term()
                # The enumerator runs across all fields; stop once it is
                # exhausted or has moved on to a different field.
                if term is None or term.field() != field:
                    break
                # Position the document enumerator on the current term.
                termDocs.seek(term_enumerator)
                count = 0
                while termDocs.next():
                    count += 1
                    if not count_needed:
                        # One document is enough to prove the term is in use.
                        break
                if count:
                    yield term.text(), count
                if not term_enumerator.next():
                    break
        finally:
            # Runs on normal exhaustion and also when the consumer abandons
            # the generator early (GeneratorExit) or an error propagates.
            termDocs.close()
    finally:
        term_enumerator.close()

61

Source Code for Module grassyknoll.backend.lucene.TermIterator