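As an alternative to an RE-based approach, here is one more datapoint next to BS + RE: a pyparsing version (the code first, then its output):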
# Example: pick out <td> cells by the value of their "class" attribute
# using pyparsing, and dump each match with its named results.
from pyparsing import makeHTMLTags, withAttribute, oneOf, SkipTo

# Sample fragment to search. Only the cells whose class is "a" or
# "unavailable-available" are expected to be reported.
html = """
<tr>
<td class="u">4</td>
<td class="unavailable-available">5</td>
<td class="a">6</td>
<td class="available-unavailable">7</td>
<td class="u">8</td>
"""

# Expressions for the opening and closing TD tags.
td, tdEnd = makeHTMLTags("TD")

# Filter opening tags on their class attribute. The attribute is passed via
# **{...} because `class` is a reserved word and cannot be written as a
# keyword argument.
# NOTE(review): passing a oneOf(...) expression as the attribute value relies
# on pyparsing expressions comparing equal to strings they match - confirm
# against the installed pyparsing version.
td.setParseAction(withAttribute(**{'class': oneOf("a unavailable-available")}))

# Opening tag, then everything up to the closing tag (exposed under the
# results name "contents"), then the closing tag itself.
patt = td + SkipTo(tdEnd)("contents") + tdEnd

for t in patt.searchString(html):
    # The loop body must be indented; the parenthesized print works both as a
    # Python 2 statement and as a Python 3 function call.
    print(t.dump())
['TD', ['class', 'unavailable-available'], False, '5', '</TD>']
- class: unavailable-available
- contents: 5
- empty: False
- endTd: </TD>
- startTd: ['TD', ['class', 'unavailable-available'], False]
- class: unavailable-available
- empty: False
['TD', ['class', 'a'], False, '6', '</TD>']
- class: a
- contents: 6
- empty: False
- endTd: </TD>
- startTd: ['TD', ['class', 'a'], False]
- class: a
- empty: False
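For each match, the cell text is available as t.contents and the class as t['class'] (dict-style access is needed here, because class is a reserved word in Python).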