"less than" (<) HTML, , HTML , . HTML , .
, , , , HTML. HTML XML:
f = File.open("table.html")
doc = Nokogiri::HTML(f)
, < 1 g. , TD:
doc.xpath('(//td)[1]/text()').to_s
=> "\n "
doc.xpath('(//td)[2]/text()').to_s
=> "0 %"
, . Nokogiri:
doc.errors
=> [#<Nokogiri::XML::SyntaxError: htmlParseStartTag: invalid element name>]
doc.errors[0].line
=> 3
Yup, 3 .
, , Nokogiri HTML, . . TagSoup , <, < :
% java -jar tagsoup-1.1.3.jar foo.html | xmllint --format -
src: foo.html
<?xml version="1.0" standalone="yes"?>
<html xmlns="http://www.w3.org/1999/xhtml">
<body>
<table>
<tbody>
<tr>
<th colspan="1" rowspan="1">Total Weight</th>
<td colspan="1" rowspan="1"><1 g</td>
<td colspan="1" rowspan="1" style="text-align: right">0 %</td>
</tr>
<tr>
<td colspan="3" rowspan="1" class="skinny_black_bar"/>
</tr>
</tbody>
</table>
</body>
</html>