I would use lxml instead if you are just processing HTML.
It would help if you were more specific, but you can try this if you are looking at the links on a web page.
from lxml.html import parse
# Parse the page; url_of_webpage is a placeholder for the page's address.
pdoc = parse(url_of_webpage)
# Root element of the parsed document.
doc = pdoc.getroot()
# Keep only item [2] of each entry yielded by iterlinks() -- the link URL.
list_of_links = [i[2] for i in doc.iterlinks()]
list_of_links then contains, for example: ['/en/images/logo_com.gif', 'http://www.brand.com/', '/en/images/logo.gif ']
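doc.iterlinks() loops through all the links in the document (form, img, a-tags and so on) and yields, for each one, the Element object, the attribute that holds the link (for example on a form, a or img), the URL itself and its position. The URL is the item at index 2, which is what this line extracts: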
list_of_links = [i[2] for i in doc.iterlinks()]
It simply grabs each URL and returns them all as a list.
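Note that the URLs are returned exactly as they appear in the document, so relative URLs stay relative. A URL like the first one below is not automatically turned into an absolute URL like the second: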
'/en/images/logo_com.gif'
'http://somedomain.com/en/images/logo_com.gif'
To get URLs that include the domain, call make_links_absolute() before collecting the links:
from lxml.html import parse
# Parse the page; url_of_webpage is a placeholder for the page's address.
pdoc = parse(url_of_webpage)
doc = pdoc.getroot()
# Rewrite the document's links in place before collecting them, so that
# relative links such as '/en/images/logo_com.gif' come back with the domain.
doc.make_links_absolute()
# Item [2] of each entry yielded by iterlinks() is the link URL.
list_of_links = [i[2] for i in doc.iterlinks()]
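If the list of URLs is large and you need to process the URLs one by one, iterate over the links directly instead of building a list: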
# Handle the links one at a time instead of materialising the whole list.
# `doc` is the root element obtained from parse(...).getroot().
for i in doc.iterlinks():
    url = i[2]  # item [2] of each yielded entry is the link URL
    # some processing here with url...
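If, for some reason, you need a real browser with JavaScript support (for example, for a site that relies heavily on AJAX), load the page with selenium and parse the rendered source: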
from selenium import webdriver
# io.StringIO is available on both Python 2.6+ and Python 3; the old
# `StringIO` module no longer exists on Python 3.
from io import StringIO

# Let a real browser load the page so the parsed markup is whatever the
# browser reports as the page source after loading.
browser = webdriver.Firefox()
try:
    # `url` is the address of the page to load.
    browser.get(url)
    # Wrap the page source in a file-like object so that
    # lxml.html.parse can read it, then take the root element as before.
    doc = parse(StringIO(browser.page_source)).getroot()
finally:
    # Always shut the browser down, even if loading or parsing fails,
    # so no Firefox process is left running.
    browser.quit()