Regex is a bad fit for parsing HTML. Use a real HTML parser instead.
Use BeautifulSoup for Python. Here is an example script that prints the URLs from the first 10 result pages of a site:domain.com Google query.
import sys
import urllib2
if __name__ == "__main__":
    # Fetch ten Google result pages for the query "site:stackoverflow.com"
    # and print the text of every <cite> element found on each page.

    # BeautifulSoup is loaded from a local ./BeautifulSoup directory, so that
    # directory has to be on sys.path before the import below can succeed.
    sys.path.append("./BeautifulSoup")
    from BeautifulSoup import BeautifulSoup

    # Every request made through this opener carries a browser-like
    # User-agent header.
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]

    for start in range(0, 10):
        # The "start" query parameter advances in steps of 10: 0, 10, ..., 90.
        url = "http://www.google.com/search?q=site:stackoverflow.com&start=" + str(start * 10)
        page = opener.open(url)
        try:
            soup = BeautifulSoup(page)
        finally:
            # Release the HTTP response even when parsing raises; previously
            # the handle was left open on every iteration.
            page.close()
        for cite in soup.findAll('cite'):
            # A single parenthesised argument prints the same text as the
            # Python 2 print statement, and also parses on Python 3.
            print(cite.text)
Output:
stackoverflow.com/
stackoverflow.com/questions
stackoverflow.com/unanswered
stackoverflow.com/users
meta.stackoverflow.com/
blog.stackoverflow.com/
chat.meta.stackoverflow.com/
...
, , . Python , .