Use this script:
######## Fetch app names and genres from Play Store URLs, using package names ########
"""
Look up the Play Store title and genre for every package listed in a CSV file.

Requirements for running this script (Python 3):
    pip install requests
    pip install bs4

Note: the ``pip install --upgrade ndg-httpsclient`` workaround for the
InsecurePlatformWarning was only relevant to old Python 2 installations.

The script prompts for the path of an input CSV whose rows have the form
``<package_name>, <id>``. Only the package name is used. For every app page
that could be fetched, one ``<app name>,<genre>`` row is written to
``results.csv`` in the current working directory.
"""
import csv

import requests
from bs4 import BeautifulSoup

# URL prefix; the package name is appended to form the app's details page.
APP_LINK = "https://play.google.com/store/apps/details?id="

# File the results are written to (relative to the current directory).
OUTPUT_PATH = "results.csv"

# Seconds to wait for the Play Store before giving up on one package, so a
# single stalled connection cannot hang the whole run.
REQUEST_TIMEOUT = 30


def _read_package_names(path):
    """Return the package names from the first column of the CSV at *path*.

    Blank rows and rows whose first field is empty are skipped instead of
    aborting the run. Any further columns (such as the id) are ignored
    because nothing downstream uses them.
    """
    packages = []
    # utf-8-sig transparently drops a byte-order mark if the file has one.
    with open(path, newline="", encoding="utf-8-sig") as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                continue
            package = row[0].strip()
            if package:
                packages.append(package)
    return packages


def _element_text(soup, tag, attrs):
    """Return the text of the first *tag* matching *attrs*, or None if absent."""
    element = soup.find(tag, attrs)
    return None if element is None else element.text


def _fetch_app_details(package):
    """Return ``[name, genre]`` for *package*, or None if its page is unavailable.

    A field that cannot be located in the page is reported on stdout and
    returned as an empty string, so the row is still written.
    """
    try:
        response = requests.get(APP_LINK + package, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        # Keep going: one network failure must not discard earlier results.
        print("Request failed for: %s (%s)" % (package, err))
        return None

    if response.status_code == 404:
        print("App not found: %s" % package)
        return None
    if not response.ok:
        # Any other error response is not an app page; parsing it would
        # only produce an empty row.
        print("Unexpected HTTP status %s for: %s" % (response.status_code, package))
        return None

    soup = BeautifulSoup(response.text, "html.parser")

    name = _element_text(soup, "div", {"class": "id-app-title"})
    if name is None:
        print("App name not found for: %s" % package)
        name = ""

    genre = _element_text(soup, "span", {"itemprop": "genre"})
    if genre is None:
        print("Genre not found for: %s" % package)
        genre = ""

    return [name, genre]


def main():
    """Prompt for the input CSV, scrape every package, and write results.csv."""
    print(
        "Need input CSV file (absolute) path \n"
        "Ensure csv is of format: <package_name>, <id>\n\n"
        "Enter Path:"
    )
    input_file_path = input().strip()

    packages = _read_package_names(input_file_path)

    print("\n\nSit back and relax, this might take a while!\n\n")

    output_rows = []
    for package in packages:
        details = _fetch_app_details(package)
        if details is not None:
            output_rows.append(details)

    # newline="" lets the csv module control line endings (avoids blank
    # lines between rows on Windows); utf-8 keeps non-ASCII app names intact.
    with open(OUTPUT_PATH, "w", newline="", encoding="utf-8") as fp:
        csv.writer(fp, delimiter=",").writerows(output_rows)


if __name__ == "__main__":
    main()
source share