How to get Google images

I want to fetch Google Images results for an arbitrary search query. I looked at the Google search file but couldn't figure it out. I also found some methods that extract images, but only from the first page of results. I used the following method.

/**
 * Scrape image URLs for a search query from a Google Images results page.
 *
 * Downloads the results page, isolates the argument text of the inline
 * `dyn.setResults(...)` call, splits it on square brackets and cleans each
 * piece: the "/imgres?imgurl\x3d" marker is removed, everything from
 * "\x26imgrefurl" onward is dropped, and one leading quote / leading comma
 * is stripped.
 *
 * NOTE: only data present in the single fetched page is parsed; nothing
 * here requests further result pages.
 *
 * @param string $k Search query; it is URL-encoded before being put in the URL.
 * @return array The non-empty strings left after cleaning (empty array when
 *               the page could not be downloaded).
 */
function getGoogleImg($k)
{
    $url = "http://images.google.it/images?as_q=##query##&hl=it&imgtbs=z&btnG=Cerca+con+Google&as_epq=&as_oq=&as_eq=&imgtype=&imgsz=m&imgw=&imgh=&imgar=&imgar=&imgc=&as_sitesearch=&as_rights=&safe=images&as_st=y";

    // The placeholder must match the URL template exactly ("##query##").
    $web_page = file_get_contents(str_replace("##query##", urlencode($k), $url));
    if ($web_page === false) {
        // Download failed: there is nothing to parse.
        return array();
    }

    // Keep the text starting at "dyn.setResults(" ...
    $tieni = stristr($web_page, "dyn.setResults(");
    // ... cut it at the first ");" and drop the call prefix itself.
    $tieni = str_replace("dyn.setResults(", "", str_replace(stristr($tieni, ");"), "", $tieni));
    $tieni = str_replace("[]", "", $tieni);

    // Split on "[" and "]" so that each bracketed entry becomes one piece.
    $m = preg_split("/[\[\]]/", $tieni);

    $x = array();
    for ($i = 0; $i < count($m); $i++) {
        // Remove the marker that precedes the image URL.
        $m[$i] = str_replace("/imgres?imgurl\\x3d", "", $m[$i]);
        // Drop everything from the referrer marker to the end of the piece.
        $m[$i] = str_replace(stristr($m[$i], "\\x26imgrefurl"), "", $m[$i]);
        // Strip one leading double quote, then one leading comma.
        $m[$i] = preg_replace("/^\"/i", "", $m[$i]);
        $m[$i] = preg_replace("/^,/i", "", $m[$i]);
        if ($m[$i] != "") {
            array_push($x, $m[$i]);
        }
    }
    return $x;
}

This function returns only 21 images. I want all of the images for the query. I am doing this in PHP.

+3
2

, API- , , , .

, 22 som ajax/javascript - ( , , ), javascript-, , , php (). , $web_page 21 ( Google, javascript )? , , javascript? , , ?

API https://developers.google.com/image-search/v1/jsondevguide#json_snippets_php

, http://simplehtmldom.sourceforge.net/ . html DOM . file_get_contents curl , javascript .

+1

script Google Search, 100

script stackoverflow

Python - Google ?

, URL- Google, urllib2 BeautifulSoup

, u 3 Google

# Search phrase; its whitespace-separated words are joined with "+" for the URL.
query = "Terminator 3"
query = '+'.join(query.split())  # "Terminator 3" -> "Terminator+3"

# Google search URL for the query.
url = "https://www.google.co.in/search?q=" + query + "&source=lnms&tbm=isch"

# Request headers: send a desktop Chrome User-Agent string.
header = {
    'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36",
}

# Fetch the page and parse the response with BeautifulSoup.
req = urllib2.Request(url, headers=header)
response = urllib2.urlopen(req)
soup = BeautifulSoup(response)

html- , , -

, URL

, Google "div", { "class": "rg_meta" },

- BeautifulSoup

# Show every <div class="rg_meta"> element found in the parsed page.
print(soup.find_all("div", attrs={"class": "rg_meta"}))

u

<div class="rg_meta">{"cl":3,"cr":3,"ct":12,"id":"C0s-rtOZqcJOvM:","isu":"emuparadise.me","itg":false,"ity":"jpg","oh":540,"ou":"http://199.101.98.242/media/images/66433-Terminator_3_The_Redemption-1.jpg","ow":960,"pt":"Terminator 3 The Redemption ISO \\u0026lt; GCN ISOs | Emuparadise","rid":"VJSwsesuO1s1UM","ru":"http://www.emuparadise.me/Nintendo_Gamecube_ISOs/Terminator_3_The_Redemption/66433","s":"Screenshot Thumbnail / Media File 1 for Terminator 3 The Redemption","th":168,"tu":"https://encrypted-tbn2.gstatic.com/images?q\\u003dtbn:ANd9GcRs8dp-ojc4BmP1PONsXlvscfIl58k9hpu6aWlGV_WwJ33A26jaIw","tw":300}</div>

url

http://199.101.98.242/media/images/66433-Terminator_3_The_Redemption-1.jpg

u

# Collect (original image URL, image type) pairs from the JSON text held in
# each "rg_meta" div of the parsed results page.
ActualImages = []
for a in soup.find_all("div", {"class": "rg_meta"}):
    meta = json.loads(a.text)  # parse the JSON once and read both fields
    ActualImages.append((meta["ou"], meta["ity"]))

# Download every image into DIR as "<image_type>_<n>.<extension>".
# NOTE(review): DIR and image_type are defined outside this fragment; the
# path is built by plain concatenation, so DIR presumably ends with a path
# separator - confirm.
for img, Type in ActualImages:
    try:
        # `header` is already a complete headers dict ({'User-Agent': ...});
        # pass it as-is instead of nesting it as the User-Agent value.
        req = urllib2.Request(img, headers=header)
        raw_img = urllib2.urlopen(req).read()

        if not os.path.exists(DIR):
            os.mkdir(DIR)

        # Next sequence number = files already in DIR whose name contains
        # image_type, plus one.
        cntr = len([name for name in os.listdir(DIR) if image_type in name]) + 1
        print(cntr)

        # Fall back to ".jpg" when the metadata carries no image type.
        extension = Type if Type else "jpg"
        # `with` closes the file even if the write fails.
        with open(DIR + image_type + "_" + str(cntr) + "." + extension, 'wb') as f:
            f.write(raw_img)
    except Exception as e:
        # Best-effort download: report the failure and continue with the
        # next image.
        print("could not load : " + img)
        print(e)

voila script Google

script

https://gist.github.com/rishabhsixfeet/8ff479de9d19549d5c2d8bfc14af9b88

+1

Source: https://habr.com/ru/post/1791668/


All Articles