Login on LinkedIn with python request sessions

I am trying to log in to LinkedIn using Python requests:

import requests
from BeautifulSoup import BeautifulSoup

# LinkedIn's login form posts these fields with underscores, not hyphens:
# 'session-key' / 'session-password' are silently ignored by the server,
# which is why the login never succeeded.
payload = {
    'session_key': 'user@email.com',
    'session_password': 'password',
}

URL = 'https://www.linkedin.com/uas/login-submit'

# A requests.Session keeps cookies between calls, so the authenticated
# session cookie set by the login POST is reused by the later GET.
s = requests.session()
s.post(URL, data=payload)

r = s.get('http://www.linkedin.com/nhome')
soup = BeautifulSoup(r.text)
# The page title tells us whether we are logged in ("Welcome!") or not.
print(soup.find('title'))

I cannot log in using this method. I even tried adding the CSRF token etc. to the payload — but shouldn't sessions take care of that for me?

A note on the last line: I use the page title to check whether I have successfully logged in. If logged in, I should see "Welcome!" from LinkedIn; instead I see "World's Largest Professional Network | LinkedIn".

Am I missing something? Thanks in advance!

+6
source share
3 answers

I adapted the web-scraping template that I use for most of my Python-based scraping needs to fit your problem. I verified that it worked with my own login information.

It works by simulating a browser and maintaining a cookie jar that stores your user session. I also got it working with BeautifulSoup for you.

Note: this is the Python 2 version. I have added a working Python 3 example below, on request.

 import cookielib import os import urllib import urllib2 import re import string from BeautifulSoup import BeautifulSoup username = " user@email.com " password = "password" cookie_filename = "parser.cookies.txt" class LinkedInParser(object): def __init__(self, login, password): """ Start up... """ self.login = login self.password = password # Simulate browser with cookies enabled self.cj = cookielib.MozillaCookieJar(cookie_filename) if os.access(cookie_filename, os.F_OK): self.cj.load() self.opener = urllib2.build_opener( urllib2.HTTPRedirectHandler(), urllib2.HTTPHandler(debuglevel=0), urllib2.HTTPSHandler(debuglevel=0), urllib2.HTTPCookieProcessor(self.cj) ) self.opener.addheaders = [ ('User-agent', ('Mozilla/4.0 (compatible; MSIE 6.0; ' 'Windows NT 5.2; .NET CLR 1.1.4322)')) ] # Login self.loginPage() title = self.loadTitle() print title self.cj.save() def loadPage(self, url, data=None): """ Utility function to load HTML from URLs for us with hack to continue despite 404 """ # We'll print the url in case of infinite loop # print "Loading URL: %s" % url try: if data is not None: response = self.opener.open(url, data) else: response = self.opener.open(url) return ''.join(response.readlines()) except: # If URL doesn't load for ANY reason, try again... # Quick and dirty solution for 404 returns because of network problems # However, this could infinite loop if there an actual problem return self.loadPage(url, data) def loginPage(self): """ Handle login. This should populate our cookie jar. 
""" html = self.loadPage("https://www.linkedin.com/") soup = BeautifulSoup(html) csrf = soup.find(id="loginCsrfParam-login")['value'] login_data = urllib.urlencode({ 'session_key': self.login, 'session_password': self.password, 'loginCsrfParam': csrf, }) html = self.loadPage("https://www.linkedin.com/uas/login-submit", login_data) return def loadTitle(self): html = self.loadPage("https://www.linkedin.com/feed/") soup = BeautifulSoup(html) return soup.find("title") parser = LinkedInParser(username, password) 

Update June 19, 2014: Added parsing for the CSRF token from the main page for use in the updated login process.

Update July 23, 2015: adding a Python 3 example here. It basically requires replacing the library locations and removing deprecated methods. It is not perfectly formatted, but it works. Sorry for the rush — in the end, the principles and steps are identical.

 import http.cookiejar as cookielib import os import urllib import re import string from bs4 import BeautifulSoup username = " user@email.com " password = "password" cookie_filename = "parser.cookies.txt" class LinkedInParser(object): def __init__(self, login, password): """ Start up... """ self.login = login self.password = password # Simulate browser with cookies enabled self.cj = cookielib.MozillaCookieJar(cookie_filename) if os.access(cookie_filename, os.F_OK): self.cj.load() self.opener = urllib.request.build_opener( urllib.request.HTTPRedirectHandler(), urllib.request.HTTPHandler(debuglevel=0), urllib.request.HTTPSHandler(debuglevel=0), urllib.request.HTTPCookieProcessor(self.cj) ) self.opener.addheaders = [ ('User-agent', ('Mozilla/4.0 (compatible; MSIE 6.0; ' 'Windows NT 5.2; .NET CLR 1.1.4322)')) ] # Login self.loginPage() title = self.loadTitle() print(title) self.cj.save() def loadPage(self, url, data=None): """ Utility function to load HTML from URLs for us with hack to continue despite 404 """ # We'll print the url in case of infinite loop # print "Loading URL: %s" % url try: if data is not None: response = self.opener.open(url, data) else: response = self.opener.open(url) return ''.join([str(l) for l in response.readlines()]) except Exception as e: # If URL doesn't load for ANY reason, try again... # Quick and dirty solution for 404 returns because of network problems # However, this could infinite loop if there an actual problem return self.loadPage(url, data) def loadSoup(self, url, data=None): """ Combine loading of URL, HTML, and parsing with BeautifulSoup """ html = self.loadPage(url, data) soup = BeautifulSoup(html, "html5lib") return soup def loginPage(self): """ Handle login. This should populate our cookie jar. 
""" soup = self.loadSoup("https://www.linkedin.com/") csrf = soup.find(id="loginCsrfParam-login")['value'] login_data = urllib.parse.urlencode({ 'session_key': self.login, 'session_password': self.password, 'loginCsrfParam': csrf, }).encode('utf8') self.loadPage("https://www.linkedin.com/uas/login-submit", login_data) return def loadTitle(self): soup = self.loadSoup("https://www.linkedin.com/feed/") return soup.find("title") parser = LinkedInParser(username, password) 
+14
source

This is a much simpler version.

 import requests from bs4 import BeautifulSoup client = requests.Session() HOMEPAGE_URL = 'https://www.linkedin.com' LOGIN_URL = 'https://www.linkedin.com/uas/login-submit' html = client.get(HOMEPAGE_URL).content soup = BeautifulSoup(html) csrf = soup.find(id="loginCsrfParam-login")['value'] login_information = { 'session_key':'Login', 'session_password':'Password', 'loginCsrfParam': csrf, } client.post(LOGIN_URL, data=login_information) client.get('Any_Linkedin_URL') 
+10
source

The OP solution worked for me with only a very minor modification.

Change "session-key" to "session_key", and change "session-password" to "session_password".

Other than that, the code is good as it stands.

+1
source

Source: https://habr.com/ru/post/954212/


All Articles