I use Scrapy to clear the gold that is behind the authentication screen. The web site is used ASP.net, and ASP was some stupid hidden fields are filled in due form (for example __VIEWSTATE, __EVENTTARGET).
When I call FormRequest.from_response(response,..., I expect it to automatically read these hidden fields from the response and fill them in a dictionary formdata, which is the Scrapy FormRequest Documentation says that it should do this.
But if so, why does the login process only work when I explicitly list these fields and fill them out?
class ItsyBitsy(Spider):
name = "itsybitsy"
allowed_domains = ["website.com"]
start_urls = ["http://website.com/cpanel/Default.aspx"]
def parse(self, response):
sel = Selector(response)
return [FormRequest.from_response(response,
formdata={
'tb_Username':'admin',
'tb_Password':'password',
'__VIEWSTATE':
sel.xpath("//input[@name='__VIEWSTATE']/@value").extract(),
'__EVENTVALIDATION':
sel.xpath("//input[@name='__EVENTVALIDATION']/@value").extract(),
'__EVENTTARGET': 'b_Login'
},
callback=self.after_login,
clickdata={'id':'b_Login'},
dont_click=True)]
def after_login(self, response):
pass
Edit: add HTML form
<form id="form1" action="Default.aspx" method="post" name="form1">
<div>
<input type="hidden" value="" id="__EVENTTARGET" name="__EVENTTARGET">
<input type="hidden" value="" id="__EVENTARGUMENT" name="__EVENTARGUMENT">
<input type="hidden" value="/wEPDwULLTE2OTg2NjA1NTAPZBYCAgMPZBYGAgMPD2QWAh4Kb25rZXlwcmVzcwUlcmV0dXJuIGNsaWNrQnV0dG9uKGV2ZW50LCAnYl9Mb2dpbicpO2QCBQ8PZBYCHwAFJXJldHVybiBjbGlja0J1dHRvbihldmVudCwgJ2JfTG9naW4nKTtkAgcPD2QWAh4Hb25jbGljawUPcmV0dXJuIGxvZ2luKCk7ZGRKt/WTOQThVTxB9Y0QcIuRqylCIw==" id="__VIEWSTATE" name="__VIEWSTATE">
</div>
<script type="text/javascript">
var theForm = document.forms['form1'];
if (!theForm) {
theForm = document.form1;
}
function __doPostBack(eventTarget, eventArgument) {
if (!theForm.onsubmit || (theForm.onsubmit() != false)) {
theForm.__EVENTTARGET.value = eventTarget;
theForm.__EVENTARGUMENT.value = eventArgument;
theForm.submit();
}
}
</script>
<div>
<input type="hidden" value="/wEWBAK0o8DDCQLxz5rcDwLF8dCIDALHyYWSA+rA4VJNaEpFIycMDHQPUOz393TI" id="__EVENTVALIDATION" name="__EVENTVALIDATION">
<input type="text" onkeypress="return clickButton(event, 'b_Login');" size="28" class="textfield-text" id="tb_Username" name="tb_Username">
<input type="password" onkeypress="return clickButton(event, 'b_Login');" size="28" class="textfield-text" id="tb_Password" name="tb_Password">
<a href="javascript:__doPostBack('b_Login','')" class="button-link" id="b_Login" onclick="return login();">Login</a>
</form>