Reliable scraping of stock price tables

Problem:

My goal is to automate the scraping of the table with currency prices from this site (stock prices). Since the stock broker does not provide an API, I have to find a workaround.

I have already searched for applications for this purpose, so as not to reinvent the wheel and waste time / money, but, unfortunately, I have not found any that will work with this site.

What I tried:

R is known for its simplicity and straightforward use. Let's look at the code, which is basically a copy-paste example from a textbook:

library("rvest")

# Address of the historical-quotes page to scrape.
url <- "https://iqoption.com/en/historical-financial-quotes?active_id=1&tz_offset=120&date=2016-12-19-19-0"

# Download and parse the page, select the quotes table by XPath, and convert
# the matched nodes into a list of data frames.
page <- read_html(url)
table_nodes <- html_nodes(page, xpath = '//*[@id="mCSB_3_container"]/table')
population <- html_table(table_nodes)
population

# Keep only the first matched table.
population <- population[[1]]

head(population)

Getting an empty table.

  1. JavaScript and casperJS

This option is by far the best: I can actually retrieve the data, but it is very slow and ultimately crashes with a "memory exhausted" error:

// Options for the CasperJS instance used by the whole scrape: debug-level
// logging printed to the console, no images or plugins loaded, web security
// disabled, and a desktop Chrome user-agent string.
var casperOptions = {
  logLevel: 'debug',
  verbose: true,
  loadImages: false,
  loadPlugins: false,
  webSecurityEnabled: false,
  userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.97 Safari/537.11"
};
var casper = require('casper').create(casperOptions);

var fs = require('fs');

// State shared by the functions and steps below.
var url = 'https://eu.iqoption.com/en/historical-financial-quotes?active_id=1&tz_offset=60&date=2016-12-19-21-0';
var length;     // row count of the current table; assigned in moveNext()
var sep = ';';  // field delimiter used when writing the CSV lines

casper.start(url);

// Build the timestamp suffix used in the output file name.
// Every field is zero-padded to a fixed width so the generated file names
// have a constant length and sort chronologically (previously only the day
// and the month were padded).

/**
 * Left-pad a number with zeros.
 *
 * @param {number} value Number to format.
 * @param {number} width Minimum number of characters in the result.
 * @returns {string} The decimal representation, at least `width` long.
 */
function zeroPad(value, width) {
    var text = String(value);
    while (text.length < width) {
        text = '0' + text;
    }
    return text;
}

var now = new Date();
var yyyy = now.getFullYear();
var mm = zeroPad(now.getMonth() + 1, 2); // getMonth() is zero-based: January is 0
var dd = zeroPad(now.getDate(), 2);
var hh = zeroPad(now.getHours(), 2);
var MM = zeroPad(now.getMinutes(), 2);
var fff = zeroPad(now.getMilliseconds(), 3);

// Layout: yyyy_mm_dd_hh_MM_fff
var today = yyyy + '_' + mm + '_' + dd + '_' + hh + '_' + MM + '_' + fff;
casper.echo(today);

/**
 * Read the trimmed text of one cell of the table in the currently loaded page.
 *
 * @param {number} row  Zero-based index into the `table tbody tr` matches.
 * @param {number} cell Zero-based index into that row's `childNodes`.
 * @returns {string} The cell's innerText without surrounding whitespace, or
 *     an empty string when the row or the cell does not exist.
 */
function getCellContent(row, cell) {
    // Declared with `var`: assigning to an undeclared name leaked a global
    // and throws a ReferenceError in strict mode.
    var cellText = casper.evaluate(function(row, cell) {
        var tableRow = document.querySelectorAll('table tbody tr')[row];
        var node = tableRow ? tableRow.childNodes[cell] : undefined;
        // Guard against a missing row/cell instead of throwing in the page.
        if (!node) {
            return '';
        }
        return node.innerText.trim();
    }, row, cell);
    return cellText;
}

/**
 * Count the rows of the table in the currently loaded page, store the count
 * in the shared `length` variable and log it.
 *
 * Logs through `this.echo`, so it has to be called with an object providing
 * `echo` as `this` (the script registers it with `casper.then`).
 */
function moveNext()
{
    // Only the count crosses the page boundary: values returned from
    // evaluate() have to be JSON-serializable, which a NodeList of DOM
    // elements is not.
    var rowCount = casper.evaluate(function() {
        return document.querySelectorAll('table tbody tr').length;
    });
    // Fall back to 0 when no usable number came back.
    length = rowCount || 0;
    this.echo("table length: " + length);
}

// Get 3 tables: queue the steps for the pages whose `date` parameter ends in
// -0, -1 and -2, appending the rows of each one to the same CSV file.
for (var mins = 0; mins < 3; mins++)
{
    url = 'https://eu.iqoption.com/en/historical-financial-quotes?active_id=1&tz_offset=60&date=2016-12-19-21-' + mins;

    casper.echo(url);
    casper.thenOpen(url);
    casper.then(function() {
        this.waitForSelector('#mCSB_3_container table tbody tr');
    });

    casper.then(moveNext);

    casper.then(function() {
        // Read the whole table in ONE evaluate() call and hand back plain
        // strings, instead of four page round-trips per row.
        var lines = this.evaluate(function(sep) {
            var rows = document.querySelectorAll('table tbody tr');
            return Array.prototype.map.call(rows, function(row) {
                var cells = row.childNodes;
                // Child nodes 0, 1, 2 and 4 (labelled Date, Bid, Ask and
                // Quotes in the script's earlier debug output).
                return [0, 1, 2, 4].map(function(index) {
                    return cells[index].innerText.trim();
                }).join(sep);
            });
        }, sep);

        // One append per page; skip the write when nothing was extracted.
        if (lines && lines.length > 0) {
            fs.write('prices_' + today + '.csv', lines.join("\n") + "\n", "a");
        }
    });
}

// The completion message belongs in run()'s callback: a statement placed
// after casper.run() executes immediately, before any queued step has run,
// and `this` is not the casper instance there. With a callback supplied,
// exit() has to be called explicitly.
casper.run(function() {
    this.echo("finished with processing");
    this.exit();
});
  1. JavaScript and PhantomJS

:

var webPage = require('webpage');
var page = webPage.create();

// Open the quotes page, log how many table rows exist when the load callback
// fires, then terminate PhantomJS.
page.open('https://iqoption.com/en/historical-financial-quotes?active_id=1&tz_offset=120&date=2016-12-19-19-0', function(status) {
  if (status !== 'success') {
    console.log('failed to load page: ' + status);
    phantom.exit(1);
    return;
  }

  // Return the count rather than the NodeList: values returned from
  // page.evaluate() have to be JSON-serializable, and DOM nodes are not.
  // NOTE(review): the CasperJS version waits for the rows with
  // waitForSelector; without a similar wait the count here may be 0.
  var rowCount = page.evaluate(function() {
    return document.querySelectorAll('table tbody tr').length;
  });
  console.log('table length: ' + rowCount);

  // Without an explicit exit the PhantomJS process keeps running.
  phantom.exit();
});
  1. Python BeautifulSoup

:

from bs4 import BeautifulSoup
from urllib2 import urlopen


# Page that is searched for the quotes table.
url = "https://iqoption.com/en/historical-financial-quotes?active_id=1&tz_offset=120&date=2016-12-19-19-0"

# Fetch the page and parse the response with the "lxml" parser.
response = urlopen(url)
soup = BeautifulSoup(response, "lxml")

# Collect every <table> carrying the "quotes-table-result" class and report
# how many were found.
table_attrs = {"class": "quotes-table-result"}
table = soup.findAll('table', attrs=table_attrs)
print("table length is: " + str(len(table)))
  1. Scrapy

"Scrapy Shell", .

  1. Pandas read_html()

Pandas :

ValueError: '. +'

:

import pandas as pd
import html5lib  # not referenced below; presumably imported to confirm the parser is installed

# Let pandas download the page and parse the tables it finds.
quotes_url = "https://iqoption.com/en/historical-financial-quotes?active_id=1&tz_offset=120&date=2016-12-19-19-0"
f_states = pd.read_html(quotes_url)
print(f_states)

:

  • , , - HTML-?
  • - - ?

: , -, robots.txt, : Google-.

+4
2

, - XHR, .

, , (casperJS PhantomJS), , HTML- . , rvest urllib2 , JavaScript.

, , API, , " " " ":

. selenium pip:

pip install selenium

Chrome ( Firefox PhantomJS ). , , chromedriver Windows. , .

-, ( WebDriverWait ). pandas ( selenium, - , - casperJS):

import pandas as pd

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


url = "https://iqoption.com/en/historical-financial-quotes?active_id=1&tz_offset=120&date=2016-12-19-19-0"

driver = webdriver.Chrome()
try:
    driver.maximize_window()
    driver.get(url)

    # wait (up to 10 seconds) for a table row to become visible
    wait = WebDriverWait(driver, 10)
    wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "#mCSB_3_container table tbody tr")))

    # read the rendered page source and pass it to "pandas"
    dfs = pd.read_html(driver.page_source)
finally:
    # Always shut the browser down, including when the wait above times out.
    # quit() ends the whole WebDriver session, whereas close() only closes
    # the current window.
    driver.quit()

print(dfs)  # dfs is a list of DataFrame instances

, HTML pandas - , HTML driver.page_source, . - BeautifulSoup lxml.html. .


, -, , -, - " " , "". txt ", / , " - " . :

+6

?

install.packages('quantmod')

...

> getSymbols("YHOO",src="google") # from google finance 
[1] "YHOO" 
> getSymbols("GOOG",src="yahoo") # from yahoo finance 
[1] "GOOG" 
> getSymbols("DEXJPUS",src="FRED") # FX rates from FRED 
[1] "DEXJPUS" 
> getSymbols("XPT/USD",src="Oanda") # Platinum from Oanda 
[1] "XPTUSD"

, , , . , ...

> # Specify lookup parameters, and save for future sessions. 
> 
> setSymbolLookup(YHOO='google',GOOG='yahoo') 
> setSymbolLookup(DEXJPUS='FRED') 
> setSymbolLookup(XPTUSD=list(name="XPT/USD",src="oanda")) 
> saveSymbolLookup(file="mysymbols.rda") 
> # new sessions call loadSymbolLookup(file="mysymbols.rda") 
> 
> getSymbols(c("YHOO","GOOG","DEXJPUS","XPTUSD")) 
[1] "YHOO" "GOOG" "DEXJPUS" "XPTUSD"

. .

http://www.quantmod.com/examples/intro/#data

-1

Source: https://habr.com/ru/post/1664996/


All Articles