Editor's note: see the following answer; linear regression is now natively supported in BigQuery. --Fh
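The following query performs a linear regression for each bucket of an input table. It produces the slope and intercept of the best fit to the model Y = SLOPE * X + INTERCEPT, along with the correlation coefficient obtained from the built-in function CORR.
As an example, it uses the public natality sample table to model birth weight (weight_pounds) as a linear function of pregnancy duration (gestation_weeks), broken down by state. The query is written as several layers of subqueries to show how the pieces fit together; to apply it to another dataset, only the innermost subquery, which supplies Bucket, X, and Y, needs to be replaced.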
-- Per-bucket least-squares fit of Y = SLOPE * X + INTERCEPT (BigQuery legacy SQL).
-- Output columns: Bucket, SLOPE, INTERCEPT, CORRELATION.
SELECT
  Bucket,
  SLOPE,
  -- Equivalent to mean(Y) - SLOPE * mean(X), written in terms of sums and the row count.
  (y_total - SLOPE * x_total) / n_points AS INTERCEPT,
  CORRELATION
FROM (
  -- Step 2: derive the slope from the correlation and the two standard deviations.
  SELECT
    Bucket,
    n_points,
    x_total,
    y_total,
    CORRELATION * y_sd / x_sd AS SLOPE,
    CORRELATION
  FROM (
    -- Step 1: one row of summary statistics per bucket.
    SELECT
      Bucket,
      COUNT(*)      AS n_points,
      SUM(X)        AS x_total,
      SUM(Y)        AS y_total,
      STDDEV_POP(X) AS x_sd,
      STDDEV_POP(Y) AS y_sd,
      CORR(X, Y)    AS CORRELATION
    FROM (
      -- Step 0: input data. Swap this subquery to run the regression on another
      -- table; it must expose the columns Bucket, X and Y. Rows with a NULL in
      -- any of the three columns are dropped before aggregation.
      SELECT
        state           AS Bucket,
        gestation_weeks AS X,
        weight_pounds   AS Y
      FROM [publicdata.samples.natality]
      WHERE state IS NOT NULL
        AND gestation_weeks IS NOT NULL
        AND weight_pounds IS NOT NULL
    )
    GROUP BY Bucket
  )
);
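The query relies on the built-in STDDEV_POP and CORR aggregate functions rather than summing products of X and Y, taking differences, and dividing by hand; on a well-behaved dataset, both approaches should give the same slope and intercept to high accuracy.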