How to select the latest partition in a BigQuery table?

I am trying to select data from the latest partition of a date-partitioned BigQuery table, but the query still reads data from the entire table.

I tried (as far as I know, BigQuery does not support QUALIFY):

-- Asker's attempt: rank the distinct partition timestamps, newest first, and
-- filter the table on the top-ranked one. The partition value is produced by
-- a subquery; the question reports that this form still reads the whole table.
SELECT col FROM table WHERE _PARTITIONTIME = (
  SELECT pt FROM (
    SELECT pt, RANK() OVER(ORDER by pt DESC) as rnk FROM (
      SELECT _PARTITIONTIME AS pt FROM table GROUP BY 1
    )
  )
  -- rnk = 1 keeps only the newest partition timestamp
  WHERE rnk = 1
);

But this does not work: it still reads all the rows.

-- Filter on a single, literal partition timestamp.
SELECT col
FROM table
WHERE _PARTITIONTIME = TIMESTAMP('YYYY-MM-DD')

where 'YYYY-MM-DD' is a specific date, does work.

However, I need to run this script in the future, and the table (and therefore its _PARTITIONTIME) is updated irregularly. Is there a way to pull data only from the latest partition in BigQuery?

+7
source share
6 answers

  October 2019 Update

Support for scripting and stored procedures is now in beta (as of October 2019).

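You can now submit multiple statements separated by semicolons, and BigQuery is able to run them. See the example below: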

-- Step 1: resolve the newest partition timestamp into a script variable.
DECLARE max_date TIMESTAMP;
SET max_date = (
  SELECT MAX(_PARTITIONTIME) FROM `project.dataset.partitioned_table`);

-- Step 2: filter on the variable, so the partition predicate is a plain value
-- rather than a subquery.
SELECT * FROM `project.dataset.partitioned_table`
WHERE _PARTITIONTIME = max_date;

Update for those who like downvoting without checking the context, etc.

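I think this answer was accepted because it addressed the OP's main question, "Is there a way I can pull data only from the latest partition in BigQuery?", and it was noted in the comments that the BQ engine obviously still scans all rows but returns a result based only on the most recent partition. As already mentioned in a comment on the question, this is still something that can easily be addressed by scripting that logic: first get the result of the subquery, then use it in the final query.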

-- Legacy SQL: take the largest partition_id from the table's
-- $__PARTITIONS_SUMMARY__ meta-table, converted to a timestamp, and keep only
-- the rows whose _PARTITIONTIME matches it.
SELECT *
FROM [dataset.partitioned_table]
WHERE _PARTITIONTIME IN (
  SELECT MAX(TIMESTAMP(partition_id))
  FROM [dataset.partitioned_table$__PARTITIONS_SUMMARY__]
)

-- Legacy SQL: compute the largest _PARTITIONTIME directly from the table and
-- keep only the rows that belong to that partition.
SELECT *
FROM [dataset.partitioned_table]
WHERE _PARTITIONTIME IN (
  SELECT MAX(_PARTITIONTIME)
  FROM [dataset.partitioned_table]
)
+1

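Sorry for digging up this old question, but it came up in a Google search, and I think the accepted answer is misleading.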

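As far as I can tell from the documentation and from running tests, the accepted answer will not prune partitions, because a subquery is used to determine the most recent partition: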

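Complex queries that require the evaluation of multiple stages of a query in order to resolve the predicate (such as inner queries or subqueries) will not prune partitions from the query.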

, , . .

, . , _PARTITIONTIME , , , , :

-- Standard SQL: read only the partition dated one day before the current date.
-- The filter is a constant expression on the partition pseudo-column, not a
-- subquery.
SELECT * FROM `dataset.partitioned_table`
    WHERE _PARTITIONDATE = DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)

, , . INTERVAL 0 DAY, , , 0 , .

, , !

+5

:

#standardSQL
-- List every distinct partition timestamp present in the table, one row each.
SELECT DISTINCT _PARTITIONTIME AS pt
FROM `[DATASET].[TABLE]`

.

:)

https://cloud.google.com/bigquery/docs/querying-partitioned-tables

+1

. , . , , , :

  1. ( - 1 ). .
  2. , .

:

-- Restrict the scan to partitions newer than three days ago, then keep only
-- the rows whose partition timestamp is the newest one inside that window.
WITH last_three_partitions AS (
  SELECT
    *,
    _PARTITIONTIME AS PARTITIONTIME
  FROM dataset.partitioned_table
  WHERE _PARTITIONTIME > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 3 DAY)
)
SELECT
  col1,
  PARTITIONTIME
FROM last_three_partitions
WHERE PARTITIONTIME = (
  SELECT MAX(PARTITIONTIME)
  FROM last_three_partitions
)
+1

__TABLES__ , ~ 3 . split ordinal - .

_PARTITIONTIME _TABLE_SUFFIX.

-- Look up the newest table id with the given prefix in the dataset's
-- __TABLES__ metadata, strip the prefix, and filter the wildcard table on the
-- remaining suffix.
-- NOTE(review): the subquery returns a STRING; when the tables are date-sharded
-- the filter column presumably has to be _TABLE_SUFFIX rather than
-- _PARTITIONTIME - confirm against the actual table layout.
select * from `project.dataset.tablePrefix*`
where _PARTITIONTIME = (
    -- ordinal(2) is the text that follows the first occurrence of the prefix
    SELECT split(table_id,'tablePrefix')[ordinal(2)] FROM `project.dataset.__TABLES__`
    where table_id like 'tablePrefix%'
    order by table_id desc limit 1)
0

, , ( ):

( ):

-- Filter on a date computed from CURRENT_DATE(): no subquery is involved in
-- the datehour predicate.
SELECT MAX(views)
FROM `fh-bigquery.wikipedia_v3.pageviews_2019`
WHERE DATE(datehour) = DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY)
AND wiki='es'
# 122.2 MB processed

, - :

-- Same query, but the date to filter on comes from a subquery over the same
-- table; note the much larger "processed" figure reported below.
SELECT MAX(views)
FROM `fh-bigquery.wikipedia_v3.pageviews_2019`
WHERE DATE(datehour) = (SELECT DATE(MAX(datehour)) FROM `fh-bigquery.wikipedia_v3.pageviews_2019` WHERE wiki='es')
AND wiki='es'
# 50.6 GB processed

... that is 50.6 GB.

- , 2 :

-- Pseudo-code, step 1: run this subquery on its own and keep its result as max_date.
max_date = (SELECT DATE(MAX(datehour)) FROM `fh-bigquery.wikipedia_v3.pageviews_2019` WHERE wiki='es')

;

-- Step 2: {{max_date}} is a template placeholder, substituted with the date
-- obtained beforehand, so the datehour predicate contains no subquery.
SELECT MAX(views)
FROM `fh-bigquery.wikipedia_v3.pageviews_2019`
WHERE DATE(datehour) = {{max_date}}
AND wiki='es'
# 115.2 MB processed

Related BigQuery issue: https://issuetracker.google.com/issues/36955074.

0

Source: https://habr.com/ru/post/1656006/


All Articles