Window function to fill in gaps in the timeline

Data

session                             time_interval       activity
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:40 walking
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:41 (null)
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:42 (null)
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:43 (null)
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:44 walking
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:45 (null)
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:46 running
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:47 (null)
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:48 (null)
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:49 (null)
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:50 walking
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:51 (null)
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:52 (null)
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:53 (null)
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:54 running
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:55 (null)
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:56 (null)
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:57 (null)
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:58 resting
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:59 (null)
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:17:00 (null)
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:17:01 (null)
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:17:02 walking

SQL

SELECT session, 
       time_interval, 
       activity, 
       FIRST_VALUE(activity) 
         OVER ( 
           PARTITION BY session 
           ORDER BY time_interval 
               RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS activity_b
FROM   my_table; 

But this takes the first meaning of the session. How can I get the previous value for every second?

Desired Result

session                             time_interval       activity
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:40 walking
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:41 walking
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:42 walking
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:43 walking
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:44 walking
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:45 walking
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:46 running
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:47 running
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:48 running
c889ddb532e76c961c2944dd90b10142    2017-05-25 20:16:49 running
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:50 walking
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:51 walking
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:52 walking
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:53 walking
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:54 running
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:55 running
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:56 running
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:57 running
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:58 resting
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:16:59 resting
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:17:00 resting
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:17:01 resting
dddjg894hlog8sdlf2090288fmma201c    2017-05-25 20:17:02 walking

SQL Fiddle has capacity, so here are some DDL

CREATE TABLE public.my_table (
  session varchar(32),
  time_interval timestamp,
  activity varchar(10));

INSERT INTO public.my_table VALUES ('c889ddb532e76c961c2944dd90b10142','2017-05-25 20:16:40','walking');
INSERT INTO public.my_table VALUES ('c889ddb532e76c961c2944dd90b10142','2017-05-25 20:16:41','');
INSERT INTO public.my_table VALUES ('c889ddb532e76c961c2944dd90b10142','2017-05-25 20:16:42','');
INSERT INTO public.my_table VALUES ('c889ddb532e76c961c2944dd90b10142','2017-05-25 20:16:43','');
INSERT INTO public.my_table VALUES ('c889ddb532e76c961c2944dd90b10142','2017-05-25 20:16:44','walking');
INSERT INTO public.my_table VALUES ('c889ddb532e76c961c2944dd90b10142','2017-05-25 20:16:45','');
INSERT INTO public.my_table VALUES ('c889ddb532e76c961c2944dd90b10142','2017-05-25 20:16:46','running');
INSERT INTO public.my_table VALUES ('c889ddb532e76c961c2944dd90b10142','2017-05-25 20:16:47','');
INSERT INTO public.my_table VALUES ('c889ddb532e76c961c2944dd90b10142','2017-05-25 20:16:48','');
INSERT INTO public.my_table VALUES ('c889ddb532e76c961c2944dd90b10142','2017-05-25 20:16:49','');
INSERT INTO public.my_table VALUES ('dddjg894hlog8sdlf2090288fmma201c','2017-05-25 20:16:50','walking');
INSERT INTO public.my_table VALUES ('dddjg894hlog8sdlf2090288fmma201c','2017-05-25 20:16:51','');
INSERT INTO public.my_table VALUES ('dddjg894hlog8sdlf2090288fmma201c','2017-05-25 20:16:52','');
INSERT INTO public.my_table VALUES ('dddjg894hlog8sdlf2090288fmma201c','2017-05-25 20:16:53','');
INSERT INTO public.my_table VALUES ('dddjg894hlog8sdlf2090288fmma201c','2017-05-25 20:16:54','running');
INSERT INTO public.my_table VALUES ('dddjg894hlog8sdlf2090288fmma201c','2017-05-25 20:16:55','');
INSERT INTO public.my_table VALUES ('dddjg894hlog8sdlf2090288fmma201c','2017-05-25 20:16:56','');
INSERT INTO public.my_table VALUES ('dddjg894hlog8sdlf2090288fmma201c','2017-05-25 20:16:57','');
INSERT INTO public.my_table VALUES ('dddjg894hlog8sdlf2090288fmma201c','2017-05-25 20:16:58','resting');
INSERT INTO public.my_table VALUES ('dddjg894hlog8sdlf2090288fmma201c','2017-05-25 20:16:59','');
INSERT INTO public.my_table VALUES ('dddjg894hlog8sdlf2090288fmma201c','2017-05-25 20:17:00','');
INSERT INTO public.my_table VALUES ('dddjg894hlog8sdlf2090288fmma201c','2017-05-25 20:17:01','');
INSERT INTO public.my_table VALUES ('dddjg894hlog8sdlf2090288fmma201c','2017-05-25 20:17:02','resting');
+4
source share
2 answers

This is really where you want the option ignore null. But this is not available. Thus, one method uses maximum scanning and join:

select t.session, t.time_interval, tt.activity
from (select t.*,
              max(case when t.activity is not null then t.time_interval end) over (partition by t.session order by t.time_interval) as value_ti
      from t
     ) t left join
     t tt
     on t.value_ti = tt.time_interval and t.session = tt.session;

This calculates the most recent time interval for each row when there was no value NULL. Then he comes back to get activity at this time.

, 3 NULL, lag():

select t.session, t.time_interval,
       coalesce(t.activity,
                lag(t.activity, 1) over (partition by t.session order by t.time_interval),
                lag(t.activity, 2) over (partition by t.session order by t.time_interval),
                lag(t.activity, 3) over (partition by t.session order by t.time_interval)
               ) as acctivity
from t;
+1

: activity is null , activity is '' . '' .

" ", . , activity != ''. FIRST_VALUE .

SELECT 
  "session"
, time_interval
, FIRST_VALUE(activity) OVER (PARTITION BY "session", sub_session) activity
FROM (
  SELECT 
      *
    , SUM(CASE WHEN activity != '' THEN 1 ELSE 0 END) OVER (PARTITION BY "session" ORDER BY time_interval) sub_session
  FROM my_table
) sub
0

Source: https://habr.com/ru/post/1677954/


All Articles