Skip to content

Encoding datelist ints 🔐

Tip

Solution to the following problem:

Result Set

Regardless of the database, the result set should look like:

user_id last_update activity_history
1 2024-02-01 2684356096
2 2024-02-01 940442496
3 2024-02-01 204672192
Expand for the DDL
solution(user_id, last_update, activity_history) as (
    values
        (1, '2024-02-01'::date, 2684356096),
        (2, '2024-02-01'::date,  940442496),
        (3, '2024-02-01'::date,  204672192)
)

Solution

Some SQL solutions per database are provided below.

DuckDB

with

session_datetimes as (
    select
        event_id,
        user_id,
        event_datetime as login_datetime,
        coalesce(
            (
                select min(event_datetime)
                from events as innr
                where 1=1
                    and events.user_id = innr.user_id
                    and innr.event_datetime > events.event_datetime
                    and innr.event_datetime <= events.event_datetime + interval '1 day'
                    and innr.event_type = 'logout'
            ),
            events.event_datetime + interval '1 day'
        ) as logout_datetime,
    from events
    where event_type = 'login'
),

event_groups as (
    select
        *,
        sum(is_new_session::int) over (order by login_datetime) as session_id
    from (
        select
            *,
            login_datetime >= lag(logout_datetime, 1, login_datetime) over (
                partition by user_id
                order by login_datetime
            ) as is_new_session
        from session_datetimes
    )
),

sessions as (
    select
        user_id,
        min(login_datetime)::date as login_date,
        max(logout_datetime)::date as logout_date
    from event_groups
    group by user_id, session_id
),

dates(active_date) as (
    select unnest(generate_series(
        (select min(event_datetime)::date from events),
        (select max(event_datetime)::date from events),
        interval '1 day'
    ))
),

activity(user_id, active_date, is_active) as (
    select
        users.user_id,
        dates.active_date::date as active_date,
        exists(
            select *
            from sessions
            where 1=1
                and users.user_id = sessions.user_id
                and dates.active_date between sessions.login_date
                                          and sessions.logout_date
        )::int as is_active,
        row_number() over (
            partition by users.user_id
            order by dates.active_date desc
        ) as step
    from (select distinct user_id from sessions) as users
        cross join dates
)

select
    user_id,
    max(active_date) as last_update,
    sum(is_active * power(2, step - 1)) as activity_history
from activity
group by user_id
order by user_id