Skip to content

Funnel analytics ⏬

Tip

Solution to the following problem:

Result Set

Regardless of the database, the result set should look like:

cohort stage mortgages step_rate total_rate
2024-01 full application 4 100.00 100.00
2024-01 decision 4 100.00 100.00
2024-01 documentation 3 75.00 75.00
2024-01 valuation inspection 3 100.00 75.00
2024-01 valuation made 3 100.00 75.00
2024-01 valuation submitted 3 100.00 75.00
2024-01 solicitation 1 33.33 25.00
2024-01 funds released 1 100.00 25.00
2024-02 full application 6 100.00 100.00
2024-02 decision 6 100.00 100.00
2024-02 documentation 4 66.67 66.67
2024-02 valuation inspection 4 100.00 66.67
2024-02 valuation made 4 100.00 66.67
2024-02 valuation submitted 4 100.00 66.67
2024-02 solicitation 3 75.00 50.00
2024-02 funds released 3 100.00 50.00
2024-03 full application 3 100.00 100.00
2024-03 decision 3 100.00 100.00
2024-03 documentation 1 33.33 33.33
2024-03 valuation inspection 1 100.00 33.33
2024-03 valuation made 1 100.00 33.33
2024-03 valuation submitted 1 100.00 33.33
2024-03 solicitation 0 0.00 0.00
2024-03 funds released 0 0.00 0.00
Expand for the DDL
solution(cohort, stage, mortgages, step_rate, total_rate) as (
    values
        ('2024-01', 'full application',     4, 100.00, 100.00),
        ('2024-01', 'decision',             4, 100.00, 100.00),
        ('2024-01', 'documentation',        3,  75.00,  75.00),
        ('2024-01', 'valuation inspection', 3, 100.00,  75.00),
        ('2024-01', 'valuation made',       3, 100.00,  75.00),
        ('2024-01', 'valuation submitted',  3, 100.00,  75.00),
        ('2024-01', 'solicitation',         1,  33.33,  25.00),
        ('2024-01', 'funds released',       1, 100.00,  25.00),
        ('2024-02', 'full application',     6, 100.00, 100.00),
        ('2024-02', 'decision',             6, 100.00, 100.00),
        ('2024-02', 'documentation',        4,  66.67,  66.67),
        ('2024-02', 'valuation inspection', 4, 100.00,  66.67),
        ('2024-02', 'valuation made',       4, 100.00,  66.67),
        ('2024-02', 'valuation submitted',  4, 100.00,  66.67),
        ('2024-02', 'solicitation',         3,  75.00,  50.00),
        ('2024-02', 'funds released',       3, 100.00,  50.00),
        ('2024-03', 'full application',     3, 100.00, 100.00),
        ('2024-03', 'decision',             3, 100.00, 100.00),
        ('2024-03', 'documentation',        1,  33.33,  33.33),
        ('2024-03', 'valuation inspection', 1, 100.00,  33.33),
        ('2024-03', 'valuation made',       1, 100.00,  33.33),
        ('2024-03', 'valuation submitted',  1, 100.00,  33.33),
        ('2024-03', 'solicitation',         0,   0.00,   0.00),
        ('2024-03', 'funds released',       0,   0.00,   0.00)
)

Solution

Some SQL solutions per database are provided below.

DuckDB

with

stages(stage, step) as (
    values
        ('full application',     1),
        ('decision',             2),
        ('documentation',        3),
        ('valuation inspection', 4),
        ('valuation made',       5),
        ('valuation submitted',  6),
        ('solicitation',         7),
        ('funds released',       8)
),

cohorts as (
    select
        applications.event_id,
        applications.event_date,
        applications.mortgage_id,
        applications.stage,
        stages.step,
        datetrunc('month', first_value(applications.event_date) over (
            partition by applications.mortgage_id
            order by stages.step
        )) as cohort
    from applications
        inner join stages
            using (stage)
),

cohorts_by_stage as (
    select
        cohort,
        stage,
        any_value(step) as step,
        count(*) as mortgages
    from cohorts
    group by
        cohort,
        stage
),

axis as (
    select
        cohort,
        stages.stage,
        stages.step
    from (select distinct cohort from cohorts_by_stage)
        cross join stages
),

funnel as (
    select
        axis.cohort,
        axis.stage,
        axis.step,
        coalesce(cohorts_by_stage.mortgages, 0) as mortgages,
    from axis
        left join cohorts_by_stage
            using (cohort, step)
)

select
    strftime(cohort, '%Y-%m') as cohort,
    stage,
    mortgages,
    round(100.0 * coalesce(mortgages / lag(mortgages, 1, mortgages) over cohort_by_step, 0), 2) as step_rate,
    round(100.0 * mortgages / first_value(mortgages) over cohort_by_step, 2) as total_rate,
from funnel
window cohort_by_step as (
    partition by cohort
    order by step
)
order by
    cohort,
    step