Funnel analytics ⏬
Result Set
Regardless of the database, the result set should look like:
cohort | stage | mortgages | step_rate | total_rate |
---|---|---|---|---|
2024-01 | full application | 4 | 100.00 | 100.00 |
2024-01 | decision | 4 | 100.00 | 100.00 |
2024-01 | documentation | 3 | 75.00 | 75.00 |
2024-01 | valuation inspection | 3 | 100.00 | 75.00 |
2024-01 | valuation made | 3 | 100.00 | 75.00 |
2024-01 | valuation submitted | 3 | 100.00 | 75.00 |
2024-01 | solicitation | 1 | 33.33 | 25.00 |
2024-01 | funds released | 1 | 100.00 | 25.00 |
2024-02 | full application | 6 | 100.00 | 100.00 |
2024-02 | decision | 6 | 100.00 | 100.00 |
2024-02 | documentation | 4 | 66.67 | 66.67 |
2024-02 | valuation inspection | 4 | 100.00 | 66.67 |
2024-02 | valuation made | 4 | 100.00 | 66.67 |
2024-02 | valuation submitted | 4 | 100.00 | 66.67 |
2024-02 | solicitation | 3 | 75.00 | 50.00 |
2024-02 | funds released | 3 | 100.00 | 50.00 |
2024-03 | full application | 3 | 100.00 | 100.00 |
2024-03 | decision | 3 | 100.00 | 100.00 |
2024-03 | documentation | 1 | 33.33 | 33.33 |
2024-03 | valuation inspection | 1 | 100.00 | 33.33 |
2024-03 | valuation made | 1 | 100.00 | 33.33 |
2024-03 | valuation submitted | 1 | 100.00 | 33.33 |
2024-03 | solicitation | 0 | 0.00 | 0.00 |
2024-03 | funds released | 0 | 0.00 | 0.00 |
Expand for the DDL
solution(cohort, stage, mortgages, step_rate, total_rate) as (
values
('2024-01', 'full application', 4, 100.00, 100.00),
('2024-01', 'decision', 4, 100.00, 100.00),
('2024-01', 'documentation', 3, 75.00, 75.00),
('2024-01', 'valuation inspection', 3, 100.00, 75.00),
('2024-01', 'valuation made', 3, 100.00, 75.00),
('2024-01', 'valuation submitted', 3, 100.00, 75.00),
('2024-01', 'solicitation', 1, 33.33, 25.00),
('2024-01', 'funds released', 1, 100.00, 25.00),
('2024-02', 'full application', 6, 100.00, 100.00),
('2024-02', 'decision', 6, 100.00, 100.00),
('2024-02', 'documentation', 4, 66.67, 66.67),
('2024-02', 'valuation inspection', 4, 100.00, 66.67),
('2024-02', 'valuation made', 4, 100.00, 66.67),
('2024-02', 'valuation submitted', 4, 100.00, 66.67),
('2024-02', 'solicitation', 3, 75.00, 50.00),
('2024-02', 'funds released', 3, 100.00, 50.00),
('2024-03', 'full application', 3, 100.00, 100.00),
('2024-03', 'decision', 3, 100.00, 100.00),
('2024-03', 'documentation', 1, 33.33, 33.33),
('2024-03', 'valuation inspection', 1, 100.00, 33.33),
('2024-03', 'valuation made', 1, 100.00, 33.33),
('2024-03', 'valuation submitted', 1, 100.00, 33.33),
('2024-03', 'solicitation', 0, 0.00, 0.00),
('2024-03', 'funds released', 0, 0.00, 0.00)
)
Solution
Some SQL solutions per database are provided below.
DuckDB
with
stages(stage, step) as (
values
('full application', 1),
('decision', 2),
('documentation', 3),
('valuation inspection', 4),
('valuation made', 5),
('valuation submitted', 6),
('solicitation', 7),
('funds released', 8),
),
cohorts as (
select
applications.event_id,
applications.event_date,
applications.mortgage_id,
applications.stage,
stages.step,
first_value(applications.event_date) over (
partition by applications.mortgage_id
order by stages.step
).strftime('%Y-%m') as cohort,
from applications
inner join stages
using (stage)
),
cohorts_by_stage as (
select
cohort,
stage,
any_value(step) as step,
count(*) as cohort_mortgages,
from cohorts
group by
cohort,
stage
),
axis as (
select
cohort,
stages.stage,
stages.step
from (select distinct cohort from cohorts_by_stage)
cross join stages
),
funnel as (
select
axis.cohort,
axis.stage,
axis.step,
coalesce(cohorts_by_stage.cohort_mortgages, 0) as mortgages,
lag(mortgages, 1, mortgages) over cohort_by_step as prev_mortgages,
first_value(mortgages) over cohort_by_step as first_mortgages,
from axis
left join cohorts_by_stage
using (cohort, step)
window cohort_by_step as (
partition by axis.cohort
order by axis.step
)
)
select
cohort,
stage,
mortgages,
round(100.0 * if(prev_mortgages = 0, 0, mortgages / prev_mortgages), 2) as step_rate,
round(100.0 * if(first_mortgages = 0, 0, mortgages / first_mortgages), 2) as total_rate,
from funnel
order by
cohort,
step