Temperature anomaly detection 🔍

Tip

Solution to the following problem:

temperature-anomaly-detection.md

Result Set

Regardless of the database, the result set should look like:

site_id	reading_datetime	temperature	average_temperature	percentage_increase
1	2021-01-02 02:01:17	22.43	20.0525	11.8564
1	2021-01-04 21:45:34	22.69	20.2700	11.9388
2	2021-01-02 01:59:43	23.10	20.6050	12.1087

Expand for the DDL

solution(site_id, reading_datetime, temperature, average_temperature, percentage_increase) as (
    values
        (1, '2021-01-02 02:01:17'::timestamp, 22.43, 20.0525, 11.8564),
        (1, '2021-01-04 21:45:34'::timestamp, 22.69, 20.2700, 11.9388),
        (2, '2021-01-02 01:59:43'::timestamp, 23.10, 20.6050, 12.1087)
)

Solution

Some SQL solutions per database are provided below.

DuckDB

with temperatures as (
    select
        site_id,
        reading_datetime,
        temperature,
        avg(temperature) over rows_around_site_reading as average_temperature
    from readings
    window rows_around_site_reading as (
        partition by site_id
        order by reading_datetime
        rows between 2 preceding
                 and 2 following
             exclude current row
    )
    qualify 4 = count(*) over rows_around_site_reading
)

select
    site_id,
    reading_datetime,
    temperature,
    round(average_temperature, 4) as average_temperature,
    round(100.0 * (temperature - average_temperature) / average_temperature, 4) as percentage_increase
from temperatures
where percentage_increase > 10
order by
    site_id,
    reading_datetime

SQLite, PostgreSQL

with temperatures as (
    select
        site_id,
        reading_datetime,
        temperature,
        avg(temperature) over rows_around_site_reading as average_temperature,
        count(*) over rows_around_site_reading as count_of_rows
    from readings
    window rows_around_site_reading as (
        partition by site_id
        order by reading_datetime
        rows between 2 preceding
                 and 2 following
             exclude current row
    )
)

select
    site_id,
    reading_datetime,
    temperature,
    round(average_temperature, 4) as average_temperature,
    round(100.0 * (temperature - average_temperature) / average_temperature, 4) as percentage_increase
from temperatures
where 1=1
    and count_of_rows = 4
    and (temperature - average_temperature) / average_temperature > 0.1
order by
    site_id,
    reading_datetime

Snowflake

with temperatures as (
    select
        site_id,
        reading_datetime,
        temperature,
        sum(temperature) over (
            partition by site_id
            order by reading_datetime rows between 2 preceding and 2 following
        ) as sum_temps
    from readings
    qualify 5 = count(*) over (
        partition by site_id
        order by reading_datetime rows between 2 preceding and 2 following
    )
)

select
    * exclude (sum_temps),
    round((sum_temps - temperature) / 4, 4) as average_temperature,
    round(100 * (temperature - average_temperature) / temperature, 4) as percentage_increase
from temperatures
where percentage_increase > 10
order by
    site_id,
    reading_datetime

SQL Server

with temperatures as (
    select
        site_id,
        reading_datetime,
        temperature,
        (sum(temperature) over rows_around_site_reading - temperature) / 4 as average_temperature,
        count(*) over rows_around_site_reading as count_of_rows
    from readings
    window rows_around_site_reading as (
        partition by site_id
        order by reading_datetime rows between 2 preceding and 2 following
    )
)

select
    site_id,
    reading_datetime,
    temperature,
    round(average_temperature, 4) as average_temperature,
    round(100.0 * (temperature - average_temperature) / temperature, 4) as percentage_increase
from temperatures
where 1=1
    and count_of_rows = 5
    and (temperature - average_temperature) / temperature > 0.1
order by
    site_id,
    reading_datetime