Skip to content

Temperature anomaly detection 🔍

Tip

Solution to the following problem:

Result Set

Regardless of the database, the result set should look like:

site_id reading_datetime temperature average_temperature percentage_increase
1 2021-01-02 02:01:17 22.43 20.0525 11.8564
1 2021-01-04 21:45:34 22.69 20.2700 11.9388
2 2021-01-02 01:59:43 23.10 20.6050 12.1087
Expand for the DDL
solution(site_id, reading_datetime, temperature, average_temperature, percentage_increase) as (
    values
        (1, '2021-01-02 02:01:17'::timestamp, 22.43, 20.0525, 11.8564),
        (1, '2021-01-04 21:45:34'::timestamp, 22.69, 20.2700, 11.9388),
        (2, '2021-01-02 01:59:43'::timestamp, 23.10, 20.6050, 12.1087)
)

Solution

Some SQL solutions per database are provided below.

DuckDB

with temperatures as (
    select
        site_id,
        reading_datetime,
        temperature,
        avg(temperature) over rows_around_site_reading as average_temperature
    from readings
    window rows_around_site_reading as (
        partition by site_id
        order by reading_datetime
        rows between 2 preceding
                 and 2 following
             exclude current row
    )
    qualify 4 = count(*) over rows_around_site_reading
)

select
    site_id,
    reading_datetime,
    temperature,
    round(average_temperature, 4) as average_temperature,
    round(100.0 * (temperature - average_temperature) / average_temperature, 4) as percentage_increase
from temperatures
where percentage_increase > 10
order by
    site_id,
    reading_datetime

SQLite, PostgreSQL

with temperatures as (
    select
        site_id,
        reading_datetime,
        temperature,
        avg(temperature) over rows_around_site_reading as average_temperature,
        count(*) over rows_around_site_reading as count_of_rows
    from readings
    window rows_around_site_reading as (
        partition by site_id
        order by reading_datetime
        rows between 2 preceding
                 and 2 following
             exclude current row
    )
)

select
    site_id,
    reading_datetime,
    temperature,
    round(average_temperature, 4) as average_temperature,
    round(100.0 * (temperature - average_temperature) / average_temperature, 4) as percentage_increase
from temperatures
where 1=1
    and count_of_rows = 4
    and (temperature - average_temperature) / average_temperature > 0.1
order by
    site_id,
    reading_datetime

Snowflake

with temperatures as (
    select
        site_id,
        reading_datetime,
        temperature,
        sum(temperature) over (
            partition by site_id
            order by reading_datetime rows between 2 preceding and 2 following
        ) as sum_temps
    from readings
    qualify 5 = count(*) over (
        partition by site_id
        order by reading_datetime rows between 2 preceding and 2 following
    )
)

select
    * exclude (sum_temps),
    round((sum_temps - temperature) / 4, 4) as average_temperature,
    round(100 * (temperature - average_temperature) / temperature, 4) as percentage_increase
from temperatures
where percentage_increase > 10
order by
    site_id,
    reading_datetime

SQL Server

with temperatures as (
    select
        site_id,
        reading_datetime,
        temperature,
        (sum(temperature) over rows_around_site_reading - temperature) / 4 as average_temperature,
        count(*) over rows_around_site_reading as count_of_rows
    from readings
    window rows_around_site_reading as (
        partition by site_id
        order by reading_datetime rows between 2 preceding and 2 following
    )
)

select
    site_id,
    reading_datetime,
    temperature,
    round(average_temperature, 4) as average_temperature,
    round(100.0 * (temperature - average_temperature) / temperature, 4) as percentage_increase
from temperatures
where 1=1
    and count_of_rows = 5
    and (temperature - average_temperature) / temperature > 0.1
order by
    site_id,
    reading_datetime