import pandas as pd
from statsmodels.tsa.seasonal import STL
import matplotlib.pyplot as plt
from datetime import datetime
import os
data_folder = '../data/'

3: Seasonal-Trend Decomposition using LOESS (STL)#

Read the Data#

ice_cream_interest = pd.read_csv(os.path.join(data_folder, 'ice_cream_interest.csv'))
ice_cream_interest['month'] = pd.to_datetime(ice_cream_interest.month)
ice_cream_interest.set_index('month', inplace=True)
plt.figure(figsize=(10,4))
plt.plot(ice_cream_interest)
[<matplotlib.lines.Line2D at 0x7faa9cc4a580>]
../../../_images/cb1a003154d416993d75e6315d38b7b5941107c05a333fa47595cbdef66af8bd.png
plt.figure(figsize=(10,4))
plt.plot(ice_cream_interest)
for year in range(2004,2021):
    plt.axvline(datetime(year,1,1), color='k', linestyle='--', alpha=0.5)
../../../_images/4fb5c236ed04915f1d392447b70212b05d90a47a1aa71a36ee5055e384a51694.png

Visual Inspection: Mid-2011 and Late-2016#

Perform STL Decomp#

stl = STL(ice_cream_interest)
result = stl.fit()
seasonal, trend, resid = result.seasonal, result.trend, result.resid
plt.figure(figsize=(8,6))

plt.subplot(4,1,1)
plt.plot(ice_cream_interest)
plt.title('Original Series', fontsize=16)

plt.subplot(4,1,2)
plt.plot(trend)
plt.title('Trend', fontsize=16)

plt.subplot(4,1,3)
plt.plot(seasonal)
plt.title('Seasonal', fontsize=16)

plt.subplot(4,1,4)
plt.plot(resid)
plt.title('Residual', fontsize=16)

plt.tight_layout()
../../../_images/eeeed2276866d7d06865660cf6b0fd8ab797168069675432d51a1cd43b010781.png
estimated = trend + seasonal
plt.figure(figsize=(12,4))
plt.plot(ice_cream_interest)
plt.plot(estimated)
[<matplotlib.lines.Line2D at 0x7faa9cb6a310>]
../../../_images/32a57675172fb57b388a30c00a1be3209f7aece1b3ea909252727f50a0e56439.png

Anomaly Detection#

resid_mu = resid.mean()
resid_dev = resid.std()

lower = resid_mu - 3*resid_dev
upper = resid_mu + 3*resid_dev
plt.figure(figsize=(10,4))
plt.plot(resid)

plt.fill_between([datetime(2003,1,1), datetime(2021,8,1)], lower, upper, color='g', alpha=0.25, linestyle='--', linewidth=2)
plt.xlim(datetime(2003,9,1), datetime(2020,12,1))
(12296.0, 18597.0)
../../../_images/4fed803325a67af87efe5174098fd837ec90aa15e3d6c6892e1c23596345c0aa.png
anomalies = ice_cream_interest[(resid < lower) | (resid > upper)]
plt.figure(figsize=(10,4))
plt.plot(ice_cream_interest)
for year in range(2004,2021):
    plt.axvline(datetime(year,1,1), color='k', linestyle='--', alpha=0.5)
    
plt.scatter(anomalies.index, anomalies.interest, color='r', marker='D')
<matplotlib.collections.PathCollection at 0x7faa9d44e190>
../../../_images/6d1e689e19fcfdad27e26f26dc6b55a70b45954402cae80ca2738ee7836ddc3a.png
anomalies
interest
month
2011-04-01 45
2015-12-01 25
2016-12-01 66