import pandas as pd
from statsmodels.tsa.seasonal import STL
import matplotlib.pyplot as plt
from datetime import datetime
import os
# Directory holding the input CSV files; a relative path, so it is resolved
# against the current working directory when the data is read.
data_folder = '../data/'
3: Seasonal-Trend Decomposition using LOESS (STL)
Read the Data
# Load ice_cream_interest.csv and index the frame by its parsed 'month' column.
csv_path = os.path.join(data_folder, 'ice_cream_interest.csv')
ice_cream_interest = pd.read_csv(csv_path)
ice_cream_interest['month'] = pd.to_datetime(ice_cream_interest['month'])
ice_cream_interest = ice_cream_interest.set_index('month')

# First look at the raw series.
plt.figure(figsize=(10, 4))
plt.plot(ice_cream_interest)
[<matplotlib.lines.Line2D at 0x7faa9cc4a580>]
data:image/s3,"s3://crabby-images/38610/38610d2842f8366faf6dcb12bdde005be203d727" alt="../../../_images/cb1a003154d416993d75e6315d38b7b5941107c05a333fa47595cbdef66af8bd.png"
# Re-plot the series with a dashed vertical line on 1 January of every year,
# marking the year boundaries.
plt.figure(figsize=(10,4))
plt.plot(ice_cream_interest)
for year in range(2004,2021):
    # range() excludes its stop value, so the last marker is 1 Jan 2020.
    plt.axvline(datetime(year,1,1), color='k', linestyle='--', alpha=0.5)
data:image/s3,"s3://crabby-images/95681/95681fe658db6c2d918382a9fe3ebb8cfc7d7441" alt="../../../_images/4fb5c236ed04915f1d392447b70212b05d90a47a1aa71a36ee5055e384a51694.png"
Visual Inspection: Mid-2011 and Late-2016
Perform STL Decomposition
# Fit STL on the indexed series. No `period` argument is passed here, so STL
# has to determine it itself (presumably from the DatetimeIndex frequency —
# confirm against the statsmodels documentation).
stl = STL(ice_cream_interest)
result = stl.fit()
seasonal = result.seasonal
trend = result.trend
resid = result.resid

# Stack the observed series and its three components in a 4x1 grid,
# one titled panel per series.
panels = [
    ('Original Series', ice_cream_interest),
    ('Trend', trend),
    ('Seasonal', seasonal),
    ('Residual', resid),
]
plt.figure(figsize=(8, 6))
for position, (label, series) in enumerate(panels, start=1):
    plt.subplot(4, 1, position)
    plt.plot(series)
    plt.title(label, fontsize=16)
plt.tight_layout()
data:image/s3,"s3://crabby-images/0af25/0af25aa9fc2a68a9c6df6a68234a65d37222a929" alt="../../../_images/eeeed2276866d7d06865660cf6b0fd8ab797168069675432d51a1cd43b010781.png"
# Reconstruction of the series from the fitted components only, i.e. the
# decomposition with the residual left out.
estimated = seasonal + trend

# Overlay the reconstruction on the observed series.
plt.figure(figsize=(12, 4))
plt.plot(ice_cream_interest)
plt.plot(estimated)
[<matplotlib.lines.Line2D at 0x7faa9cb6a310>]
data:image/s3,"s3://crabby-images/08c0b/08c0ba14ec980a772a226fa7fc5c8ae8d18363fe" alt="../../../_images/32a57675172fb57b388a30c00a1be3209f7aece1b3ea909252727f50a0e56439.png"
Anomaly Detection
# Acceptance band for the residual: its mean plus/minus three standard deviations.
resid_mu, resid_dev = resid.mean(), resid.std()
band_half_width = 3*resid_dev
lower = resid_mu - band_half_width
upper = resid_mu + band_half_width

# Residual with the band shaded. The shaded x-span is deliberately wider than
# the x-limits set below, so the band fills the whole visible width.
band_span = [datetime(2003,1,1), datetime(2021,8,1)]
plt.figure(figsize=(10, 4))
plt.plot(resid)
plt.fill_between(band_span, lower, upper, color='g', alpha=0.25, linestyle='--', linewidth=2)
plt.xlim(datetime(2003,9,1), datetime(2020,12,1))
(12296.0, 18597.0)
data:image/s3,"s3://crabby-images/41a44/41a4459ef1e729261b341e0b13c820c56d481188" alt="../../../_images/4fed803325a67af87efe5174098fd837ec90aa15e3d6c6892e1c23596345c0aa.png"
# Keep the observations whose STL residual falls outside the [lower, upper] band.
outside_band = (resid < lower) | (resid > upper)
anomalies = ice_cream_interest[outside_band]

# Series with year-boundary markers, anomalies highlighted as red diamonds.
plt.figure(figsize=(10,4))
plt.plot(ice_cream_interest)
for year in range(2004,2021):
    plt.axvline(datetime(year,1,1), color='k', linestyle='--', alpha=0.5)
# Drawn once, after the loop: the scatter is the cell's final expression.
plt.scatter(anomalies.index, anomalies.interest, color='r', marker='D')
<matplotlib.collections.PathCollection at 0x7faa9d44e190>
data:image/s3,"s3://crabby-images/e00e9/e00e9816d66beed442313b84b09b62ee1fa31351" alt="../../../_images/6d1e689e19fcfdad27e26f26dc6b55a70b45954402cae80ca2738ee7836ddc3a.png"
# Bare expression: displays the rows selected as anomalies when run as a notebook cell.
anomalies
| month | interest |
|---|---|
| 2011-04-01 | 45 |
| 2015-12-01 | 25 |
| 2016-12-01 | 66 |