import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from pandas.plotting import register_matplotlib_converters
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
register_matplotlib_converters()
from time import time

11: Anomaly Detection#

def parser(s):
    return datetime.strptime(s, '%Y-%m-%d')
#read data
catfish_sales = pd.read_csv('../data/catfish.csv', parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
/tmp/ipykernel_3777558/2935818357.py:2: FutureWarning: The squeeze argument has been deprecated and will be removed in a future version. Append .squeeze("columns") to the call to squeeze.


  catfish_sales = pd.read_csv('../data/catfish.csv', parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
#infer the frequency of the data
catfish_sales = catfish_sales.asfreq(pd.infer_freq(catfish_sales.index))
start_date = datetime(1996,1,1)
end_date = datetime(2000,1,1)
lim_catfish_sales = catfish_sales[start_date:end_date]
#At December 1 1998
lim_catfish_sales[datetime(1998,12,1)] = 10000
plt.figure(figsize=(10,4))
plt.plot(lim_catfish_sales)
plt.title('Catfish Sales in 1000s of Pounds', fontsize=20)
plt.ylabel('Sales', fontsize=16)
for year in range(start_date.year,end_date.year):
    plt.axvline(pd.to_datetime(str(year)+'-01-01'), color='k', linestyle='--', alpha=0.2)
../../../_images/17cb280ff6ef0f1e289519c116400b826b1f64c728606046b35172f3ec27cdef.png

Remove the trend#

first_diff = lim_catfish_sales.diff()[1:]
plt.figure(figsize=(10,4))
plt.plot(first_diff)
plt.title('Catfish Sales in 1000s of Pounds', fontsize=20)
plt.ylabel('Sales', fontsize=16)
for year in range(start_date.year,end_date.year):
    plt.axvline(pd.to_datetime(str(year)+'-01-01'), color='k', linestyle='--', alpha=0.2)
plt.axhline(0, color='k', linestyle='--', alpha=0.2)
<matplotlib.lines.Line2D at 0x7f10b1cedca0>
../../../_images/8c74e38bcbf984da91b8f0a0987d2ed709a752315377b8344eea467804833480.png

Get training and testing sets#

train_end = datetime(1999,7,1)
test_end = datetime(2000,1,1)

test_data = lim_catfish_sales[train_end + timedelta(days=1):test_end]

Make Predictions#

my_order = (0,1,0)
my_seasonal_order = (1, 0, 1, 12)
rolling_predictions = test_data.copy()
for train_end in test_data.index:
    train_data = lim_catfish_sales[:train_end-timedelta(days=1)]
    model = SARIMAX(train_data, order=my_order, seasonal_order=my_seasonal_order)
    model_fit = model.fit()

    pred = model_fit.forecast()
    rolling_predictions[train_end] = pred
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/tsa/statespace/sarimax.py:997: UserWarning: Non-stationary starting seasonal autoregressive Using zeros as starting parameters.
  warn('Non-stationary starting seasonal autoregressive'
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/tsa/statespace/sarimax.py:1009: UserWarning: Non-invertible starting seasonal moving average Using zeros as starting parameters.
  warn('Non-invertible starting seasonal moving average'
 This problem is unconstrained.
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            3     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.00865D+01    |proj g|=  1.88677D+00

At iterate    5    f=  9.42806D+00    |proj g|=  1.15193D-01

At iterate   10    f=  9.37825D+00    |proj g|=  3.57802D-03

At iterate   15    f=  9.37017D+00    |proj g|=  1.52764D-02

At iterate   20    f=  9.25763D+00    |proj g|=  1.18506D-01

At iterate   25    f=  9.21700D+00    |proj g|=  9.28828D-04

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    3     27     29      1     0     0   9.042D-08   9.217D+00
  F =   9.2169957488805672     

CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL            
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/tsa/statespace/sarimax.py:1009: UserWarning: Non-invertible starting seasonal moving average Using zeros as starting parameters.
  warn('Non-invertible starting seasonal moving average'
 This problem is unconstrained.
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            3     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.08193D+01    |proj g|=  5.52980D-01

At iterate    5    f=  9.46761D+00    |proj g|=  9.12352D-02

At iterate   10    f=  9.40117D+00    |proj g|=  4.02561D-02

At iterate   15    f=  9.36519D+00    |proj g|=  2.66287D-02
  ys=-2.680E-02  -gs= 6.684E-03 BFGS update SKIPPED
  ys=-3.133E-06  -gs= 2.118E-07 BFGS update SKIPPED
 Bad direction in the line search;
   refresh the lbfgs memory and restart the iteration.
At iterate   20    f=  9.35244D+00    |proj g|=  6.45941D-03

At iterate   25    f=  9.35242D+00    |proj g|=  5.26224D-03
 Bad direction in the line search;
   refresh the lbfgs memory and restart the iteration.

 Bad direction in the line search;
   refresh the lbfgs memory and restart the iteration.
At iterate   30    f=  9.35237D+00    |proj g|=  7.37719D-03

At iterate   35    f=  9.35225D+00    |proj g|=  1.23386D-02
  ys=-1.927E-04  -gs= 3.436E-05 BFGS update SKIPPED

At iterate   40    f=  9.35219D+00    |proj g|=  1.73730D-02
  ys=-2.328E-07  -gs= 1.077E-07 BFGS update SKIPPED
 Bad direction in the line search;
   refresh the lbfgs memory and restart the iteration.

 Warning:  more than 10 function and gradient
   evaluations in the last line search.  Termination
   may possibly be caused by a bad search direction.
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/tsa/statespace/sarimax.py:997: UserWarning: Non-stationary starting seasonal autoregressive Using zeros as starting parameters.
  warn('Non-stationary starting seasonal autoregressive'
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/tsa/statespace/sarimax.py:1009: UserWarning: Non-invertible starting seasonal moving average Using zeros as starting parameters.
  warn('Non-invertible starting seasonal moving average'
 This problem is unconstrained.
At iterate   45    f=  9.35217D+00    |proj g|=  1.62073D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    3     45    228      5     4     0   1.621D-03   9.352D+00
  F =   9.3521737365165123     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            3     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.02169D+01    |proj g|=  2.05161D+00

At iterate    5    f=  9.45797D+00    |proj g|=  6.72771D-02

At iterate   10    f=  9.40808D+00    |proj g|=  2.42390D-03

At iterate   15    f=  9.39350D+00    |proj g|=  2.13080D-02

At iterate   20    f=  9.20360D+00    |proj g|=  1.32847D-02

At iterate   25    f=  9.20276D+00    |proj g|=  2.92847D-05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    3     26     29      1     0     0   2.867D-05   9.203D+00
  F =   9.2027590959410581     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/tsa/statespace/sarimax.py:1009: UserWarning: Non-invertible starting seasonal moving average Using zeros as starting parameters.
  warn('Non-invertible starting seasonal moving average'
 This problem is unconstrained.

 Warning:  more than 10 function and gradient
   evaluations in the last line search.  Termination
   may possibly be caused by a bad search direction.
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            3     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  9.74977D+00    |proj g|=  6.76205D-01

At iterate    5    f=  9.42227D+00    |proj g|=  3.14046D-01

At iterate   10    f=  9.24662D+00    |proj g|=  7.58416D-02

At iterate   15    f=  9.20068D+00    |proj g|=  2.17069D-02

At iterate   20    f=  9.17745D+00    |proj g|=  5.55019D-02
  ys=-1.070E-01  -gs= 1.840E-02 BFGS update SKIPPED

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    3     21     41      1     1     0   5.597D-02   9.177D+00
  F =   9.1774487580915114     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/tsa/statespace/sarimax.py:1009: UserWarning: Non-invertible starting seasonal moving average Using zeros as starting parameters.
  warn('Non-invertible starting seasonal moving average'
 This problem is unconstrained.
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            3     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.06607D+01    |proj g|=  5.90030D-01

At iterate    5    f=  9.41178D+00    |proj g|=  1.06608D-01

At iterate   10    f=  9.34059D+00    |proj g|=  4.62385D-02

At iterate   15    f=  9.30427D+00    |proj g|=  5.71632D-02
  ys=-1.872E-01  -gs= 1.328E-02 BFGS update SKIPPED
 Bad direction in the line search;
   refresh the lbfgs memory and restart the iteration.
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/tsa/statespace/sarimax.py:1009: UserWarning: Non-invertible starting seasonal moving average Using zeros as starting parameters.
  warn('Non-invertible starting seasonal moving average'
 This problem is unconstrained.
At iterate   20    f=  9.29452D+00    |proj g|=  4.89056D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    3     21     72      2     1     0   5.177D-03   9.295D+00
  F =   9.2945184761749271     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            3     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  9.44886D+00    |proj g|=  6.73274D-01

At iterate    5    f=  9.32563D+00    |proj g|=  5.75231D-01

At iterate   10    f=  9.10124D+00    |proj g|=  7.51289D-02

At iterate   15    f=  9.05847D+00    |proj g|=  1.32918D-02

At iterate   20    f=  9.04952D+00    |proj g|=  9.02023D-03

At iterate   25    f=  9.04658D+00    |proj g|=  1.69952D-03

At iterate   30    f=  9.04650D+00    |proj g|=  5.39719D-04

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    3     32     49      1     0     0   5.630D-03   9.046D+00
  F =   9.0464650017000530     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             
rolling_residuals = test_data - rolling_predictions
plt.figure(figsize=(10,4))
plt.plot(rolling_residuals)
plt.axhline(0, linestyle='--', color='k')
plt.title('Rolling Forecast Residuals from SARIMA Model', fontsize=20)
plt.ylabel('Error', fontsize=16)
Text(0, 0.5, 'Error')
../../../_images/913489a9d4a77139c79aca0a7e5b1da85b0a02d6ff19eb7f4be9b7b0f760d109.png
plt.figure(figsize=(10,4))

plt.plot(lim_catfish_sales)
plt.plot(rolling_predictions)

plt.legend(('Data', 'Predictions'), fontsize=16)

plt.title('Catfish Sales in 1000s of Pounds', fontsize=20)
plt.ylabel('Production', fontsize=16)
for year in range(start_date.year,end_date.year):
    plt.axvline(pd.to_datetime(str(year)+'-01-01'), color='k', linestyle='--', alpha=0.2)
../../../_images/0de452ab1e88ab00140854afdfd8e53f1a14e144558496501232356b2095044c.png
print('Mean Absolute Percent Error:', round(np.mean(abs(rolling_residuals/test_data)),4))
Mean Absolute Percent Error: 0.0815
print('Root Mean Squared Error:', np.sqrt(np.mean(rolling_residuals**2)))
Root Mean Squared Error: 2447.4759538453186

Detecting the Anomaly#

Attempt 1: Deviation Method#

plt.figure(figsize=(10,4))
plt.plot(lim_catfish_sales)
plt.title('Catfish Sales in 1000s of Pounds', fontsize=20)
plt.ylabel('Sales', fontsize=16)
for year in range(start_date.year,end_date.year):
    plt.axvline(pd.to_datetime(str(year)+'-01-01'), color='k', linestyle='--', alpha=0.2)
../../../_images/17cb280ff6ef0f1e289519c116400b826b1f64c728606046b35172f3ec27cdef.png
rolling_deviations = pd.Series(dtype=float, index = lim_catfish_sales.index)
for date in rolling_deviations.index:
    #get the window ending at this data point
    window = lim_catfish_sales.loc[:date]
    
    #get the deviation within this window
    rolling_deviations.loc[date] = window.std()
#get the difference in deviation between one time point and the next
diff_rolling_deviations = rolling_deviations.diff()
diff_rolling_deviations = diff_rolling_deviations.dropna()
plt.figure(figsize=(10,4))
plt.plot(diff_rolling_deviations)
plt.title('Deviation Differences', fontsize=20)
plt.ylabel('Sales', fontsize=16)
for year in range(start_date.year,end_date.year):
    plt.axvline(pd.to_datetime(str(year)+'-01-01'), color='k', linestyle='--', alpha=0.2)
../../../_images/948587e5a9e55a7e9b0db7ba18a7ed733f153b8f2d2292989de9defa585f72c8.png

Attempt 2: Seasonal Method#

month_deviations = lim_catfish_sales.groupby(lambda d: d.month).std()
plt.figure(figsize=(10,4))
plt.plot(month_deviations)
plt.title('Deviation by Month', fontsize=20)
plt.ylabel('Sales', fontsize=16)
Text(0, 0.5, 'Sales')
../../../_images/2ca9c2cc03efc6de2c15368741aaa2c4521548b546c99a274a28127fab6a12d3.png

So, the anomaly occurs in a December#

december_data = lim_catfish_sales[lim_catfish_sales.index.month == 12]
december_data
Date
1996-12-01    16898
1997-12-01    18278
1998-12-01    10000
1999-12-01    22372
Name: Total, dtype: int64
min_dev = 9999999
curr_anomaly = None
for date in december_data.index:
    other_data = december_data[december_data.index != date]
    curr_dev = other_data.std()
    if curr_dev < min_dev:
        min_dev = curr_dev
        curr_anomaly = date
curr_anomaly
Timestamp('1998-12-01 00:00:00')

What to do about the anomaly?#

Simple Idea: use mean of other months#

adjusted_data = lim_catfish_sales.copy()
adjusted_data.loc[curr_anomaly] = december_data[(december_data.index != curr_anomaly) & (december_data.index < test_data.index[0])].mean()
plt.figure(figsize=(10,4))
plt.plot(lim_catfish_sales, color='firebrick', alpha=0.4)
plt.plot(adjusted_data)
plt.title('Catfish Sales in 1000s of Pounds', fontsize=20)
plt.ylabel('Sales', fontsize=16)
for year in range(start_date.year,end_date.year):
    plt.axvline(pd.to_datetime(str(year)+'-01-01'), color='k', linestyle='--', alpha=0.2)
plt.axvline(curr_anomaly, color='k', alpha=0.7)
<matplotlib.lines.Line2D at 0x7f10b1a3cd30>
../../../_images/1e5f388e3616b81d606b1fd0ae0508b76858fcd544aad76b1853966a005907e3.png

Resulting Predictions#

train_end = datetime(1999,7,1)
test_end = datetime(2000,1,1)

test_data = adjusted_data[train_end + timedelta(days=1):test_end]
rolling_predictions = test_data.copy()
for train_end in test_data.index:
    train_data = adjusted_data[:train_end-timedelta(days=1)]
    model = SARIMAX(train_data, order=my_order, seasonal_order=my_seasonal_order)
    model_fit = model.fit()

    pred = model_fit.forecast()
    rolling_predictions[train_end] = pred
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/tsa/statespace/sarimax.py:997: UserWarning: Non-stationary starting seasonal autoregressive Using zeros as starting parameters.
  warn('Non-stationary starting seasonal autoregressive'
 This problem is unconstrained.
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/tsa/statespace/sarimax.py:997: UserWarning: Non-stationary starting seasonal autoregressive Using zeros as starting parameters.
  warn('Non-stationary starting seasonal autoregressive'
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/tsa/statespace/sarimax.py:1009: UserWarning: Non-invertible starting seasonal moving average Using zeros as starting parameters.
  warn('Non-invertible starting seasonal moving average'
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            3     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  9.02989D+00    |proj g|=  1.39448D+00

At iterate    5    f=  8.65274D+00    |proj g|=  4.92136D-01

At iterate   10    f=  8.58895D+00    |proj g|=  1.41914D-02

At iterate   15    f=  8.58843D+00    |proj g|=  1.95742D-04

At iterate   20    f=  8.58841D+00    |proj g|=  8.25947D-03

At iterate   25    f=  8.58344D+00    |proj g|=  6.37140D-02

At iterate   30    f=  8.52923D+00    |proj g|=  6.24736D-03

At iterate   35    f=  8.52907D+00    |proj g|=  1.02194D-03

At iterate   40    f=  8.52895D+00    |proj g|=  1.88970D-03

At iterate   45    f=  8.52886D+00    |proj g|=  1.44991D-03

At iterate   50    f=  8.52884D+00    |proj g|=  6.55285D-04

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    3     50     68      1     0     0   6.553D-04   8.529D+00
  F =   8.5288391511597741     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT                 
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            3     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  9.01339D+00    |proj g|=  1.34785D+00

At iterate    5    f=  8.64177D+00    |proj g|=  4.38112D-01

At iterate   10    f=  8.59046D+00    |proj g|=  1.04906D-02

At iterate   15    f=  8.59022D+00    |proj g|=  2.14865D-04

At iterate   20    f=  8.59019D+00    |proj g|=  8.95238D-03

At iterate   25    f=  8.58544D+00    |proj g|=  6.49231D-02

At iterate   30    f=  8.52891D+00    |proj g|=  6.86211D-04

At iterate   35    f=  8.52880D+00    |proj g|=  3.23347D-03

At iterate   40    f=  8.52871D+00    |proj g|=  9.21060D-04
 This problem is unconstrained.

 Bad direction in the line search;
   refresh the lbfgs memory and restart the iteration.
           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    3     44     74      2     0     0   8.741D-06   8.529D+00
  F =   8.5286758066920854     

CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL            
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            3     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  9.05490D+00    |proj g|=  1.51925D+00

At iterate    5    f=  8.64187D+00    |proj g|=  1.28743D-01

At iterate   10    f=  8.56327D+00    |proj g|=  2.44168D-02

At iterate   15    f=  8.56172D+00    |proj g|=  1.76571D-04

At iterate   20    f=  8.56171D+00    |proj g|=  1.23540D-03

At iterate   25    f=  8.56082D+00    |proj g|=  9.66500D-03

At iterate   30    f=  8.52254D+00    |proj g|=  1.02194D-02

At iterate   35    f=  8.52104D+00    |proj g|=  3.47220D-03

At iterate   40    f=  8.52086D+00    |proj g|=  5.34944D-03

At iterate   45    f=  8.52078D+00    |proj g|=  2.79134D-03

At iterate   50    f=  8.52075D+00    |proj g|=  7.59471D-04

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    3     50     62      1     0     0   7.595D-04   8.521D+00
  F =   8.5207537625163301     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT                 
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/tsa/statespace/sarimax.py:997: UserWarning: Non-stationary starting seasonal autoregressive Using zeros as starting parameters.
  warn('Non-stationary starting seasonal autoregressive'
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/tsa/statespace/sarimax.py:1009: UserWarning: Non-invertible starting seasonal moving average Using zeros as starting parameters.
  warn('Non-invertible starting seasonal moving average'
 This problem is unconstrained.
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/tsa/statespace/sarimax.py:997: UserWarning: Non-stationary starting seasonal autoregressive Using zeros as starting parameters.
  warn('Non-stationary starting seasonal autoregressive'
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/tsa/statespace/sarimax.py:1009: UserWarning: Non-invertible starting seasonal moving average Using zeros as starting parameters.
  warn('Non-invertible starting seasonal moving average'
 This problem is unconstrained.
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            3     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  9.09568D+00    |proj g|=  1.65700D+00

At iterate    5    f=  8.69211D+00    |proj g|=  4.53682D-01

At iterate   10    f=  8.54756D+00    |proj g|=  3.40350D-02

At iterate   15    f=  8.54083D+00    |proj g|=  3.33430D-03

At iterate   20    f=  8.54082D+00    |proj g|=  1.58057D-04

At iterate   25    f=  8.54078D+00    |proj g|=  1.06184D-02

At iterate   30    f=  8.53674D+00    |proj g|=  1.18313D-01

At iterate   35    f=  8.51481D+00    |proj g|=  3.12128D-03

At iterate   40    f=  8.51245D+00    |proj g|=  3.27935D-03

At iterate   45    f=  8.51183D+00    |proj g|=  4.67688D-03

At iterate   50    f=  8.51160D+00    |proj g|=  6.58486D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    3     50     58      1     0     0   6.585D-03   8.512D+00
  F =   8.5116027468333648     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT                 
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            3     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  9.11292D+00    |proj g|=  1.65876D+00
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/tsa/statespace/sarimax.py:997: UserWarning: Non-stationary starting seasonal autoregressive Using zeros as starting parameters.
  warn('Non-stationary starting seasonal autoregressive'
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/tsa/statespace/sarimax.py:1009: UserWarning: Non-invertible starting seasonal moving average Using zeros as starting parameters.
  warn('Non-invertible starting seasonal moving average'
 This problem is unconstrained.
At iterate    5    f=  8.69850D+00    |proj g|=  5.10951D-01

At iterate   10    f=  8.54796D+00    |proj g|=  3.33515D-02

At iterate   15    f=  8.54222D+00    |proj g|=  3.24499D-03

At iterate   20    f=  8.54169D+00    |proj g|=  9.91802D-04

At iterate   25    f=  8.50994D+00    |proj g|=  3.59922D-02

At iterate   30    f=  8.50916D+00    |proj g|=  3.60959D-03

At iterate   35    f=  8.50902D+00    |proj g|=  2.80777D-03

At iterate   40    f=  8.50899D+00    |proj g|=  2.60936D-03

At iterate   45    f=  8.50896D+00    |proj g|=  1.05368D-03
  ys=-9.422E-07  -gs= 2.177E-06 BFGS update SKIPPED

At iterate   50    f=  8.50895D+00    |proj g|=  1.44994D-03

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    3     50     78      1     1     0   1.450D-03   8.509D+00
  F =   8.5089495859676330     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT                 
RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            3     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  8.72720D+00    |proj g|=  5.58769D-01

At iterate    5    f=  8.61908D+00    |proj g|=  1.33323D-01

At iterate   10    f=  8.59171D+00    |proj g|=  6.72396D-03

At iterate   15    f=  8.59153D+00    |proj g|=  3.52489D-04

At iterate   20    f=  8.59137D+00    |proj g|=  4.54451D-03
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/tsa/statespace/sarimax.py:1009: UserWarning: Non-invertible starting seasonal moving average Using zeros as starting parameters.
  warn('Non-invertible starting seasonal moving average'
 This problem is unconstrained.
At iterate   25    f=  8.56045D+00    |proj g|=  5.88962D-02

At iterate   30    f=  8.51572D+00    |proj g|=  9.39532D-04

At iterate   35    f=  8.51560D+00    |proj g|=  4.53458D-03

At iterate   40    f=  8.51555D+00    |proj g|=  1.48086D-04

At iterate   45    f=  8.51550D+00    |proj g|=  1.45504D-03

At iterate   50    f=  8.51549D+00    |proj g|=  5.86388D-04

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    3     50     85      1     0     0   5.864D-04   8.515D+00
  F =   8.5154939893926826     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT                 
/home/ubuntu/Documents/Projects/STI_FX_Intervention/.venv/lib/python3.9/site-packages/statsmodels/base/model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
rolling_residuals = test_data - rolling_predictions
plt.figure(figsize=(10,4))
plt.plot(rolling_residuals)
plt.axhline(0, linestyle='--', color='k')
plt.title('Rolling Forecast Residuals from SARIMA Model', fontsize=20)
plt.ylabel('Error', fontsize=16)
Text(0, 0.5, 'Error')
../../../_images/8d11812c24ff74bb4b4f786c9e6a22d2b1343c6f714a12df47d8eaee6f6bf6a8.png
plt.figure(figsize=(10,4))

plt.plot(lim_catfish_sales)
plt.plot(rolling_predictions)

plt.legend(('Data', 'Predictions'), fontsize=16)

plt.title('Catfish Sales in 1000s of Pounds', fontsize=20)
plt.ylabel('Production', fontsize=16)
for year in range(start_date.year,end_date.year):
    plt.axvline(pd.to_datetime(str(year)+'-01-01'), color='k', linestyle='--', alpha=0.2)
../../../_images/45b8c12ca468c6937015ee685cacf7893e43dd9cb95a38e544013b17935fd230.png
print('Mean Absolute Percent Error:', round(np.mean(abs(rolling_residuals/test_data)),4))
Mean Absolute Percent Error: 0.0461
print('Root Mean Squared Error:', np.sqrt(np.mean(rolling_residuals**2)))
Root Mean Squared Error: 1327.4209125685188