---
title: ARIMA
keywords: fastai
sidebar: home_sidebar
nb_path: "nbs/arima.ipynb"
---
{% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}
arima(ap, order=(2, 1, 1), seasonal={'order': (0, 1, 0), 'period': 12}, 
      include_mean=False, method='CSS-ML')['coef']
{'ar1': 0.466880673428245,
 'ar2': 0.22506344134934328,
 'ma1': -1.0964437716813402}
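The prediction examples that follow reuse fitted models (res, res_intercept, res_xreg) from earlier notebook cells that are not reproduced on this page. As a hedged sketch, a comparable model object can be kept by storing the full return value of arima instead of indexing into 'coef' (fit is a hypothetical name):

# keep the whole fitted model rather than only its coefficients
fit = arima(ap, order=(2, 1, 1), seasonal={'order': (0, 1, 0), 'period': 12},
            include_mean=False, method='CSS-ML')
fit['coef']  # same coefficients as shown above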
{% endraw %} {% raw %}

predict_arima[source]

predict_arima(model, n_ahead, newxreg=None, se_fit=True)

{% endraw %} {% raw %}
{% endraw %} {% raw %}
predict_arima(res, 10)
(array([448.03641765, 423.6958796 , 453.37910036, 496.53844782,
        508.4585562 , 572.14906375, 659.67853114, 644.08113724,
        546.38826999, 499.62227626]),
 array([11.53128398, 13.1873165 , 14.74522214, 15.74954361, 16.54939395,
        17.18562958, 17.7205789 , 18.18296878, 18.59424176, 18.96803794]))
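When se_fit=True (the default), the second array holds the forecast standard errors, so approximate prediction intervals can be formed directly from the returned pair. A minimal sketch, assuming normally distributed forecast errors:

# approximate 95% intervals from the (mean, standard error) pair returned above
mean, se = predict_arima(res, 10)
lower = mean - 1.96 * se
upper = mean + 1.96 * se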
{% endraw %} {% raw %}
predict_arima(res_intercept, 10)
(array([470.0097129 , 440.51623786, 442.19261859, 430.30698345,
        425.09828469, 417.37177946, 411.26885683, 404.89014495,
        399.07414049, 393.42000495]),
 array([ 30.69910087,  51.7119296 ,  61.89382391,  71.067199  ,
         77.98072868,  83.94819971,  88.93457943,  93.2568996 ,
         96.9956194 , 100.27008344]))
{% endraw %} {% raw %}
newdrift = np.arange(ap.size + 1, ap.size + 10 + 1).reshape(-1, 1)
newxreg = np.concatenate([newdrift, np.sqrt(newdrift)], axis=1)
predict_arima(res_xreg, 10, newxreg=newxreg)
(array([441.89777076, 463.6701726 , 489.91157043, 513.32941052,
        528.76719128, 534.25597523, 530.97877135, 522.35811387,
        512.68413676, 505.7523218 ]),
 array([25.04154425, 31.85337615, 33.32711649, 33.34240568, 34.68173961,
        37.33840629, 39.6879995 , 40.7797975 , 40.92280492, 40.99296076]))
{% endraw %} {% raw %}
myarima(ap, order=(2, 1, 1), seasonal={'order': (0, 1, 0), 'period': 12}, 
        constant=False, ic='aicc', method='CSS-ML')['aic']
1020.8073918488345
{% endraw %} {% raw %}

arima_string[source]

arima_string(model, padding=False)

{% endraw %} {% raw %}
{% endraw %} {% raw %}
arima_string(res_Arima_ex)
'Regression with ARIMA(0,0,0) errors'
{% endraw %} {% raw %}
arima_string(res_Arima)
'ARIMA(0,0,0) with drift        '
{% endraw %} {% raw %}

forecast_arima[source]

forecast_arima(model, h=None, level=None, fan=False, xreg=None, blambda=None, bootstrap=False, npaths=5000, biasadj=None)

{% endraw %} {% raw %}
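No forecast_arima example appears in this section. A minimal, hedged sketch, assuming the function accepts a fitted model such as res_Arima (used with arima_string below) and mirrors forecast::forecast.Arima:

# hedged sketch: 12-step-ahead forecast with 80% and 95% prediction intervals
fc = forecast_arima(res_Arima, h=12, level=(80, 95))
# fc collects the point forecasts together with the requested interval levels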
{% endraw %} {% raw %}

fitted_arima[source]

fitted_arima(model, h=1)

Returns h-step forecasts for the data used in fitting the model.

{% endraw %} {% raw %}
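A hedged sketch of how the one-step fitted values could be used to inspect in-sample residuals, assuming fitted_arima returns values aligned with the training series ap:

# hedged sketch: one-step in-sample fits (h=1 is the default) and the implied residuals
fits = fitted_arima(res_Arima)
residuals = ap - fits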
{% endraw %} {% raw %}

auto_arima_f[source]

auto_arima_f(x, d=None, D=None, max_p=5, max_q=5, max_P=2, max_Q=2, max_order=5, max_d=2, max_D=1, start_p=2, start_q=2, start_P=1, start_Q=1, stationary=False, seasonal=True, ic='aicc', stepwise=True, nmodels=94, trace=False, approximation=None, method=None, truncate=None, xreg=None, test='kpss', test_kwargs=None, seasonal_test='seas', seasonal_test_kwargs=None, allowdrift=True, allowmean=True, blambda=None, biasadj=False, parallel=False, num_cores=2, period=1)

{% endraw %} {% raw %}
{% endraw %} {% raw %}
mod = auto_arima_f(ap, period=12, method='CSS-ML', trace=True)
ARIMA(2,1,2)(1,1,1)[12]                   :inf

ARIMA(0,1,0)(0,1,0)[12]                   :1031.5393581671838

ARIMA(1,1,0)(1,1,0)[12]                   :1020.5919299313473

ARIMA(0,1,1)(0,1,1)[12]                   :1021.1978414716

ARIMA(1,1,0)(0,1,0)[12]                   :1020.4966626269295

ARIMA(1,1,0)(0,1,1)[12]                   :1021.1141377784866

ARIMA(1,1,0)(1,1,1)[12]                   :1022.6657138771359

ARIMA(2,1,0)(0,1,0)[12]                   :1022.5909032832317

ARIMA(1,1,1)(0,1,0)[12]                   :1022.5906051142277

ARIMA(0,1,1)(0,1,0)[12]                   :1020.7342476851575

ARIMA(2,1,1)(0,1,0)[12]                   :1021.1273918488346

ARIMA(2,1,1)(0,1,0)[12]                   :1021.1273918488346
Now re-fitting the best model(s) without approximations...


ARIMA(1,1,0)(0,1,0)[12]                   :1020.4966626269295
{% endraw %} {% raw %}

print_statsforecast_ARIMA[source]

print_statsforecast_ARIMA(model, digits=3, se=True)

{% endraw %} {% raw %}
{% endraw %} {% raw %}
print_statsforecast_ARIMA(mod)
ARIMA(1,1,0)(0,1,0)[12]                   

Coefficients:
               ar1
coefficient -0.300
s.e.         0.007

sigma^2 = 139.156: log likelihood = -508.20

AIC=1020.4
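The signature above also allows adjusting the number of printed digits and suppressing the standard errors, for example:

# same summary with more digits and without standard errors
print_statsforecast_ARIMA(mod, digits=5, se=False)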
{% endraw %} {% raw %}

class ARIMASummary[source]

ARIMASummary(model)

ARIMA Summary.

{% endraw %} {% raw %}
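ARIMASummary wraps a fitted model; in the AutoARIMA examples further below, the fitted estimator exposes such a wrapper as model.model_, whose summary() method produces the printed report. A hedged sketch, assuming the class can be applied to the auto_arima_f result directly:

# hedged sketch: wrap the model selected above and print its summary
ARIMASummary(mod).summary()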
{% endraw %} {% raw %}

class AutoARIMA[source]

AutoARIMA(d:Optional[int]=None, D:Optional[int]=None, max_p:int=5, max_q:int=5, max_P:int=2, max_Q:int=2, max_order:int=5, max_d:int=2, max_D:int=1, start_p:int=2, start_q:int=2, start_P:int=1, start_Q:int=1, stationary:bool=False, seasonal:bool=True, ic:str='aicc', stepwise:bool=True, nmodels:int=94, trace:bool=False, approximation:Optional[bool]=None, method:Optional[str]=None, truncate:Optional[bool]=None, test:str='kpss', test_kwargs:Optional[str]=None, seasonal_test:str='seas', seasonal_test_kwargs:Optional[typing.Dict]=None, allowdrift:bool=True, allowmean:bool=True, blambda:Optional[float]=None, biasadj:bool=False, parallel:bool=False, num_cores:int=2, period:int=1)

An AutoARIMA estimator.

Returns the best ARIMA model according to either the AIC, AICc or BIC value. The function conducts a search over possible models within the order constraints provided.

Parameters

• d: int, optional (default None). Order of first-differencing. If missing, a value is chosen based on test.
• D: int, optional (default None). Order of seasonal-differencing. If missing, a value is chosen based on seasonal_test.
• max_p: int (default 5). Maximum value of p.
• max_q: int (default 5). Maximum value of q.
• max_P: int (default 2). Maximum value of P.
• max_Q: int (default 2). Maximum value of Q.
• max_order: int (default 5). Maximum value of p+q+P+Q if model selection is not stepwise.
• max_d: int (default 2). Maximum number of non-seasonal differences.
• max_D: int (default 1). Maximum number of seasonal differences.
• start_p: int (default 2). Starting value of p in the stepwise procedure.
• start_q: int (default 2). Starting value of q in the stepwise procedure.
• start_P: int (default 1). Starting value of P in the stepwise procedure.
• start_Q: int (default 1). Starting value of Q in the stepwise procedure.
• stationary: bool (default False). If True, restricts the search to stationary models.
• seasonal: bool (default True). If False, restricts the search to non-seasonal models.
• ic: str (default 'aicc'). Information criterion to be used in model selection.
• stepwise: bool (default True). If True, performs stepwise selection (faster). Otherwise, searches over all models. Non-stepwise selection can be very slow, especially for seasonal models.
• nmodels: int (default 94). Maximum number of models considered in the stepwise search.
• trace: bool (default False). If True, the list of ARIMA models considered is reported.
• approximation: bool, optional (default None). If True, estimation is via conditional sums of squares and the information criteria used for model selection are approximated. The final model is still computed using maximum likelihood estimation. Approximation should be used for long time series or a high seasonal period to avoid excessive computation times.
• method: str, optional (default None). Fitting method: maximum likelihood or minimize conditional sum-of-squares. The default (unless there are missing values) is to use conditional sum-of-squares to find starting values, then maximum likelihood. Can be abbreviated.
• truncate: bool, optional (default None). An integer value indicating how many observations to use in model selection. The last truncate values of the series are used to select a model when truncate is not None and approximation=True. All observations are used if either truncate=None or approximation=False.
• test: str (default 'kpss'). Type of unit root test to use. See ndiffs for details.
• test_kwargs: str, optional (default None). Additional arguments to be passed to the unit root test.
• seasonal_test: str (default 'seas'). Determines which method is used to select the number of seasonal differences. The default method is to use a measure of seasonal strength computed from an STL decomposition. Other possibilities involve seasonal unit root tests.
• seasonal_test_kwargs: dict, optional (default None). Additional arguments to be passed to the seasonal unit root test. See nsdiffs for details.
• allowdrift: bool (default True). If True, models with drift terms are considered.
• allowmean: bool (default True). If True, models with a non-zero mean are considered.
• blambda: float, optional (default None). Box-Cox transformation parameter. If lambda="auto", then a transformation is automatically selected using BoxCox.lambda. The transformation is ignored if None. Otherwise, the data are transformed before the model is estimated.
• biasadj: bool (default False). Use the adjusted back-transformed mean for Box-Cox transformations. If transformed data are used to produce forecasts and fitted values, a regular back transformation results in median forecasts. If biasadj is True, an adjustment is made to produce mean forecasts and fitted values.
• parallel: bool (default False). If True and stepwise=False, the specification search is done in parallel. This can give a significant speedup on multicore machines.
• num_cores: int (default 2). Number of parallel processes to be used if parallel=True and stepwise=False. If None, the number of logical cores is automatically detected and all available cores are used.
• period: int (default 1). Number of observations per unit of time. For example, 24 for hourly data.

A short configuration sketch using some of these options appears after the references below.

Notes

  • This implementation is a mirror of Hyndman's forecast::auto.arima.

References

[1] https://github.com/robjhyndman/forecast

{% endraw %} {% raw %}
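The example below uses the default constructor. As a hedged sketch, a seasonal search on monthly data such as ap could instead be configured through the parameters documented above (seasonal_model is a hypothetical name):

# hedged sketch: seasonal search on monthly data, reporting the candidate models
seasonal_model = AutoARIMA(period=12, trace=True)
seasonal_model = seasonal_model.fit(ap)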
{% endraw %} {% raw %}
model = AutoARIMA()
{% endraw %} {% raw %}
model = model.fit(ap)
{% endraw %} {% raw %}
model.predict(h=7)
mean
0 464.631601
1 483.363282
2 490.196787
3 489.549975
4 485.653255
5 481.407362
6 478.248913
{% endraw %} {% raw %}
model.predict(h=7, level=80)
lo_80% mean hi_80%
0 426.326487 464.631601 502.936714
1 419.928952 483.363282 546.797612
2 411.105075 490.196787 569.288499
3 401.920876 489.549975 577.179074
4 393.609048 485.653255 577.697462
5 386.910310 481.407362 575.904414
6 382.073456 478.248913 574.424370
{% endraw %} {% raw %}
model.predict(h=7, level=(80, 90))
lo_80% lo_90% mean hi_80% hi_90%
0 426.326487 415.467520 464.631601 502.936714 513.795682
1 419.928952 401.946201 483.363282 546.797612 564.780363
2 411.105075 388.683674 490.196787 569.288499 591.709900
3 401.920876 377.079244 489.549975 577.179074 602.020706
4 393.609048 367.515794 485.653255 577.697462 603.790716
5 386.910310 360.121709 481.407362 575.904414 602.693015
6 382.073456 354.809051 478.248913 574.424370 601.688776
{% endraw %} {% raw %}
model.predict_in_sample()
mean
0 111.888000
1 112.688513
2 120.747483
3 136.303743
4 124.909752
... ...
139 630.376068
140 567.956065
141 451.814466
142 443.498419
143 374.088365

144 rows × 1 columns

{% endraw %} {% raw %}
model.predict_in_sample(level=50)
lo_50% mean hi_50%
0 91.727745 111.888000 132.048255
1 92.528257 112.688513 132.848768
2 100.587228 120.747483 140.907738
3 116.143488 136.303743 156.463998
4 104.749497 124.909752 145.070007
... ... ... ...
139 610.215813 630.376068 650.536323
140 547.795810 567.956065 588.116320
141 431.654211 451.814466 471.974721
142 423.338164 443.498419 463.658674
143 353.928110 374.088365 394.248620

144 rows × 3 columns

{% endraw %} {% raw %}
model.predict_in_sample(level=(80, 90))
lo_90% lo_80% mean hi_80% hi_90%
0 73.582887 62.723919 111.888000 150.193114 161.052081
1 74.383399 63.524432 112.688513 150.993626 161.852593
2 82.442370 71.583402 120.747483 159.052597 169.911564
3 97.998630 87.139662 136.303743 174.608857 185.467824
4 86.604639 75.745671 124.909752 163.214866 174.073833
... ... ... ... ... ...
139 592.070954 581.211987 630.376068 668.681181 679.540149
140 529.650951 518.791984 567.956065 606.261178 617.120146
141 413.509353 402.650385 451.814466 490.119580 500.978547
142 405.193306 394.334338 443.498419 481.803533 492.662500
143 335.783252 324.924284 374.088365 412.393479 423.252446

144 rows × 5 columns

{% endraw %} {% raw %}
model.model_.summary()
ARIMA(2,1,1)                   

Coefficients:
               ar1    ar2    ma1
coefficient  1.166 -0.460 -0.846
s.e.         0.004  0.006  0.007

sigma^2 = 893.391: log likelihood = -687.03

AIC=1382.05
{% endraw %} {% raw %}
model.summary()
ARIMA(2,1,1)                   

Coefficients:
               ar1    ar2    ma1
coefficient  1.166 -0.460 -0.846
s.e.         0.004  0.006  0.007

sigma^2 = 893.391: log likelihood = -687.03

AIC=1382.05
{% endraw %} {% raw %}
model_x = AutoARIMA(approximation=False)
{% endraw %} {% raw %}
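The fit call below reuses a drift regressor defined earlier in the notebook and not shown in this section. A hedged sketch of a compatible definition, chosen so that newdrift (defined above) continues it one step past the training sample:

# assumed in-sample drift regressor: 1..len(ap) as a column vector
drift = np.arange(1, ap.size + 1).reshape(-1, 1)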
model_x = model_x.fit(ap, np.hstack([np.sqrt(drift), np.log(drift)]))
{% endraw %} {% raw %}
model_x.predict(h=12, X=np.hstack([np.sqrt(newdrift), np.log(newdrift)]), level=(80, 90))
lo_80% lo_90% mean hi_80% hi_90%
0 444.847963 434.808168 480.263437 515.678911 525.718706
1 414.132100 398.250623 470.154163 526.176226 542.057702
2 422.259329 403.968541 486.780261 551.301193 569.591981
3 398.925698 379.813546 466.344002 533.762305 552.874457
4 400.961769 381.849617 468.380072 535.798375 554.910527
5 402.992047 383.879895 470.410351 537.828654 556.940806
6 405.016585 385.904433 472.434888 539.853191 558.965344
7 407.035432 387.923280 474.453735 541.872038 560.984191
8 409.048638 389.936486 476.466941 543.885244 562.997397
9 411.056252 391.944099 478.474555 545.892858 565.005010
{% endraw %} {% raw %}
model_x.predict_in_sample()
mean
0 73.205186
1 116.319149
2 100.318853
3 107.862723
4 108.927698
... ...
139 608.317523
140 564.682620
141 429.362250
142 442.344697
143 391.284005

144 rows × 1 columns

{% endraw %} {% raw %}
model_x.predict_in_sample(level=(80, 90))
lo_90% lo_80% mean hi_80% hi_90%
0 37.789712 27.749917 73.205186 108.620660 118.660455
1 80.903675 70.863880 116.319149 151.734624 161.774418
2 64.903379 54.863584 100.318853 135.734328 145.774123
3 72.447249 62.407454 107.862723 143.278198 153.317993
4 73.512224 63.472429 108.927698 144.343172 154.382967
... ... ... ... ... ...
139 572.902048 562.862253 608.317523 643.732997 653.772792
140 529.267145 519.227350 564.682620 600.098094 610.137889
141 393.946776 383.906981 429.362250 464.777724 474.817519
142 406.929222 396.889427 442.344697 477.760171 487.799966
143 355.868531 345.828736 391.284005 426.699480 436.739275

144 rows × 5 columns

{% endraw %} {% raw %}
model_x.summary()
Regression with ARIMA(0,0,3) errors

Coefficients:
               ma1    ma2    ma3    ex_1    ex_2
coefficient  1.226  0.904  0.552  57.136 -45.774
s.e.         0.052  0.123  0.084  15.718  44.469

sigma^2 = 763.684: log likelihood = -679.81

AIC=1371.61
{% endraw %}