Summary¶
- In an MA model, the current value is expressed as a function of current and lagged unobservable shocks.
- In an AR model, the current value is expressed as a function of its lagged values plus an unobservable shock.
- The general model is the Autoregressive Integrated Moving Average model, or ARIMA($p,d,q$), of which MA and AR models are special cases.
- $(p,d,q)$ specifies the order for the autoregressive, integrated, and moving average components, respectively.
- Before we estimate ARIMA models, we should define the ARMA($p,q$) model and the concept of integration.
ARMA Model¶
An ARMA($p,q$) model combines an AR($p$) and MA($q$) model:
$$ y_t = \phi_0 + \phi_1 y_{t−1} + \cdots + \phi_p y_{t−p} + \varepsilon_t + \theta_1\varepsilon_{t-1} + \cdots + \theta_q\varepsilon_{t-q} $$
As always, $\varepsilon_t$ represents a white noise process with mean $0$ and variance $\sigma^2$.
Stationarity: An ARMA($p,q$) model is stationary if and only if its AR part is stationary (since all MA models produce stationary time series).
Invertibility: An ARMA($p,q$) model is invertible if and only if its MA part is invertible (since the AR model part is already in AR form).
Causality: An ARMA($p, q$) model is causal if and only if its AR part is causal, i.e., can be represented as an MA($\infty$) model.
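To make these conditions concrete, here is a minimal sketch (not from the notebook's data; the ARMA(1,1) parameters are made up) that checks stationarity and invertibility with statsmodels' ArmaProcess:

# Check stationarity/invertibility of a hypothetical ARMA(1,1) with phi_1 = 0.7, theta_1 = 0.4
import numpy as np
from statsmodels.tsa.arima_process import ArmaProcess
# statsmodels uses lag-polynomial form: ar = [1, -phi_1], ma = [1, theta_1]
arma11 = ArmaProcess(ar=np.array([1, -0.7]), ma=np.array([1, 0.4]))
print(arma11.isstationary)    # True: the AR root lies outside the unit circle
print(arma11.isinvertible)    # True: the MA root lies outside the unit circle
y_sim = arma11.generate_sample(nsample=250)   # simulate a sample path from this process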
Key Aspects¶
- Model Building: Building an ARMA model involves plotting time series data, performing stationarity tests, selecting the order ($p, q$) of the model, estimating parameters, and diagnosing the model.
- Estimation Methods: Parameters in ARMA models can be estimated using methods such as conditional least squares and maximum likelihood.
- Order Determination: The order, ($p, q$), of an ARMA model can be determined using the ACF and PACF plots, or by using information criteria such as AIC, BIC, and HQIC.
- Forecasting: Covariance stationary ARMA processes can be converted to an infinite moving average for forecasting, but simpler methods are available for direct forecasting.
- Advantages: ARMA models can achieve high accuracy with parsimony. They often provide accurate approximations to the Wold representation with few parameters.
Integration¶
- The concept of integration in time series analysis relates to the stationarity of a differenced series.
- If a time series is integrated of order $d$, $I(d)$, it means that the time series needs to be differenced $d$ times to become stationary.
- A time series that is $I(0)$ is covariance stationary and does not need differencing.
- Differencing is a way to make a nonstationary time series stationary.
- An ARIMA($p,d,q$) model is built by fitting an ARMA($p,q$) model to the $d$th difference of the time series.
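As a small simulated sketch of these definitions (synthetic data, not the GDP series used below):

# Simulated illustration of integration orders
import numpy as np
import pandas as pd
rng = np.random.default_rng(0)
eps = pd.Series(rng.normal(size=500))     # I(0): white noise is already stationary
i1 = eps.cumsum()                         # I(1): one difference recovers eps
i2 = i1.cumsum()                          # I(2): two differences are needed
print(i2.diff().diff().dropna().std())    # roughly the standard deviation of eps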
$I(2)$ Example¶
# Libraries
from fredapi import Fred
import pandas as pd
# Read Data
fred_api_key = pd.read_csv('fred_api_key.txt', header=None)
fred = Fred(api_key=fred_api_key.iloc[0,0])
df = fred.get_series('CHNGDPNQDSMEI').to_frame(name='gdp')
df.index = pd.DatetimeIndex(df.index.values,freq=df.index.inferred_freq)
# Convert to billions of Yuan
df['gdp'] = df['gdp']/1e9
print(f'number of rows/quarters = {len(df)}')
print(df.head(2)); print(df.tail(2))
number of rows/quarters = 127
                  gdp
1992-01-01     526.28
1992-04-01     648.43
                  gdp
2023-04-01   30803.76
2023-07-01   31999.23
# Plotting
import matplotlib.pyplot as plt
# Plot
fig, ax = plt.subplots(figsize=(6.5,2.5));
ax.plot(df.gdp);
ax.set_title('China Nominal GDP (Yuan)');
ax.yaxis.set_major_formatter('{x:,.0f}B')
ax.grid(); ax.autoscale(tight=True)
Is it stationary?
- No, there is an exponential time trend, i.e., the mean is not constant.
- No, the variance is smaller at the beginning than at the end.
Seasonality¶
- We know that we can remove seasonality with a seasonal difference at lag $4$.
- But we could also explicitly model seasonality as its own ARMA process.
- Let's use the PACF to determine what the AR order of the seasonal component might be.
- First, let's take $\log$ to remove the exponential trend.
# Scientific computing
import numpy as np
# Remove exponential trend
df['loggdp'] = np.log(df['gdp'])
# Partial auto-correlation function
from statsmodels.graphics.tsaplots import plot_pacf as pacf
# Plot Autocorrelation Function
fig, ax = plt.subplots(figsize=(6.5,2))
pacf(df['loggdp'],ax); ax.grid(); ax.autoscale(tight=True)
- PACF is significant at lags $1$ and $5$ (and also almost at lags $9$, $13$,...)
- i.e., this reflects the strong seasonal autocorrelation at lags spaced $4$ quarters apart
- You could model the seasonality as $y_{s,t} = \phi_{s,0} + \phi_{s,4} y_{s,t-4} + \varepsilon_{s,t}$ (more on this later)
First Difference¶
# Year-over-year growth rate
df['d1loggdp'] = 100*(df['loggdp'] - df['loggdp'].shift(4))
# Plot
fig, ax = plt.subplots(figsize=(6.5,2));
ax.plot(df['d1loggdp']);
ax.set_title('China Nominal GDP Growth Rate (Year-over-year)');
ax.yaxis.set_major_formatter('{x:.0f}%')
ax.grid(); ax.autoscale(tight=True)
Does it look stationary?
- Maybe not, it might have a downward time trend.
- Maybe not, the volatility appears to change over time.
- Maybe not, the autocorrelation looks higher at the beginning than end of sample.
# Auto-correlation function
from statsmodels.graphics.tsaplots import plot_acf as acf
# Plot Autocorrelation Function
fig, ax = plt.subplots(figsize=(6.5,2))
acf(df['d1loggdp'].dropna(),ax)
ax.grid(); ax.autoscale(tight=True)
- The ACF tapers quickly, but starts to become non-zero for long lags.
- The autocorrelation is significantly different from zero for the first $4$ lags. Let's check the PACF to see if we can determine the order of an AR model.
# Plot Autocorrelation Function
fig, ax = plt.subplots(figsize=(6.5,1.5))
pacf(df['d1loggdp'].dropna(),ax)
ax.grid(); ax.autoscale(tight=True);
- The PACF suggests estimating an AR(1) model.
- If we assume this first-differenced data is stationary and estimate an AR(1) model, what do we get?
# ARIMA model
from statsmodels.tsa.arima.model import ARIMA
# Define model
mod0 = ARIMA(df['d1loggdp'].dropna(),order=(1,0,0))
# Estimate model
res = mod0.fit(); summary = res.summary()
# Print summary tables
tab0 = summary.tables[0].as_html(); tab1 = summary.tables[1].as_html(); tab2 = summary.tables[2].as_html()
#print(tab0); print(tab1); print(tab2)
| Dep. Variable: | VALUE | No. Observations: | 123 |
|---|---|---|---|
| Model: | ARIMA(1, 0, 0) | Log Likelihood | -289.294 |

| | coef | std err | z | P>\|z\| | [0.025 | 0.975] |
|---|---|---|---|---|---|---|
| const | 12.9494 | 2.811 | 4.606 | 0.000 | 7.440 | 18.459 |
| ar.L1 | 0.9399 | 0.026 | 36.315 | 0.000 | 0.889 | 0.991 |
| sigma2 | 6.3511 | 0.348 | 18.243 | 0.000 | 5.669 | 7.033 |

| Heteroskedasticity (H): | 3.71 | Skew: | 0.11 |
|---|---|---|---|
| Prob(H) (two-sided): | 0.00 | Kurtosis: | 12.03 |
- The residuals might be heteroskedastic, i.e., their volatility varies over time.
- The kurtosis of the residuals indicates they are not normally distributed.
- That suggests the data might still be non-stationary and may need to be differenced again.
Second Difference¶
# Second difference
df['d2loggdp'] = df['d1loggdp'] - df['d1loggdp'].shift(1)
# Plot
fig, ax = plt.subplots(figsize=(6.5,1.5));
ax.plot(df['d2loggdp']); ax.set_title('Difference of China Nominal GDP Growth Rate')
ax.yaxis.set_major_formatter('{x:.0f}pp')
ax.grid(); ax.autoscale(tight=True)
Does it look stationary?
- Yes, the sample mean appears to be $0$.
- Yes, the volatility appears fairly constant over time, except during COVID.
- Yes, the autocorrelation also appears fairly constant over time, at least until COVID.
# Plot Autocorrelation Function
fig, ax = plt.subplots(figsize=(6.5,2.5))
acf(df['d2loggdp'].dropna(),ax)
ax.grid(); ax.autoscale(tight=True)
Estimation¶
# ARIMA model
from statsmodels.tsa.arima.model import ARIMA
# Define model
mod0 = ARIMA(df['loggdp'],order=(1,2,0))
mod1 = ARIMA(df['d1loggdp'].dropna(),order=(1,1,0))
mod2 = ARIMA(df['d2loggdp'].dropna(),order=(1,0,0))
# Estimate model
res = mod0.fit(); summary = res.summary()
# Print summary tables
tab0 = summary.tables[0].as_html(); tab1 = summary.tables[1].as_html(); tab2 = summary.tables[2].as_html()
#print(tab0); print(tab1); print(tab2)
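Only mod0 is fit in the cell above; the summaries for the other two models shown below come from fitting mod1 and mod2 the same way, e.g.:

# Fit the remaining models (their summaries are reported below)
res1 = mod1.fit(); res2 = mod2.fit()
#print(res1.summary()); print(res2.summary())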
Estimation Results ARIMA($1,2,0$)¶
| Dep. Variable: | VALUE | No. Observations: | 127 |
|---|---|---|---|
| Model: | ARIMA(1, 2, 0) | Log Likelihood | 49.700 |

| | coef | std err | z | P>\|z\| | [0.025 | 0.975] |
|---|---|---|---|---|---|---|
| ar.L1 | -0.6646 | 0.076 | -8.722 | 0.000 | -0.814 | -0.515 |
| sigma2 | 0.0263 | 0.004 | 6.541 | 0.000 | 0.018 | 0.034 |

| Heteroskedasticity (H): | 0.64 | Skew: | -0.84 |
|---|---|---|---|
| Prob(H) (two-sided): | 0.15 | Kurtosis: | 2.35 |
- Warning: the AR coefficient estimate is negative!
- i.e., given how this time series has been differenced, the fitted model implies the differenced series tends to flip sign from one period to the next.
- Why? ARIMA has taken the first difference at $1$ lag, instead of lag $4$, so the seasonality has not been removed.
Estimation Results ARIMA($1,1,0$)¶
| Dep. Variable: | VALUE | No. Observations: | 123 |
|---|---|---|---|
| Model: | ARIMA(1, 1, 0) | Log Likelihood | -287.368 |

| | coef | std err | z | P>\|z\| | [0.025 | 0.975] |
|---|---|---|---|---|---|---|
| ar.L1 | 0.0024 | 0.045 | 0.053 | 0.958 | -0.087 | 0.091 |
| sigma2 | 6.5086 | 0.361 | 18.042 | 0.000 | 5.802 | 7.216 |

| Heteroskedasticity (H): | 4.22 | Skew: | 0.32 |
|---|---|---|---|
| Prob(H) (two-sided): | 0.00 | Kurtosis: | 12.12 |
- AR coefficient estimate is not statistically significantly different from $0$.
- That is consistent with the ACF plot, so an AR(1) model is not the best choice.
Estimation Results ARIMA($1, 0, 0$)¶
| Dep. Variable: | VALUE | No. Observations: | 122 |
|---|---|---|---|
| Model: | ARIMA(1, 0, 0) | Log Likelihood | -287.042 |

| | coef | std err | z | P>\|z\| | [0.025 | 0.975] |
|---|---|---|---|---|---|---|
| const | -0.1863 | 0.231 | -0.806 | 0.420 | -0.639 | 0.267 |
| ar.L1 | -0.0025 | 0.045 | -0.055 | 0.956 | -0.092 | 0.087 |
| sigma2 | 6.4739 | 0.357 | 18.126 | 0.000 | 5.774 | 7.174 |

| Heteroskedasticity (H): | 4.34 | Skew: | 0.32 |
|---|---|---|---|
| Prob(H) (two-sided): | 0.00 | Kurtosis: | 12.10 |
- These results are nearly identical to estimating the ARIMA($1,1,0$) with first-differenced data.
- i.e., once you remove seasonality, you can difference your data manually or have ARIMA do it for you.
ARIMA seasonal_order¶
- statsmodels ARIMA also has an argument, seasonal_order, to specify a model for seasonality.
- Above, a first difference at lag 4 removed the seasonality.
- seasonal_order=(P,D,Q,s) gives the order of the seasonal model: the AR parameters, differences, MA parameters, and periodicity ($s$).
- We saw the periodicity of the seasonality in the data was $4$ (and confirmed that with the PACF).
- Let's model the seasonality as an ARIMA($1,1,0$) (since the seasonal component is strongly autocorrelated and the data is integrated) with periodicity of $4$.
- Then we can estimate an ARIMA($1,1,0$) using just $\log(GDP)$.
mod3 = ARIMA(df['loggdp'],order=(1,1,0),seasonal_order=(1,1,0,4))
# Estimate model
res = mod3.fit(); summary = res.summary()
# Print summary tables
tab0 = summary.tables[0].as_html(); tab1 = summary.tables[1].as_html(); tab2 = summary.tables[2].as_html()
#tab0 = summary.tables[0].as_text(); tab1 = summary.tables[1].as_text(); tab2 = summary.tables[2].as_text()
#print(tab0); print(tab1); print(tab2)
| Dep. Variable: | VALUE | No. Observations: | 127 |
|---|---|---|---|
| Model: | ARIMA(1, 1, 0)x(1, 1, 0, 4) | Log Likelihood | 278.588 |

| | coef | std err | z | P>\|z\| | [0.025 | 0.975] |
|---|---|---|---|---|---|---|
| ar.L1 | 0.0024 | 0.048 | 0.050 | 0.960 | -0.092 | 0.096 |
| ar.S.L4 | -0.2598 | 0.043 | -6.034 | 0.000 | -0.344 | -0.175 |
| sigma2 | 0.0006 | 3.64e-05 | 16.652 | 0.000 | 0.001 | 0.001 |

| Heteroskedasticity (H): | 3.09 | Skew: | -0.49 |
|---|---|---|---|
| Prob(H) (two-sided): | 0.00 | Kurtosis: | 10.65 |
- The estimate of the AR parameter is the same as when estimating an ARIMA($1,1,0$) on $\log(GDP)$ first-differenced at lag $4$.
- There is an additional AR parameter for the seasonal component, ar.S.L4, i.e., we've controlled for the seasonality.
- While you can do this, it's more transparent to remove the seasonality by differencing first, then estimate an ARIMA model.
Unit Root(s)¶
- The presence of a unit root makes a time series non-stationary.
- Trend Stationarity: A trend-stationary time series has a deterministic time trend and is stationary after the trend is removed.
- Difference Stationarity: A difference-stationary time series has a stochastic time trend and becomes stationary after differencing.
- Unit root tests, e.g., the augmented Dickey-Fuller test (ADF test) are used to detect the presence of a unit root in a time series.
- Unit root tests help determine if differencing is required to achieve stationarity, suggesting a stochastic time trend.
- If an AR model has a unit root, it is called a unit root process, e.g., the random walk model.
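A minimal simulated sketch of the two cases (parameters arbitrary, not estimated from any data):

# Trend-stationary vs difference-stationary series (simulated)
import numpy as np
rng = np.random.default_rng(1)
t = np.arange(300); eps = rng.normal(size=300)
trend_stationary = 0.05*t + eps            # deterministic trend: stationary after removing the trend
diff_stationary = np.cumsum(0.05 + eps)    # random walk with drift: stationary after differencing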
AR($p$) Polynomial¶
Consider the AR($p$) model: $y_t = \phi_1 y_{t-1} + \phi_2 y_{t-2} + \dots + \phi_p y_{t-p} + \varepsilon_t$
Whether an AR($p$) produces a stationary time series depends on the parameters $\phi_j$, e.g., an AR($1$) is stationary if $|\phi_1|<1$
This can be written in terms of an AR polynomial with the lag operator, $L$, defined as $L^jy_t \equiv y_{t-j}$
$$ \varepsilon_t = y_t - \phi_1 y_{t-1} - \phi_2 y_{t-2} - \dots - \phi_p y_{t-p} = (1 - \phi_1 L - \phi_2 L^2 - \dots - \phi_p L^p)y_t $$
Define the AR polynomial as $\Phi(L) \equiv 1 - \phi_1 L - \phi_2 L^2 - \dots - \phi_p L^p$
Unit Roots of AR($p$)¶
- To find the roots of an AR($p$), set $\Phi(r) = 0$ and solve for $r$ (the roots may be complex); the number of roots equals the order of the polynomial.
- An AR($p$) has a unit root if the modulus of any of the roots equals $1$.
- For each unit root, the data must be differenced once to make it stationary, e.g., if there are $2$ unit roots then the data must be differenced twice.
- If the modulus of all the roots exceeds $1$, then the AR($p$) is stationary.
Examples¶
- AR($1$): $y_t = \phi_1 y_{t-1} + \varepsilon_t$
- $\Phi(r) = 0 \rightarrow 1-\phi_1 r = 0 \rightarrow r = 1/\phi_1$
- Thus, an AR($1$) has a unit root, $r = 1$, if $\phi_1 = 1$.
- If $\phi_1 = 1$, then $y_t = y_{t-1} + \varepsilon_t$ is a random walk model that produces non-stationary time series.
- But the first difference $\Delta y_t = y_t - y_{t-1} = \varepsilon_t$ is stationary (since $\varepsilon_t$ is white noise).
- Note if $|\phi_1| < 1$, then $|r|>1$ and the AR($1$) is stationary.
- AR($2$): $y_t = \phi_1 y_{t-1} + \phi_2 y_{t-2} + \varepsilon_t$ or $\varepsilon_t = (1 - \phi_1 L - \phi_2 L^2) y_t$
- The AR($2$) polynomial has a factorization such that $1 - \phi_1 L - \phi_2 L^2 = (1-\lambda_1 L)(1-\lambda_2 L)$ with roots $r_1 = 1/\lambda_1$ and $r_2 = 1/\lambda_2$
- Thus, an AR($2$) model is stationary if $|\lambda_1| < 1$ and $|\lambda_2| < 1$.
- Given the mapping $\phi_1 = \lambda_1 + \lambda_2$ and $\phi_2 = -\lambda_1 \lambda_2$, we can show that the moduli of both roots exceed $1$ if
- $|\phi_2| < 1$
- $\phi_2 + \phi_1 < 1$
- $\phi_2 - \phi_1 < 1$
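A numerical check of these conditions (a sketch with made-up AR(2) coefficients):

# Roots of the AR(2) polynomial Phi(r) = 1 - phi_1 r - phi_2 r^2 (hypothetical coefficients)
import numpy as np
phi1, phi2 = 0.5, 0.3
# np.roots expects coefficients from the highest power down: -phi_2 r^2 - phi_1 r + 1
roots = np.roots([-phi2, -phi1, 1])
print(roots, np.abs(roots))    # both moduli exceed 1, so this AR(2) is stationary
print(abs(phi2) < 1, phi2 + phi1 < 1, phi2 - phi1 < 1)   # the equivalent conditions all hold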
Unit Root Test¶
The augmented Dickey-Fuller test (ADF test) has hypotheses
- $h_0$: The time series has a unit root, indicating it is non-stationary.
- $h_A$: The time series does not have a unit root, suggesting it is stationary.
Test Procedure
- Write an AR($p$) model with the AR polynomial, $\Phi(L)y_t = \varepsilon_t$.
- If the AR polynomial has a unit root, it can be written as $\Phi(L) = \varphi(L)(1 - L)$ where $\varphi(L) = 1 - \varphi_1 L - \cdots - \varphi_{p-1} L^{p-1}$.
- Define $\Delta y_t = (1 - L)y_t = y_t - y_{t-1}$.
- An AR($p$) model with a unit root is then $\Phi(L)y_t = \varphi(L)(1 - L)y_t = \varphi(L)\Delta y_t = \varepsilon_t$.
- Thus, testing $\Phi(L)$ for a unit root is equivalent to the test $h_0: \rho = 0$ vs. $h_A: \rho < 0$ in
$$ \Delta y_t = \rho y_{t-1} + \varphi_1 \Delta y_{t-1} + \cdots + \varphi_{p-1} \Delta y_{t-p+1} + \varepsilon_t $$
Intuition: A stationary process reverts to its mean, so $\Delta y_t$ should be negatively related to $y_{t-1}$.
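To make the regression above concrete, here is a hedged sketch that runs it by hand with OLS; it produces $\hat{\rho}$ and its t-statistic but not the non-standard Dickey-Fuller critical values, which is why a dedicated test like adfuller (next) is used in practice. The function name and lag choice are illustrative.

# Hand-rolled ADF regression (illustrative; a constant is included, as adfuller does by default)
import pandas as pd
import statsmodels.api as sm
def adf_regression(y, lags=1):
    y = pd.Series(y).dropna()
    dy = y.diff()
    cols = {'y_lag1': y.shift(1)}
    for j in range(1, lags + 1):
        cols[f'dy_lag{j}'] = dy.shift(j)
    data = pd.concat([dy.rename('dy'), pd.DataFrame(cols)], axis=1).dropna()
    res = sm.OLS(data['dy'], sm.add_constant(data.drop(columns='dy'))).fit()
    return res.params['y_lag1'], res.tvalues['y_lag1']   # rho-hat and its t-statistic
# e.g., adf_regression(df['d1loggdp'], lags=4)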
adfuller¶
- In statsmodels, the function adfuller() conducts an ADF unit root test.
- If the p-value is smaller than $0.05$, then the series is probably stationary.
# ADF Test
from statsmodels.tsa.stattools import adfuller
# Function to organize ADF test results
def adf_test(data):
keys = ['Test Statistic','p-value','# of Lags','# of Obs']
values = adfuller(data)
test = pd.DataFrame.from_dict(dict(zip(keys,values[0:4])),
orient='index',columns=[data.name])
return test
ADF test results¶
test = adf_test(df['d1loggdp'].dropna())
#print(test.to_markdown())
| | d1loggdp |
|---|---|
| Test Statistic | -3.1799 |
| p-value | 0.0211781 |
| # of Lags | 4 |
| # of Obs | 118 |
- The p-value is < $0.05$, so we reject the null hypothesis that the process is non-stationary.
- The number of lags is chosen by minimizing the Akaike Information Criterion (AIC) (more on that soon).
dl = []
for column in df.columns:
test = adf_test(df[column].dropna())
dl.append(test)
results = pd.concat(dl, axis=1)
#print(results.to_markdown())
| | gdp | loggdp | d1loggdp | d2loggdp |
|---|---|---|---|---|
| Test Statistic | 4.294 | -1.27023 | -3.1799 | -6.54055 |
| p-value | 1 | 0.642682 | 0.0211781 | 9.37063e-09 |
| # of Lags | 7 | 8 | 4 | 3 |
| # of Obs | 119 | 118 | 118 | 118 |
- China GDP has a unit root after taking the log.
- China GDP is probably stationary after removing seasonality and converting to a growth rate (p-value < $0.05$).
- Second difference of China GDP is almost certainly stationary.
U.S. Unemployment Rate¶
# Read Data
fred_api_key = pd.read_csv('fred_api_key.txt', header=None)
fred = Fred(api_key=fred_api_key.iloc[0,0])
data = fred.get_series('UNRATE').to_frame(name='UR')
# Plot
fig, ax = plt.subplots(figsize=(6.5,2));
ax.plot(data.UR); ax.set_title('U.S. Unemployment Rate');
ax.yaxis.set_major_formatter('{x:,.1f}%')
ax.grid(); ax.autoscale(tight=True)
adf_UR = adf_test(data.UR)
print(adf_UR)
                          UR
Test Statistic     -3.918852
p-value             0.001900
# of Lags           1.000000
# of Obs          928.000000
- ADF test suggests that U.S. unemployment rate is stationary (p-value < 0.05).
- This doesn't necessarily contradict the ACF, which suggested the UR might be non-stationary, i.e., we couldn't tell with certainty.
- This result may not be surprising since modeling the UR as an AR($1$) led to an estimate of the AR coefficient that was less than $1$, i.e., it was not close to a random walk.
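A quick check of that claim (a minimal sketch using the ARIMA class imported earlier):

# Fit an AR(1) to the unemployment rate and inspect the persistence
res_ur = ARIMA(data['UR'], order=(1,0,0)).fit()
print(res_ur.params['ar.L1'])   # an estimate below (but close to) 1 is consistent with stationarity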
Order Determination¶
- There are a lot of possible choices for the orders of an ARIMA($p,d,q$) model.
- Even if the data is not seasonal and already stationary, there are plenty of possibilities for an ARMA($p,q$) model.
- In the ADF tests above, adfuller selected the $p$ lags of an AR($p$) model for the first-differenced time series used to test for a unit root.
- How do we select the best orders for an ARMA($p,q$) model?
Information Criterion¶
- Information criteria are used for order determination (i.e., model selection) of ARMA($p,q$) models.
- In adfuller, the default method to select lags is the Akaike Information Criterion (AIC). Another choice is the Bayesian Information Criterion (BIC), also known as the Schwarz Information Criterion (SIC).
- The AIC and BIC are commonly used in economics research with time series data.
- But there are other choices as well, e.g., the Hannan-Quinn Information Criterion (HQIC).
- The goal of AIC, BIC, and HQIC is to estimate the out-of-sample mean square prediction error, so a smaller value indicates a better model.
AIC vs. BIC¶
- Let $\mathcal{L}$ be the maximized log-likelihood of the model, $K$ be the number of estimated parameters, and $n$ be the sample size of the data.
- $AIC = -2 \mathcal{L} + 2 K$
- $BIC = -2 \mathcal{L} + \log(n) K$
- The key difference is that
- AIC has a tendency to overestimate the model order.
- BIC places a bigger penalty on model complexity, so the selected model tends to be more parsimonious than under AIC.
- Consistency: as the sample size increases, does the information criterion select the true model?
- AIC is not consistent, but it is asymptotically efficient, which is relevant for forecasting (more on that much later).
- BIC is consistent.
- When AIC and BIC suggest different choices, most people choose the more parsimonious model suggested by the BIC.
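A hedged sketch of order selection by information criteria: fit a small menu of ARMA($p,q$) models to a series assumed to be stationary and compare AIC/BIC (the grid size and helper name below are arbitrary):

# Compare AIC/BIC over a small grid of ARMA(p, q) candidates
import itertools
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
def ic_table(y, max_p=3, max_q=3):
    rows = []
    for p, q in itertools.product(range(max_p + 1), range(max_q + 1)):
        try:
            res = ARIMA(y, order=(p, 0, q)).fit()
            rows.append({'p': p, 'q': q, 'aic': res.aic, 'bic': res.bic})
        except Exception:
            pass    # skip candidate orders that fail to estimate
    return pd.DataFrame(rows).sort_values('bic')
# e.g., ic_table(df['d2loggdp'].dropna()).head()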
Likelihood Function¶
- Likelihood function: Probability of the observed time series data given a model and its parameterization.
- Maximum Likelihood Estimation: For a given model, find the parameterization that maximizes the likelihood function, i.e., such that the observed time series has a higher probability than with any other parameterization.
- The likelihood function is the joint probability distribution of all observations.
- In ARMA/ARIMA models, the likelihood function is just a product of Gaussian/normal distributions since that is the distribution of the errors/innovations.
- Order determination/model selection:
- Given some time series data and a menu of models, which model has the highest likelihood and is relatively parsimonious?
- e.g., for each AR model we can compute the ordinary least squares estimates and then the value of the likelihood function for that parameterization.
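As a minimal sketch of that last point (the helper below is illustrative, not from the notebook): estimate an AR($1$) by OLS and then evaluate the Gaussian log-likelihood at those estimates.

# OLS estimate of an AR(1), then the Gaussian log-likelihood at those estimates
import numpy as np
import pandas as pd
import statsmodels.api as sm
def ar1_ols_loglike(y):
    y = pd.Series(y).dropna()
    X = sm.add_constant(y.shift(1)).dropna()   # constant and lagged value as regressors
    res = sm.OLS(y.loc[X.index], X).fit()
    eps, n = res.resid, res.nobs
    sigma2 = (eps**2).mean()                   # ML estimate of the innovation variance
    loglike = -0.5*n*np.log(2*np.pi*sigma2) - (eps**2).sum()/(2*sigma2)
    return res.params, loglike
# e.g., ar1_ols_loglike(df['d2loggdp'])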
Example¶
Assume an MA(2) model: $y_t = \mu + \varepsilon_t + \theta_1 \varepsilon_{t-1} + \theta_2 \varepsilon_{t-2}$, where $\varepsilon_t \sim \mathcal{N}(0, \sigma^2)$ are i.i.d. normal errors. Note: by construction $y_t$ is uncorrelated with $y_{t-3}$
The likelihood function is the joint probability distribution of the observations $y_1, y_2, \dots, y_T$, given parameters of the model $\Theta = \{\mu, \theta_1, \theta_2, \sigma^2\}$
$$ \mathcal{L}(\Theta| y_1, \dots, y_T) = \prod_{t=1}^{T} f(y_t | y_{t-1}, y_{t-2}; \Theta) $$ where $f(y_t | y_{t-1}, y_{t-2}; \Theta)$ is the conditional normal density
$$ f(y_t | y_{t-1}, y_{t-2};\Theta) = \frac{1}{\sqrt{2\pi \sigma^2}} \exp\left( -\frac{\varepsilon_t^2}{2\sigma^2} \right) $$
Combining the previous expressions and substituting out $\varepsilon_t$ for $y_t - \mu - \theta_1 \varepsilon_{t-1} - \theta_2 \varepsilon_{t-2}$ yields
$$ \mathcal{L}(\Theta) = \prod_{t=1}^{T} \frac{1}{\sqrt{2\pi \sigma^2}} \exp\left( -\frac{(y_t - \mu - \theta_1 \varepsilon_{t-1} - \theta_2 \varepsilon_{t-2})^2}{2\sigma^2} \right) $$
The log-likelihood function (i.e., taking natural log converts a product to a sum) is
$$ \log \mathcal{L}(\Theta) = -\frac{T}{2} \log(2\pi\sigma^2) - \frac{1}{2\sigma^2} \sum_{t=1}^{T} (y_t - \mu - \theta_1 \varepsilon_{t-1} - \theta_2 \varepsilon_{t-2})^2 $$
To estimate $\mu, \theta_1, \theta_2, \sigma^2$, maximize this log-likelihood function, i.e., $\hat{\Theta} = \textrm{arg max}_\Theta \log \mathcal{L}(\Theta)$.
Because $\log \mathcal{L}(\Theta)$ is nonlinear in the parameters, numerical methods such as nonlinear optimization or the conditional likelihood approach are used in practice.
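As a hedged sketch of the conditional likelihood approach just mentioned (pre-sample errors set to zero; the data below are simulated and the starting values arbitrary), the MA(2) log-likelihood can be evaluated with a simple recursion and maximized numerically:

# Conditional log-likelihood of an MA(2), maximized numerically (simulated illustration)
import numpy as np
from scipy.optimize import minimize
def ma2_negloglike(params, y):
    mu, theta1, theta2, sigma2 = params
    eps = np.zeros(len(y))
    for t in range(len(y)):                     # recursively recover the shocks
        lag1 = eps[t-1] if t >= 1 else 0.0      # conditional approach: pre-sample errors are zero
        lag2 = eps[t-2] if t >= 2 else 0.0
        eps[t] = y[t] - mu - theta1*lag1 - theta2*lag2
    T = len(y)
    return 0.5*T*np.log(2*np.pi*sigma2) + (eps**2).sum()/(2*sigma2)
# Simulate data with true (mu, theta1, theta2) = (1, 0.5, 0.2)
rng = np.random.default_rng(0)
e = rng.normal(size=303)
y = 1.0 + e[2:] + 0.5*e[1:-1] + 0.2*e[:-2]
res = minimize(ma2_negloglike, x0=[y.mean(), 0.0, 0.0, y.var()], args=(y,),
               bounds=[(None, None), (None, None), (None, None), (1e-6, None)])
print(res.x)    # estimates of mu, theta1, theta2, sigma2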