# Scientific computing
import numpy as np
# Matrix whose second row is twice the first (linearly dependent rows)
A = np.array([
    [1, 2],
    [2, 4]
])
# Rank
rank = np.linalg.matrix_rank(A)
print(f'rank(A) = {rank}')
# Determinant: reported together with the rank so the printed result
# shows both the rank deficiency and the zero determinant
det = np.linalg.det(A)
print(f'det(A) = {det:.4f}')

rank(A) = 1
det(A) = 0.0000

# 2x2 example matrix defined row by row
A = np.array([[3, 5],
              [2, 4]])
# Compute both summary quantities first, then report them
rank = np.linalg.matrix_rank(A)
det = np.linalg.det(A)
print(f'rank(A) = {rank}')
print(f'det(A) = {det:.4f}')

rank(A) = 2
det(A) = 2.0000

# Libraries
from fredapi import Fred
import pandas as pd
# The API key is the first cell of a local, header-less text file
fred_api_key = pd.read_csv('fred_api_key.txt', header=None).iloc[0,0]
fred = Fred(api_key=fred_api_key)
# FRED series identifiers and the column names they are stored under
series = ['GDP','PCE','GDPDEF']
rename = ['gdp','cons','price']
# Download each series as a one-column frame and collect the frames
dl = []
for fred_id, col_name in zip(series, rename):
    var = fred.get_series(fred_id).to_frame(name=col_name)
    dl.append(var)
    # Show the first and last two observations of the series
    print(var.head(2))
    print(var.tail(2))

            gdp
1946-01-01  NaN
1946-04-01  NaN
                  gdp
2025-07-01  31098.027
2025-10-01  31442.483
             cons
1959-01-01  306.1
1959-02-01  309.6
               cons
2025-12-01  21455.5
2026-01-01  21536.6
             price
1947-01-01  11.141
1947-04-01  11.299
              price
2025-07-01  129.430
2025-10-01  130.651

# Join the downloaded series side by side on their date index
raw = pd.concat(dl, axis=1, sort=False)
raw = raw.sort_index()
# Move to quarter-end dates, taking the last entry within each quarter
raw = raw.resample('QE').last()
# Preview the first and last two rows
for preview in (raw.head(2), raw.tail(2)):
    display(preview)

data = pd.DataFrame()
# For each nominal series: deflate by the price index, take 100*log,
# then add the first difference of that log level
for label, nominal in (('GDP', 'gdp'), ('Cons', 'cons')):
    data[f'log{label}'] = 100*(np.log(raw[nominal]/raw['price']))
    data[f'dlog{label}'] = data[f'log{label}'].diff(1)
# Restrict to the estimation sample (both end points are included)
sample = data.loc['01-01-1960':'12-31-1984']
for preview in (sample.head(2), sample.tail(2)):
    display(preview)

import matplotlib.pyplot as plt
# Keep only the two series in levels
sample_lev = sample[['logGDP','logCons']]
# Draw both series on one axis, each with its own label and line style
fig, ax = plt.subplots(figsize=(6.5,2.5))
line_specs = [
    ('logGDP', '100*log(Real GDP)', '-'),
    ('logCons', '100*log(Real Cons.)', '--'),
]
for col, legend_label, line_style in line_specs:
    ax.plot(sample_lev[col], label=legend_label, linestyle=line_style)
ax.grid()
ax.legend(loc='upper left');

from statsmodels.tsa.stattools import adfuller
# Helper that runs an ADF test and tabulates its headline results
def adf_test(data, const_trend):
    """Run an augmented Dickey-Fuller test and return a one-column summary.

    Parameters
    ----------
    data : object with a ``name`` attribute
        The series handed to ``adfuller``; ``data.name`` becomes the
        label of the single output column.
    const_trend : str
        Forwarded to ``adfuller`` as its ``regression`` argument
        (this file calls it with ``'c'`` and ``'ct'``).

    Returns
    -------
    pandas.DataFrame
        Rows 'Test Statistic', 'p-value', '# of Lags' and '# of Obs',
        filled with the first four entries of the ``adfuller`` result.
    """
    row_labels = ['Test Statistic','p-value','# of Lags','# of Obs']
    adf_output = adfuller(data, regression=const_trend)
    # Only the first four entries are reported; the rest are discarded
    summary = dict(zip(row_labels, adf_output[:4]))
    return pd.DataFrame.from_dict(summary, orient='index',
                                  columns=[data.name])

# ADF test (regression='c') for every column of the sample,
# with the one-column result tables placed side by side
dl = [adf_test(sample[column], const_trend='c') for column in sample.columns]
results = pd.concat(dl, axis=1)
display(results)

# Repeat the ADF test on the level series with regression='ct'
# to control for a deterministic time trend
dl = [adf_test(sample_lev[column], 'ct') for column in sample_lev.columns]
results = pd.concat(dl, axis=1)
display(results)

# Johansen cointegration test on the two level series
from statsmodels.tsa.vector_ar.vecm import coint_johansen
# NOTE(review): det_order=1 is used here while the VECM further down is
# fitted with deterministic='co' - confirm the two specifications are
# meant to differ.
test = coint_johansen(sample_lev, det_order=1, k_ar_diff=1)
test_stats = test.lr1
# Column 1 of the critical-value table is the one reported as "5%" below
crit_vals = test.cvt[:, 1]
# Report each null hypothesis H_0: r <= r_0 together with the decision
for r_0, (test_stat, crit_val) in enumerate(zip(test_stats, crit_vals)):
    decision = ('  => Reject null hypothesis.' if test_stat > crit_val
                else '  => Fail to reject null hypothesis.')
    print(f'H_0: r <= {r_0}')
    print(f'  Test Stat. = {test_stat:.2f}, 5% Crit. Value = {crit_val:.2f}')
    print(decision)

H_0: r <= 0
  Test Stat. = 25.85, 5% Crit. Value = 18.40
  => Reject null hypothesis.
H_0: r <= 1
  Test Stat. = 3.69, 5% Crit. Value = 3.84
  => Fail to reject null hypothesis.

from statsmodels.tsa.vector_ar.vecm import select_order
# Lag-order selection for the VECM: consider up to 8 lags with
# deterministic='co', then report the lag order chosen by AIC
lag_order_results = select_order(sample_lev, deterministic='co', maxlags=8)
print(f'Selected lag order (AIC) = {lag_order_results.aic}')

Selected lag order (AIC) = 1

from statsmodels.tsa.vector_ar.vecm import select_coint_rank
# Cointegration rank from the trace method, using the AIC-selected
# number of lagged differences
coint_rank_results = select_coint_rank(
    sample_lev,
    det_order=1,
    k_ar_diff=lag_order_results.aic,
    method='trace',
)
print(f'Cointegration rank = {coint_rank_results.rank}')

Cointegration rank = 1

from statsmodels.tsa.vector_ar.vecm import VECM
# Specification: the lag order and cointegration rank come from the
# selection results computed earlier
vecm_spec = dict(
    deterministic='co',
    k_ar_diff=lag_order_results.aic,
    coint_rank=coint_rank_results.rank,
)
# Estimate the VECM and keep the summary tables for later display
model_vecm = VECM(sample_lev, **vecm_spec)
results_vecm = model_vecm.fit()
tables = results_vecm.summary().tables
# Optional: print every summary table as HTML
#for _, tab in enumerate(tables):
#    print(tab.as_html())

# Pi = alpha * beta' and its rank
Pi = results_vecm.alpha @ results_vecm.beta.T
rankPi = np.linalg.matrix_rank(Pi)
# Print the two factors, their product and the rank of the product
report_lines = (
    f'alpha = {results_vecm.alpha}',
    f'beta = {results_vecm.beta}',
    f'Pi = {Pi}',
    f'rank(Pi) = {rankPi}',
)
for report_line in report_lines:
    print(report_line)

alpha = [[-0.23609997]
 [-0.12179596]]
beta = [[ 1.       ]
 [-0.9444435]]
Pi = [[-0.23609997  0.22298308]
 [-0.12179596  0.1150294 ]]
rank(Pi) = 1

# Error Correction Term: the level data multiplied by the estimated beta.
# The product with the 2-D beta array is a one-column DataFrame, so take
# that column and give it a real Series name. A plain `.name` attribute
# set on a DataFrame is not pandas metadata and is lost on any copy or
# slice.
# NOTE: only the first column of beta is used; with a cointegration rank
# above 1 the remaining relations would need their own series.
ECT = (sample_lev @ results_vecm.beta).iloc[:, 0].rename('ECT')
# Plot data
fig, ax = plt.subplots(figsize=(6.5,2.5))
ax.plot(ECT); ax.grid(); ax.set_title(ECT.name);

# ADF unit-root test on the error correction term (regression='c')
test = adf_test(ECT, const_trend='c')
display(test)

from statsmodels.tsa.api import VAR
# VAR in levels, with one more lag than the VECM has lagged differences
var_lags = model_vecm.k_ar_diff + 1
model_var = VAR(sample_lev)
results_var = model_var.fit(var_lags)
# Impulse response functions (IRFs), requested with an argument of 20
irf = results_var.irf(20)

# Non-orthogonalized IRFs for an impulse to logGDP
fig = irf.plot(impulse='logGDP', orth=False, figsize=(6.5,4));
# Set the figure-level title to a single blank
fig.suptitle(" ");

# Lag order and the last p in-sample observations passed to the forecast
p = results_var.k_ar
last_obs = sample_lev.values[-p:]
# Forecast 6 quarters ahead
h = 6
forecast = results_var.forecast(y=last_obs, steps=h)
forecast_df = pd.DataFrame(forecast, columns=sample_lev.columns)
# Anchor every date on the final in-sample observation instead of
# repeating a literal date, so the dates follow the sample if its end
# point is ever changed
last_date = sample_lev.index[-1]
# The h quarter-end dates that follow the end of the sample
first_forecast_date = last_date + pd.offsets.QuarterEnd()
dates = pd.date_range(start=first_forecast_date, periods=h, freq='QE')
forecast_df.index = dates  # Assign to index
# Actual data: from p quarters before the sample end to h quarters after it
start_date = last_date - pd.DateOffset(months=3*p)
end_date = last_date + pd.DateOffset(months=3*h)
print(f'start date = {start_date}, end date = {end_date}')
sample_forecast = data[start_date:end_date]

start date = 1984-06-30 00:00:00, end date = 1986-06-30 00:00:00

# logGDP: forecast (dashed) drawn first, then the actual data (solid)
fig, ax = plt.subplots(figsize=(6.5,2))
for frame, line_style in ((forecast_df, '--'), (sample_forecast, '-')):
    frame['logGDP'].plot(ax=ax, linestyle=line_style)
ax.grid()
ax.autoscale(tight=True);

# logCons: forecast (dashed) drawn first, then the actual data (solid)
fig, ax = plt.subplots(figsize=(6.5,2))
for frame, line_style in ((forecast_df, '--'), (sample_forecast, '-')):
    frame['logCons'].plot(ax=ax, linestyle=line_style)
ax.grid()
ax.autoscale(tight=True);

Rank r	Interpretation (system of 3 variables)
0	no cointegration, 3 stochastic trends
1	1 cointegrating relation, 2 stochastic trends
2	2 cointegrating relations, 1 stochastic trend
3	no stochastic trends → system is I(0)

	logGDP	dlogGDP	logCons	dlogCons
1960-03-31	356.027682	2.226717	306.351449	1.839417
1960-06-30	355.485842	-0.541840	306.068692	-0.282757

	logGDP	dlogGDP	logCons	dlogCons
1984-09-30	441.310256	0.959766	393.568460	0.634225
1984-12-31	442.127526	0.817270	394.731279	1.162819

	logGDP	dlogGDP	logCons	dlogCons
Test Statistic	-1.341693	-4.949552	-1.158324	-5.421674
p-value	0.609889	0.000028	0.691262	0.000003
# of Lags	2.000000	1.000000	2.000000	1.000000
# of Obs	97.000000	98.000000	97.000000	98.000000

	logGDP	logCons
Test Statistic	-2.482304	-2.202394
p-value	0.336865	0.488503
# of Lags	2.000000	2.000000
# of Obs	97.000000	97.000000

VECM¶

Summary¶

VECM¶

Cointegration Structure¶

Rank of a matrix¶

Examples¶

Cointegration¶

Johansen Cointegration Test¶

Example¶

Read Data¶

Create Dataframe¶

Transform Data¶

Plot Sample¶

ADF Unit Root Test¶

Trends¶

Cointegration Test¶

Estimation¶

Model Selection¶

Results¶

Cointegrating Relationship¶

Error Correction Term¶

Impulse Response Functions¶

Forecasting¶

Det. terms outside the coint. relation & lagged endog. parameters for equation logGDP
	coef	std err	z	P>\|z\|	[0.025	0.975]
const	16.8363	4.371	3.852	0.000	8.269	25.403
L1.logGDP	0.1289	0.100	1.290	0.197	-0.067	0.325
L1.logCons	0.3300	0.123	2.677	0.007	0.088	0.572

	ECT
Test Statistic	-3.330790
p-value	0.013557
# of Lags	11.000000
# of Obs	88.000000

Det. terms outside the coint. relation & lagged endog. parameters for equation logCons
	coef	std err	z	P>\|z\|	[0.025	0.975]
const	9.3086	4.419	2.106	0.035	0.647	17.970
L1.logGDP	0.1865	0.101	1.845	0.065	-0.012	0.385
L1.logCons	-0.1434	0.125	-1.151	0.250	-0.388	0.101

Loading coefficients (alpha) for equation logCons
	coef	std err	z	P>\|z\|	[0.025	0.975]
ec1	-0.1218	0.063	-1.921	0.055	-0.246	0.002

Cointegration relations for loading-coefficients-column 1
	coef	std err	z	P>\|z\|	[0.025	0.975]
beta.1	1.0000	0	0	0.000	1.000	1.000
beta.2	-0.9444	0.015	-64.031	0.000	-0.973	-0.916