Summary¶
- A Vector Error Correction Model (VECM) is specifically used for analyzing cointegrated time series that are first-order integrated, I($1$).
- VECM builds on the concept of cointegration, i.e., each time series may be I($1$) while certain linear combinations of them are stationary, I($0$).
- VECM is derived from a reduced-form VAR in levels when the data is cointegrated.
- VECM includes an error correction term that reflects the deviation from the lagged long-run average or "equilibrium".
VECM¶
Suppose $\mathbf{y}_t$ is a VAR($p$) with I($1$), possibly cointegrated, variables in levels, then
\begin{gather*} \Delta \mathbf{y}_t = \boldsymbol{\Pi} \mathbf{y}_{t-1} + \sum_{h=1}^{p-1} \boldsymbol{\Gamma}_h \Delta \mathbf{y}_{t-h} + \boldsymbol{\upsilon}_t \end{gather*}
$\Delta \mathbf{y}_t$ is the first difference of a vector of $n$ non-stationary, I($1$), variables, making them stationary. $\sum_{h=1}^{p-1} \boldsymbol{\Gamma}_h \Delta \mathbf{y}_{t-h}$ captures the short-run dynamics of the system, and $\boldsymbol{\upsilon}_t$ is a vector of white noise errors.
$\boldsymbol{\Pi} \mathbf{y}_{t-1}$ is the error correction term, i.e., the long-run equilibrium relationship between variables in levels, where $\boldsymbol{\Pi}$ is the cointegration structure.
- If $\boldsymbol{\Pi} = \boldsymbol{0}$, there is no cointegration and no need for a VECM.
- If $\boldsymbol{\Pi}$ has reduced rank, $r < n$, then $\boldsymbol{\Pi} = \boldsymbol{\alpha} \boldsymbol{\beta}'$, where:
- $\boldsymbol{\beta}_{n \times r}$ is a matrix of cointegrating vectors.
- $\boldsymbol{\alpha}_{n \times r}$ is a matrix of adjustment coefficients, determining how each variable returns to its long-run equilibrium.
- If $\boldsymbol{\Pi}$ is full rank, $r = n$, then $\mathbf{y}_t$ is already stationary and there is no need for a VECM. (A small numerical sketch of the reduced-rank case follows below.)
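To make the reduced-rank case concrete, here is a minimal sketch with made-up numbers for $\boldsymbol{\alpha}$ and $\boldsymbol{\beta}$ (rank is discussed in more detail in the next section), showing that $\boldsymbol{\Pi} = \boldsymbol{\alpha} \boldsymbol{\beta}'$ has rank $r = 1 < n = 2$.
# Scientific computing
import numpy as np
# Hypothetical adjustment coefficients (n x r, with n = 2, r = 1)
alpha = np.array([[-0.3],
                  [ 0.1]])
# Hypothetical cointegrating vector (n x r)
beta = np.array([[ 1.0],
                 [-0.9]])
# Pi = alpha * beta' has reduced rank r = 1 < n = 2
Pi = alpha @ beta.T
print(Pi)
print('rank(Pi) =', np.linalg.matrix_rank(Pi))  # 1 => cointegration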
Rank of a matrix¶
- A square matrix $\mathbf{A}_{n \times n}$ is full rank if its rank is $n$. It's not full rank, or has reduced rank, if its rank $<n$.
- The following conditions are all equivalent — if any one is true, the matrix is full rank (warning: this is not a complete list)
- Linearly independent columns: No column can be written as a linear combination of the others.
- Linearly independent rows: No row is a linear combination of other rows.
- Non-zero determinant: $\det(\mathbf{A}) \ne 0$ or $|\mathbf{A}| \ne 0$
- Invertibility: there exists $\mathbf{A}^{-1}$ such that $\mathbf{A} \mathbf{A}^{-1} = I_n$
Examples¶
Consider the following square matrix
\begin{gather*} \mathbf{A} = \begin{bmatrix} 2 & 4 \\ 1 & 2 \end{bmatrix} \end{gather*}
- Column 2 is $2\times$ column 1 $\rightarrow$ $\mathbf{A}$ is not full rank
- Row 1 is $2\times$ row 2 $\rightarrow$ $\mathbf{A}$ is not full rank
Consider the following square matrix
\begin{gather*} \mathbf{A} = \begin{bmatrix} 3 & 5 \\ 2 & 4 \end{bmatrix} \end{gather*}
- We can't multiply one row or column by a constant to get the other.
- The determinant of a $2 \times 2$ matrix is $|\mathbf{A}| = ad - bc$
- Plug in the values $|\mathbf{A}| = (3)(4) - (5)(2) = 12 - 10 = 2$
- Since $|\mathbf{A}| \neq 0$, the matrix is full rank.
# Scientific computing
import numpy as np
# Matrix
A = np.array([
    [1, 2],
    [2, 4]
])
# Rank
rank = np.linalg.matrix_rank(A)
# Determinant
det = np.linalg.det(A)
# Display
print(f'det = {det:.4f}')
print(rank)
det = 0.0000
1
# Matrix
A = np.array([
    [3, 5],
    [2, 4]
])
# Rank
rank = np.linalg.matrix_rank(A)
# Determinant
det = np.linalg.det(A)
# Display
print(f'det = {det:.4f}')
print(rank)
det = 2.0000
2
For a $3 \times 3$ square matrix
\begin{gather*} \mathbf{A} = \begin{bmatrix} a & b & c \\ d & e & f \\ g & h & i \end{bmatrix} \end{gather*}
The determinant formula is $a(ei - fh) - b(di - fg) + c(dh - eg)$.
That is, it is a signed sum of three $2 \times 2$ determinants (a cofactor expansion along the first row).
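As a quick sanity check of the cofactor formula, the short sketch below (with arbitrary illustrative entries) compares the manual expansion to NumPy's determinant.
import numpy as np
# Arbitrary 3x3 matrix for illustration
A = np.array([[2., 1., 3.],
              [0., 4., 5.],
              [1., 0., 6.]])
(a, b, c), (d, e, f), (g, h, i) = A
# Cofactor expansion along the first row
det_manual = a*(e*i - f*h) - b*(d*i - f*g) + c*(d*h - e*g)
print(det_manual)        # 2*(24-0) - 1*(0-5) + 3*(0-4) = 41
print(np.linalg.det(A))  # should agree up to rounding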
Cointegration¶
- If some linear combination of non-stationary I($1$) time series is stationary, then the variables are cointegrated, i.e., they share a stable long-run linear relationship despite short-run fluctuations.
- Suppose we want to test for how many cointegrating relationships, $r$, exist among $n$, I($1$), time series.
- For example, if $r = 0$, then there are no cointegrating relationships, and if $r = n-1$, then all variables are cointegrated with each other.
- To justify estimating a VECM, there should be at least one cointegrating relationship between the variables; otherwise, just estimate a VAR after first differencing. (A small simulation of cointegration follows below.)
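A minimal simulation (all parameters illustrative) can make this concrete: two I($1$) series that share a common random-walk trend are individually non-stationary, yet the linear combination $y_{1,t} - 2 y_{2,t}$ is stationary.
import numpy as np
from statsmodels.tsa.stattools import adfuller
rng = np.random.default_rng(0)
T = 500
# Common stochastic trend (random walk)
w = np.cumsum(rng.normal(size=T))
# Two I(1) series that load on the common trend w
y1 = 2.0*w + rng.normal(size=T)
y2 = 1.0*w + rng.normal(size=T)
# The combination y1 - 2*y2 removes the common trend
z = y1 - 2.0*y2
# Expect: y1 fails to reject a unit root; z rejects (stationary)
print(f'ADF p-values: y1 = {adfuller(y1)[1]:.3f}, z = {adfuller(z)[1]:.3f}')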
Johansen Cointegration Test¶
- First, use the ADF (unit root) test to confirm that all time series are I($1$).
- The Johansen Cointegration Test has nested hypotheses
- Suppose there are $r_0 = 0$ cointegrating relationships
- Null Hypothesis: $H_0: r \leq r_0$ (i.e., the number of cointegrating relationships $\leq r_0$)
- Alternative Hypothesis: $H_A: r > r_0$
- For each null hypothesis $H_0: r \leq r_0$, if the test statistic $>$ critical value $\Rightarrow$ reject $H_0$.
- Then update $r_0 = 1,2,3,\ldots$ until test fails to reject
- In practice, use the `coint_johansen()` function from `statsmodels.tsa.vector_ar.vecm`.
- Choose the lag length ($p-1$) for the VAR in differences.
- Decide on deterministic components (constant/time trend) in the test.
- Proceed to estimate a VECM if cointegration is found.
Example¶
- The U.S. macroeconomy has gone through many important generational changes that might affect the cointegrating relationships between aggregate time series variables
- E.g., The Great Depression, the Post-War Period, the Great Inflation of the late 1960s to early 1980s, the Great Moderation, the Great Recession (and the slow recovery), and the post-COVID period
- Let's test whether consumption and income are cointegrated for the 25 years prior to the Great Moderation (1960-1984)
- Measure them with log real PCE and log real GDP
Read Data¶
# Data analysis
import pandas as pd
# List data sources (could be in local working directory)
sources = [
    'https://fred.stlouisfed.org/series/GDP/downloaddata/GDP.csv',
    'https://fred.stlouisfed.org/series/PCE/downloaddata/PCE.csv',
    'https://fred.stlouisfed.org/series/GDPDEF/downloaddata/GDPDEF.csv']
varnames = ['gdp', 'cons', 'price']
# Read and append data to list
dl = []
for idx, source in enumerate(sources):
    print('Read:', source)
    var = pd.read_csv(source, index_col='DATE', parse_dates=True)
    var = var.rename(columns={'VALUE': varnames[idx]})
    dl.append(var)
Read: https://fred.stlouisfed.org/series/GDP/downloaddata/GDP.csv
Read: https://fred.stlouisfed.org/series/PCE/downloaddata/PCE.csv
Read: https://fred.stlouisfed.org/series/GDPDEF/downloaddata/GDPDEF.csv
Create Dataframe¶
# Concatenate data to create data frame (time-series table)
raw = pd.concat(dl, axis=1).sort_index()
# Resample/reindex to quarterly frequency
raw = raw.resample('QE').last()
# Display dataframe
display(raw.head(2))
display(raw.tail(2))
| DATE | gdp | cons | price |
|---|---|---|---|
| 1947-03-31 | 243.164 | NaN | 11.141 |
| 1947-06-30 | 245.968 | NaN | 11.299 |

| DATE | gdp | cons | price |
|---|---|---|---|
| 2024-12-31 | 29723.864 | 20408.1 | 126.257 |
| 2025-03-31 | NaN | 20439.3 | NaN |
Transform Data¶
data = pd.DataFrame()
# log real GDP
data['logGDP'] = 100*(np.log(raw['gdp']/raw['price']))
data['dlogGDP'] = data['logGDP'].diff(1)
# log real Consumption
data['logCons'] = 100*(np.log(raw['cons']/raw['price']))
data['dlogCons'] = data['logCons'].diff(1)
# Select sample
sample = data['01-01-1960':'12-31-1984']
display(sample.head(2))
display(sample.tail(2))
| DATE | logGDP | dlogGDP | logCons | dlogCons |
|---|---|---|---|---|
| 1960-03-31 | 356.027682 | 2.226717 | 306.351449 | 1.839417 |
| 1960-06-30 | 355.485842 | -0.541840 | 306.068692 | -0.282757 |

| DATE | logGDP | dlogGDP | logCons | dlogCons |
|---|---|---|---|---|
| 1984-09-30 | 441.310256 | 0.959766 | 393.568460 | 0.634225 |
| 1984-12-31 | 442.127526 | 0.817270 | 394.731279 | 1.162819 |
Plot Sample¶
# Data in levels
sample_lev = sample[['logGDP','logCons']]
# Plot data
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(6.5,2.5))
ax.plot(sample_lev['logGDP'],
        label='100*log(Real GDP)', linestyle='-')
ax.plot(sample_lev['logCons'],
        label='100*log(Real Cons.)', linestyle='--')
ax.grid(); ax.legend(loc='upper left');
ADF Unit Root Test¶
from statsmodels.tsa.stattools import adfuller
# Function to organize ADF test results
def adf_test(data, const_trend):
    keys = ['Test Statistic', 'p-value', '# of Lags', '# of Obs']
    values = adfuller(data, regression=const_trend)
    test = pd.DataFrame.from_dict(dict(zip(keys, values[0:4])),
                                  orient='index', columns=[data.name])
    return test

dl = []
for column in sample.columns:
    test = adf_test(sample[column], const_trend='c')
    dl.append(test)
results = pd.concat(dl, axis=1)
display(results)
| | logGDP | dlogGDP | logCons | dlogCons |
|---|---|---|---|---|
| Test Statistic | -1.341693 | -4.949552 | -1.158324 | -5.421674 |
| p-value | 0.609889 | 0.000028 | 0.691262 | 0.000003 |
| # of Lags | 2.000000 | 1.000000 | 2.000000 | 1.000000 |
| # of Obs | 97.000000 | 98.000000 | 97.000000 | 98.000000 |
- Note: the argument `const_trend` sets whether the test accounts for a constant, `'c'`, or a constant and time trend, `'ct'`.
- Fail to reject the null for both time series in levels.
- Reject the null for both first-differenced time series.
- Since both series are I($1$), proceed to the Johansen Cointegration Test.
Trends¶
- Stochastic trend, i.e., $y_t = \mu + y_{t-1} + \varepsilon_t$
- Can drift without bound — non-stationary
- Deterministic trend, i.e., $y_t = \mu + \delta t + \varepsilon_t$
- Deviations from the time trend are mean-reverting
- Both deterministic and stochastic trends, i.e., $y_t = \mu + \delta t + y_{t-1} + \varepsilon_t$
- First differencing will remove either trend (see the simulation sketch below).
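The following sketch (with illustrative parameters) simulates both types of trend and checks that first differencing produces a stationary series in each case.
import numpy as np
rng = np.random.default_rng(1)
T = 400
t = np.arange(T)
eps = rng.normal(size=T)
mu, delta = 0.5, 0.5
# Stochastic trend: y_t = mu + y_{t-1} + eps_t (random walk with drift)
y_stoch = np.cumsum(mu + eps)
# Deterministic trend: y_t = mu + delta*t + eps_t
y_det = mu + delta*t + eps
# First differences: mu + eps_t and delta + eps_t - eps_{t-1}, both stationary
d_stoch = np.diff(y_stoch)
d_det = np.diff(y_det)
print(d_stoch.mean(), d_det.mean())  # both near 0.5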
# Test data for deterministic time trend
dl = []
for column in sample_lev.columns:
    test = adf_test(sample_lev[column], 'ct')
    dl.append(test)
results = pd.concat(dl, axis=1)
display(results)
| | logGDP | logCons |
|---|---|---|
| Test Statistic | -2.482304 | -2.202394 |
| p-value | 0.336865 | 0.488503 |
| # of Lags | 2.000000 | 2.000000 |
| # of Obs | 97.000000 | 97.000000 |
- In `adfuller`, setting `regression = 'ct'` allows for both a constant and a linear time trend.
- Fail to reject the null that the data (after removing the linear trend) has a unit root, i.e., removing the linear trend did not make the data stationary.
- The data most likely contains a stochastic trend rather than being stationary around a deterministic trend.
- Regardless, first differencing the data for the VECM will remove either type of trend, so we don't need to account for them later.
Cointegration Test¶
# Johansen Cointegration Test
from statsmodels.tsa.vector_ar.vecm import coint_johansen
test = coint_johansen(sample_lev, det_order=-1, k_ar_diff=1)
test_stats = test.lr1; crit_vals = test.cvt[:, 1]
# Print results
for r_0, (test_stat, crit_val) in enumerate(zip(test_stats, crit_vals)):
    print(f'H_0: r <= {r_0}')
    print(f'  Test Stat. = {test_stat:.2f}, 5% Crit. Value = {crit_val:.2f}')
    if test_stat > crit_val:
        print('  => Reject null hypothesis.')
    else:
        print('  => Fail to reject null hypothesis.')
H_0: r <= 0
  Test Stat. = 34.30, 5% Crit. Value = 12.32
  => Reject null hypothesis.
H_0: r <= 1
  Test Stat. = 0.03, 5% Crit. Value = 4.13
  => Fail to reject null hypothesis.
- There is evidence that GDP and consumption are cointegrated.
- `det_order = -1` omits a common constant or linear time trend from the cointegrating relationship.
- `k_ar_diff = 1` means we are using a VECM with 1 lag of differences.
Estimation¶
There is evidence that both GDP and consumption are I($1$) and they are cointegrated.
If the data were not cointegrated, we would first difference the series and estimate a VAR instead.
Proceed with estimating a bivariate VECM:
\begin{aligned} \Delta \tilde{y}_{t} &= \mu_1 + \alpha_1 (\beta_1 \tilde{y}_{t-1} + \beta_2 \tilde{c}_{t-1}) + \Gamma_{11} \Delta \tilde{y}_{t-1} + \Gamma_{12} \Delta \tilde{c}_{t-1} + \cdots + \varepsilon_{1,t} \\ \Delta \tilde{c}_{t} &= \mu_2 + \alpha_2 (\beta_1 \tilde{y}_{t-1} + \beta_2 \tilde{c}_{t-1}) + \Gamma_{21} \Delta \tilde{y}_{t-1} + \Gamma_{22} \Delta \tilde{c}_{t-1} + \cdots +\varepsilon_{2,t} \end{aligned}
where $\tilde{y} = $ log real GDP and $\tilde{c} = $ log real PCE and $\beta_1 \tilde{y}_{t-1} + \beta_2 \tilde{c}_{t-1}$ is their cointegrating relationship.
Model Selection¶
# Select number of lags in VECM
from statsmodels.tsa.vector_ar.vecm import select_order
lag_order_results = select_order(
    sample_lev, maxlags=8, deterministic='co')
print(f'Selected lag order (AIC) = {lag_order_results.aic}')
Selected lag order (AIC) = 1
In `select_order`:

- `deterministic` sets which deterministic terms appear in the VECM, e.g., `deterministic = 'co'` puts a constant outside the error correction term to estimate the intercepts, since $\Delta \tilde{y}_{t}$ and $\Delta \tilde{c}_{t}$ have non-zero means.
- Selection criteria for the lag order include AIC and BIC (among others).
# Determine number of cointegrating relationships
from statsmodels.tsa.vector_ar.vecm import select_coint_rank
coint_rank_results = select_coint_rank(
    sample_lev, method='trace', det_order=-1, k_ar_diff=lag_order_results.aic)
print(f'Cointegration rank = {coint_rank_results.rank}')
Cointegration rank = 1
In `select_coint_rank`:

- `method='trace'` uses the trace statistic from the Johansen Cointegration Test.
- `det_order=-1` means there are no deterministic terms in the cointegrating relationship(s), the same assumption we made when calling `coint_johansen` directly above.
- `k_ar_diff` is set to the lag order selected by the AIC.
Results¶
from statsmodels.tsa.vector_ar.vecm import VECM
# Estimate VECM
model_vecm = VECM(sample_lev, deterministic='co',
                  k_ar_diff=lag_order_results.aic,
                  coint_rank=coint_rank_results.rank)
results_vecm = model_vecm.fit()
tables = results_vecm.summary().tables
# Print summary tables
# for tab in tables:
#     print(tab.as_html())
Short-run coefficients, equation $\Delta$logGDP:

| | coef | std err | z | P>\|z\| | [0.025 | 0.975] |
|---|---|---|---|---|---|---|
| const | 16.8363 | 4.371 | 3.852 | 0.000 | 8.269 | 25.403 |
| L1.logGDP | 0.1289 | 0.100 | 1.290 | 0.197 | -0.067 | 0.325 |
| L1.logCons | 0.3300 | 0.123 | 2.677 | 0.007 | 0.088 | 0.572 |

Short-run coefficients, equation $\Delta$logCons:

| | coef | std err | z | P>\|z\| | [0.025 | 0.975] |
|---|---|---|---|---|---|---|
| const | 9.3086 | 4.419 | 2.106 | 0.035 | 0.647 | 17.970 |
| L1.logGDP | 0.1865 | 0.101 | 1.845 | 0.065 | -0.012 | 0.385 |
| L1.logCons | -0.1434 | 0.125 | -1.151 | 0.250 | -0.388 | 0.101 |

Loading coefficient ($\alpha$), equation $\Delta$logGDP:

| | coef | std err | z | P>\|z\| | [0.025 | 0.975] |
|---|---|---|---|---|---|---|
| ec1 | -0.2361 | 0.063 | -3.764 | 0.000 | -0.359 | -0.113 |

Loading coefficient ($\alpha$), equation $\Delta$logCons:

| | coef | std err | z | P>\|z\| | [0.025 | 0.975] |
|---|---|---|---|---|---|---|
| ec1 | -0.1218 | 0.063 | -1.921 | 0.055 | -0.246 | 0.002 |

Cointegrating vector ($\beta$):

| | coef | std err | z | P>\|z\| | [0.025 | 0.975] |
|---|---|---|---|---|---|---|
| beta.1 | 1.0000 | 0 | 0 | 0.000 | 1.000 | 1.000 |
| beta.2 | -0.9444 | 0.015 | -64.031 | 0.000 | -0.973 | -0.916 |
Cointegrating Relationship¶
- $\boldsymbol{\Pi} \mathbf{y}_{t-1}$ is the error correction term, i.e., the long-run equilibrium relationship between variables
- If $\boldsymbol{\Pi}$ has reduced rank, $r < n$, then $\boldsymbol{\Pi} = \boldsymbol{\alpha} \boldsymbol{\beta}'$, where $\boldsymbol{\beta}_{n \times r}$ is a matrix of cointegrating vectors.
Pi = results_vecm.alpha @ results_vecm.beta.T
rankPi = np.linalg.matrix_rank(Pi)
print(f'alpha = {results_vecm.alpha}')
print(f'beta = {results_vecm.beta}')
print(f'Pi = {Pi}')
print(f'rank(Pi) = {rankPi}')
alpha = [[-0.23609997]
 [-0.12179596]]
beta = [[ 1.       ]
 [-0.9444435]]
Pi = [[-0.23609997  0.22298308]
 [-0.12179596  0.1150294 ]]
rank(Pi) = 1
Error Correction Term¶
Interpreting the cointegration coefficients, or the error correction term (ECT)
\begin{align*} ECT_{t-1} &= \hat{\boldsymbol{\beta}}' [\tilde{y}_{t-1},\tilde{c}_{t-1}]' = \tilde{y}_{t-1} - 0.94 \tilde{c}_{t-1} \\ \rightarrow \tilde{y}_{t-1} &= 0.94 \tilde{c}_{t-1} + ECT_{t-1} \end{align*}
The long-run linear relationship between GDP and consumption is strongly positive, $\tilde{y}_t \approx 0.94 \tilde{c}_t$, and the ECT measures how far away the system is from equilibrium.
$\hat{\alpha}_1 = -0.24$ (and is significant) measures how much GDP corrects disequilibrium, i.e., returns to equilibrium
- The significance indicates which variables adjust, e.g., $\hat{\alpha}_2$ was not significant, so consumption probably does not correct disequilibrium.
- The sign indicates the direction of adjustment of each variable towards the long-run equilibrium, e.g., since $\hat{\alpha}_1 < 0$, GDP pushes the system back to equilibrium
- The magnitude is the speed or strength of adjustment, e.g., since $|\hat{\alpha}_1| > |\hat{\alpha}_2|$, GDP adjusts more quickly than consumption (a rough half-life calculation follows below).
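As a rough back-of-the-envelope calculation (assuming the ECT decays geometrically at rate $1 + \hat{\alpha}_1$ per quarter, ignoring the short-run $\Gamma$ dynamics and consumption's adjustment), the implied half-life of a deviation is about 2.6 quarters.
# Approximate half-life of the ECT under a geometric-decay simplification
alpha_1 = float(results_vecm.alpha[0, 0])  # estimated loading in the GDP equation
half_life = np.log(0.5) / np.log(1 + alpha_1)
print(f'approx. half-life = {half_life:.1f} quarters')  # about 2.6 with alpha_1 = -0.236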
# Error Correction Term
ECT = (sample_lev @ results_vecm.beta).squeeze()
ECT.name = 'ECT'
# Plot data
fig, ax = plt.subplots(figsize=(6.5,2.5))
ax.plot(ECT); ax.grid(); ax.set_title(ECT.name);
# Unit Root Test
test = adf_test(ECT,'c')
display(test)
| | ECT |
|---|---|
| Test Statistic | -3.330790 |
| p-value | 0.013557 |
| # of Lags | 11.000000 |
| # of Obs | 88.000000 |
Result: reject that the ECT has a unit root and conclude that it is probably stationary, as desired.
Impulse Response Functions¶
- The estimated VECM cannot be used to compute IRFs
- IRFs are only defined for a VAR with stationary time series or a VAR with cointegrated time series in levels (e.g., Christiano et al., 2005)
- Instead we would estimate a reduced form VAR($p$) separately to compute IRFs, where $p$ comes from the selected lag order from the VECM estimation
- Then we can ask the question, how much does an increase in income change consumption and for how long?
- Since GDP and consumption are both clearly endogenous to many other things in the macroeconomy, it doesn't make sense to use a Cholesky decomposition (e.g., orthogonalization) to estimate shocks.
# VAR model
from statsmodels.tsa.api import VAR
# make the VAR model
model_var = VAR(sample_lev)
# Estimate VAR(p)
results_var = model_var.fit(model_vecm.k_ar_diff + 1)
# Assign impulse response functions (IRFs)
irf = results_var.irf(20)
# Plot IRFs
fig = irf.plot(orth=False,impulse='logGDP',figsize=(6.5,4));
fig.suptitle(" ");
Forecasting¶
- With the estimated VAR, forecasts can also be made
- Want to compare forecasts to actual data
- This may not be a great example because the 1980s are still relatively volatile.
# Lag order and end of sample
p = results_var.k_ar
last_obs = sample_lev.values[-p:]
# Forecast 6 quarters ahead
h = 6
forecast = results_var.forecast(y=last_obs, steps=h)
forecast_df = pd.DataFrame(forecast, columns=sample_lev.columns)
dates = pd.date_range(start='03-31-1985', periods=h, freq='QE')
forecast_df.index = dates # Assign to index
# Actual Data
start_date = pd.Timestamp('12-31-1984')-pd.DateOffset(months=3*p)
end_date = pd.Timestamp('12-31-1984')+pd.DateOffset(months=3*h)
print(f'start date = {start_date}, end date = {end_date}')
sample_forecast = data[start_date:end_date]
start date = 1984-06-30 00:00:00, end date = 1986-06-30 00:00:00
fig, ax = plt.subplots(figsize=(6.5,2))
forecast_df['logGDP'].plot(ax=ax, linestyle='--')
sample_forecast['logGDP'].plot(ax=ax, linestyle='-')
ax.grid(); ax.autoscale(tight=True);
fig, ax = plt.subplots(figsize=(6.5,2))
forecast_df['logCons'].plot(ax=ax, linestyle='--')
sample_forecast['logCons'].plot(ax=ax, linestyle='-')
ax.grid(); ax.autoscale(tight=True);