Source code for pycmtensor.results

# results.py
"""PyCMTensor results module"""
import numpy as np
import pandas as pd
from numpy import nan_to_num as nan2num

from .statistics import *

__all__ = ["Results"]


[docs]class Results: """Class object to hold model results""" def __init__(self): self.build_time = None self.train_time = None self.iterations_per_sec = None self.n_params = None self.n_train_samples = None self.n_valid_samples = None self.seed = None self.null_loglikelihood = -np.inf self.init_loglikelihood = -np.inf self.best_loglikelihood = -np.inf self.best_valid_error = 1.0 self.best_step = 0 self.gnorm = None self.hessian_matrix = None self.bhhh_matrix = None self.betas = None self.weights = None self.performance_graph = None self.lr_history_graph = None
[docs] def rho_square(self): """Returns the rho square statistics""" return nan2num(1.0 - self.best_loglikelihood / self.null_loglikelihood)
[docs] def rho_square_bar(self): """Returns the rho square bar statistics""" k = self.n_params return nan2num(1.0 - (self.best_loglikelihood - k) / self.null_loglikelihood)
[docs] def loglikelihood_ratio_test(self): """Returns the log likelihood ratio test statistics""" return -2.0 * (self.null_loglikelihood - self.best_loglikelihood)
[docs] def AIC(self): """Returns the Akaike Information Criterion""" k = self.n_params return 2.0 * (k - self.best_loglikelihood)
[docs] def BIC(self): """Returns the Bayesian Information Criterion""" k = self.n_params n = self.n_train_samples return -2.0 * self.best_loglikelihood + k * np.log(n)
[docs] def benchmark(self): """Returns a pandas DataFrame of a summary of the model benchmark""" stats = pd.DataFrame(columns=["value"]) stats.loc["Seed"] = self.seed stats.loc["Model build time"] = self.build_time stats.loc["Model train time"] = self.train_time stats.loc["iterations per sec"] = f"{self.iterations_per_sec}/s" return stats
[docs] def model_statistics(self): """Returns a pandas DataFrame of a summary of the model training""" stats = pd.DataFrame(columns=["value"]).astype("object") stats.loc["Number of training samples used"] = int(self.n_train_samples) stats.loc["Number of validation samples used"] = int(self.n_valid_samples) stats.loc["Init. log likelihood"] = self.init_loglikelihood stats.loc["Final log likelihood"] = self.best_loglikelihood stats.loc["Accuracy"] = f"{100*(1-self.best_valid_error):.2f}%" stats.loc["Likelihood ratio test"] = self.loglikelihood_ratio_test() stats.loc["Rho square"] = self.rho_square() stats.loc["Rho square bar"] = self.rho_square_bar() stats.loc["Akaike Information Criterion"] = self.AIC() stats.loc["Bayesian Information Criterion"] = self.BIC() stats.loc["Final gradient norm"] = self.gnorm return stats
[docs] def beta_statistics(self): """Returns a pandas DataFrame of the model beta statistics""" betas = self.betas h = self.hessian_matrix bhhh = self.bhhh_matrix stats = pd.DataFrame( index=[b.name for b in betas if (b.status != 1)], columns=["value"] ) stats["std err"] = stderror(h, betas) stats["t-test"] = t_test(stats["std err"], betas) stats["p-value"] = p_value(stats["std err"], betas) stats["rob. std err"] = rob_stderror(h, bhhh, betas) stats["rob. t-test"] = t_test(stats["rob. std err"], betas) stats["rob. p-value"] = p_value(stats["rob. std err"], betas) stats.drop("value", axis=1, inplace=True) df = pd.DataFrame( data=[b().eval() for b in betas], index=[b.name for b in betas], columns=["value"], ) stats = pd.concat([df, stats], axis=1).sort_index().fillna("-").astype("O") return stats
[docs] def model_correlation_matrix(self): """Returns a pandas DataFrame of the model correlation matrix""" betas = self.betas h = self.hessian_matrix mat = pd.DataFrame( columns=[b.name for b in betas if (b.status != 1)], index=[b.name for b in betas if (b.status != 1)], data=correlation_matrix(h), ) return mat
[docs] def model_robust_correlation_matrix(self): """Returns a pandas DataFrame of the model (robust) correlation matrix""" betas = self.betas h = self.hessian_matrix bhhh = self.bhhh_matrix mat = pd.DataFrame( columns=[b.name for b in betas if (b.status != 1)], index=[b.name for b in betas if (b.status != 1)], data=rob_correlation_matrix(h, bhhh), ) return mat