In [3]:
USE_CUDA = False
VERBOSE = False
In [39]:
if USE_CUDA:
    import cudf

    %load_ext cudf.pandas
import importlib
import workbench.src.data_loader as data_loader
import workbench.src.data_process as data_process
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import workbench.utils.utils as utils
from IPython.display import Markdown
from sklearn.model_selection import cross_val_score, train_test_split, GroupKFold, cross_val_predict, GroupShuffleSplit
import workbench.src.feature_select as feature_select
import pandas as pd
from typing import List, Dict
import workbench.src.shared as shared
import workbench.src.validation as validation
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from workbench.src import graph
from workbench.src import simulation
from workbench.src import model_config
from scipy.stats import f_oneway, linregress
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
In [5]:
utils.pandas_config(pd)
utils.plt_config(plt)

sns.set_theme(style="darkgrid", palette="pastel")
plt.style.use("fivethirtyeight")
In [6]:
xg_boost_params = {"device": "cuda"}
In [7]:
importlib.reload(data_loader)
data = data_loader.load_data(inc_players=True)

importlib.reload(data_process)
processed_data = data_process.append_rolling_match_team_stats(data)
dual_df = processed_data.dual_df
full_df = data_process.add_team_strategies(
    source_df=dual_df, team_attrs_df=data.team_attrs_df
)

Model and Strategy¶

Using the insights from our exploratory analysis, we'll build an ML classification model to predict match results. We'll then attempt to use that model to create a profitable betting strategy.

Preparing Data¶

We will combine the data available in the matches dataframe with the team strategies dataframe to build the training dataset. Specifically, we'll use the following target and features:

  • Target Variable: Result (win/draw/loss)

Features:

  • Time-weighted win % over the last N matches (see the rolling-stat sketch after this list)
    • Also time-weighted average goals scored, conceded, and goal deficit per match
  • Sum of the ratings (based on the FIFA games) of the players participating in the match
  • Relative performance of teams during the current and previous season.
  • Tactics/strategies used by the team during the period the game was played.
  • Home advantage and other basic features.
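
To make the rolling features concrete, here is a minimal, illustrative sketch of a time-weighted rolling win rate per team (an exponentially weighted mean over previous matches, excluding the current one). The toy frame and the span value are assumptions; the project's actual rolling features are built in data_process.append_rolling_match_team_stats.

import pandas as pd

# Toy example only: two teams, a binary "won" flag per match in chronological order.
toy = pd.DataFrame({
    "team_id": [10, 10, 10, 20, 20, 20],
    "won":     [1,  0,  1,  0,  1,  1],
})

# Shift by one so the current match is excluded, then apply an
# exponentially weighted mean so that recent matches carry more weight.
toy["rolling_win_pct"] = (
    toy.groupby("team_id")["won"]
       .transform(lambda s: s.shift(1).ewm(span=5).mean())
)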

Data Transformation:¶

  1. We'll transform the matches dataset from the wide format (a single row per match, with separate columns for the home and away teams) into a table with two rows per match, one from the perspective of each team (sketched below).
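
The sketch below shows the idea on a toy wide-format frame; the column names are assumptions, and the real implementation lives in data_process.append_rolling_match_team_stats.

import pandas as pd

# One wide row per match, with separate home_/away_ columns (toy data).
wide = pd.DataFrame({
    "match_api_id": [1],
    "home_team_id": [10], "away_team_id": [20],
    "home_goals": [2], "away_goals": [1],
})

# Home-team perspective.
home = wide.rename(columns={
    "home_team_id": "team_id", "away_team_id": "opponent_id",
    "home_goals": "goals_for", "away_goals": "goals_against",
}).assign(is_home=1)

# Away-team perspective.
away = wide.rename(columns={
    "away_team_id": "team_id", "home_team_id": "opponent_id",
    "away_goals": "goals_for", "home_goals": "goals_against",
}).assign(is_home=0)

# Two rows per match, one from each team's perspective; a result column
# can then be derived from goals_for - goals_against.
dual = pd.concat([home, away], ignore_index=True)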

Concerns¶

Encoding Categorical Variables:¶

Many of our variables are categorical and nominal, so label encoding might introduce issues because it imposes an artificial ordinal relationship where none exists.

Some alternatives to that are:

  • One-Hot encoding
  • Hash Encoding
  • etc.

For now we'll use ordinal numerical label encoding: we have many categorical variables with many levels, and one-hot encoding would significantly increase the dimensionality of the data in that case. This should not be an issue for XGBoost and Random Forest, but it might not be ideal for the ordinal classifier model.

In a future iteration of the model we might consider using one-hot encoding combined with PCA to reduce dimensionality and cross-correlation (specifically for the linear and logistic models).
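
The toy comparison below illustrates the trade-off; the column and level names are made up, and the ordinal variant mirrors the pd.Categorical(...).codes encoding used in the training loop further down.

import pandas as pd

toy = pd.DataFrame({"tactic": ["press", "counter", "possession", "counter"]})

# Ordinal label encoding: a single compact column, but it imposes an
# artificial order on the levels.
toy["tactic_code"] = pd.Categorical(toy["tactic"]).codes

# One-hot encoding: no artificial order, but one extra column per level,
# which blows up dimensionality for high-cardinality variables.
one_hot = pd.get_dummies(toy["tactic"], prefix="tactic")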

In [8]:
if VERBOSE:
    leicester_matches = full_df[full_df["team_id"] == 8197]
    leicester_matches = leicester_matches[
        leicester_matches["season_start_year"] == 2015
        ]

    match_api_id = 1987598
    # Leicester vs Chelsea last game of the 2015 season
    tt = leicester_matches[leicester_matches["match_api_id"] == match_api_id]
    tt2 = data.matches_df[data.matches_df["match_api_id"] == match_api_id]

    display(tt[["win_odds", "draw_odds", "opponent_win_odds"]])

    # Leicester played Away
    display(tt2[["home_win_odds", "draw_odds", "away_win_odds"]])
    display(tt2[["B365H", "B365D", "B365A"]])
In [9]:
# XGBoost only supports class labels starting from 0
full_df_model = full_df.copy()
full_df_model["result"] = full_df_model["result"].map({-1: 0, 0: 1, 1: 2})
In [10]:
importlib.reload(feature_select)
importlib.reload(model_config)
feature_set__map = model_config.get_config()


def get_pipeline(config, enable_hyperparameter_tuning=False):
    if config["preprocessing"]:
        pipeline_steps = [
            ("preprocessing", config["preprocessing"]),  # Add preprocessing step
            (
                "model",
                config["model"](
                    **(config["best_params"] if not enable_hyperparameter_tuning else {})
                ),
            ),
        ]
    else:
        pipeline_steps = [
            (
                "model",
                config["model"](
                    **(
                        config["best_params"]
                        if not enable_hyperparameter_tuning
                        else {}
                    )
                ),
            )
        ]

    pipeline = Pipeline(pipeline_steps)
    return pipeline


def get_feature_set_data(
        val: feature_select.FeatureSet, df: pd.DataFrame, time_based=False
) -> pd.DataFrame:
    feature_names = feature_select.get_feature_sets(val)
    feature_names.append("result")
    feature_names.append("match_api_id")
    feature_names.append("team_id")

    if time_based:
        feature_names.append("season_start_year")
        feature_names.append("stage")

    extracted_df = df[feature_names]
    return extracted_df.copy()


def cap_inf(_df):
    # Cap +/-inf values in "ratio" columns at the column's finite max/min.
    for col in _df.columns:
        if "ratio" in col:
            finite = _df[col].replace([np.inf, -np.inf], np.nan)
            _df[col] = _df[col].replace(np.inf, finite.max())
            _df[col] = _df[col].replace(-np.inf, finite.min())
In [11]:
importlib.reload(feature_select)
importlib.reload(validation)
importlib.reload(graph)
importlib.reload(shared)
ModelTrainingResult = shared.ModelTrainingResult

# Flag to enable/disable hyperparameter tuning
enable_hyperparameter_tuning = False

# kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=645)
group_kfold = GroupShuffleSplit(n_splits=5, random_state=42)
metrics_decls = ["accuracy", "precision_macro", "recall_macro", "f1_macro"]

# Results storage
cv_results = {}
feature_importances = {}
confusion_matrices: Dict[
    str, ModelTrainingResult
] = {}  # Store confusion matrices for each config/model

for key, config in feature_set__map.items():
    extracted_features = get_feature_set_data(config["feature_set"], full_df_model)

    cap_inf(extracted_features)
    if not config["supports_nan"]:
        extracted_features = extracted_features.dropna()

    if config.get("use_cuml", False):
        extracted_features = cudf.DataFrame.from_pandas(extracted_features)

    labels = extracted_features["result"]
    extracted_features = extracted_features.drop(columns=["result"])

    match_team_ids = extracted_features[["match_api_id", "team_id"]]
    extracted_features = extracted_features.drop(columns=["match_api_id"])

    for col in extracted_features.select_dtypes(include=["object", "category"]).columns:
        extracted_features[col] = pd.Categorical(extracted_features[col]).codes

    for func in config["synthetic_funcs"]:
        extracted_features = func(extracted_features)

    if enable_hyperparameter_tuning and config["param_grid"]:
        steps = config["preprocessing"] + [
            (
                "model",
                config["model"](
                    **(
                        config["best_params"]
                        if not enable_hyperparameter_tuning
                        else {}
                    )
                ),
            )
        ]
        pipeline = Pipeline(steps)

        grid_search = GridSearchCV(
            pipeline, config["param_grid"], cv=group_kfold, scoring="f1_macro", n_jobs=-1
        )
        # Group by match_api_id so both rows of a match stay in the same fold
        grid_search.fit(extracted_features, labels, groups=match_team_ids["match_api_id"])
        best_pipeline = grid_search.best_estimator_
        tuning_results = grid_search.cv_results_
        cv_results[key] = {
            "n_samples": len(extracted_features),
            "best_score": grid_search.best_score_,
            "best_params": grid_search.best_params_,
            "all_scores": tuning_results,
        }
    else:
        pipeline = get_pipeline(config=config)
        if config.get("use_cuml", False):
            # Convert to pd DF for compatibility with Sklearn models/functions
            extracted_features = cudf.DataFrame.from_pandas(
                extracted_features
            ).to_pandas()
            labels = labels.to_pandas()

        # Split data for model fitting and eval
        extracted_features_with_ids = extracted_features.copy()
        extracted_features_with_ids["match_api_id"] = match_team_ids["match_api_id"]
        extracted_features_with_ids["team_id"] = match_team_ids["team_id"]

        match_ids_full_df = extracted_features_with_ids["match_api_id"]

        # Unique match_ids and split them
        unique_match_ids = np.unique(match_ids_full_df)
        train_ids, test_ids = train_test_split(unique_match_ids, test_size=0.25, random_state=42)
        # Function to filter rows based on match_id
        filter_rows = lambda ids: np.isin(match_ids_full_df, ids)

        # Construct train and test sets
        X_train, y_train = extracted_features_with_ids[filter_rows(train_ids)], labels[filter_rows(train_ids)]
        X_test, y_test = extracted_features_with_ids[filter_rows(test_ids)], labels[filter_rows(test_ids)]

        if VERBOSE:
            train_ids_set = set(train_ids)
            test_ids_set = set(test_ids)

            common_ids = train_ids_set.intersection(test_ids_set)

            if len(common_ids) == 0:
                print("No overlap in match IDs between train and test sets.")
            else:
                print(f"Overlap found in match IDs: {common_ids}")

        x_test_match_team_ids = X_test[["match_api_id", "team_id"]]

        # Drop the identifier columns from the feature frames
        # (the label Series don't contain them).
        X_train = X_train.drop(columns=["match_api_id", "team_id"])
        X_test = X_test.drop(columns=["match_api_id", "team_id"])

        pipeline.fit(X_train, y_train)

        # Evaluate the model on the test set
        (
            metrics,
            predictions,
            probabilities,
            probabilities_match_id,
        ) = validation.evaluate_model(pipeline, X_test, y_test, x_test_match_team_ids)

        class_accuracies = validation._compute_class_accuracies(
            pipeline, X_test, y_test
        )
        model_md = pipeline.named_steps["model"]
        if hasattr(model_md, "feature_importances_"):
            feature_names = X_train.columns
            feature_importances = model_md.feature_importances_
            feature_importances = zip(feature_names, feature_importances)

            feature_importances = pd.DataFrame(
                feature_importances, columns=["Feature", "Importance"]
            )
            feature_importances = feature_importances.sort_values(
                by="Importance", ascending=False
            )
        else:
            feature_importances = None
        confusion_matrices[key] = ModelTrainingResult(
            feature_importances=feature_importances,
            y_test=y_test,
            x_test=X_test,
            predictions=predictions,
            probabilities=probabilities,
            probabilities_match_id=probabilities_match_id,
            metrics=metrics,
            class_accuracies=class_accuracies,
        )

        extracted_features = extracted_features.drop(columns=["team_id"])

        cv_pipeline = get_pipeline(config=config)

        splits = list(group_kfold.split(extracted_features, labels, match_ids_full_df))

        # Compute cross-validation scores using precomputed splits
        cv_metrics_results = {
            metric: cross_val_score(
                cv_pipeline, extracted_features, labels,
                cv=splits,  # Use precomputed splits
                scoring=metric,
                n_jobs=-1  # Use all available cores
            )
            for metric in metrics_decls
        }

        cv_results[key] = {"n_samples": len(extracted_features)}
        for metric, scores in cv_metrics_results.items():
            cv_results[key][metric] = scores

# CUDA: 17.6s
# CPU: 1min 57.2s

results_table = {}

for key in cv_results:
    if VERBOSE:
        print(f"\nResults for {key}")
        print(f'n-samples: {cv_results[key]["n_samples"]}')

    if "all_scores" in cv_results[key]:
        if VERBOSE:
            print(f'Best Score: {cv_results[key]["best_score"]:.3f}')
            print(f'Best Parameters: {cv_results[key]["best_params"]}')

        all_scores = cv_results[key]["all_scores"]
        param_scores = [
            (
                round(all_scores["mean_test_score"][i], 3),
                {
                    param: all_scores["param_" + param][i]
                    for param in feature_set__map[key]["param_grid"]
                },
            )
            for i in range(len(all_scores["mean_test_score"]))
        ]
        if VERBOSE:
            print("All parameter sets and their scores:")
        for score, params in param_scores:
            print((score, params))

    else:
        results_table[key] = {}
        for metric in metrics_decls:
            results_table[key][metric] = round(np.mean(cv_results[key][metric]), 3)
            if VERBOSE:
                print(f"{metric}: {np.mean(cv_results[key][metric]):.3f}")

    bins = {">0.5": {}, "0.05 - 0.5": {}}

    for bin_name, features in bins.items():
        sorted_features = dict(
            sorted(features.items(), key=lambda item: round(item[1], 2), reverse=True)
        )
        if VERBOSE:
            print(f"{bin_name}: {sorted_features}")

metrics_df = pd.DataFrame.from_dict(results_table, orient="index").sort_values(
    by=["f1_macro"], ascending=False
)
/tmp/ipykernel_21044/1310694346.py:118: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(columns=["match_api_id", "team_id"], inplace=True)
/home/paulius/miniconda3/envs/rapids_v2/lib/python3.10/site-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
/home/paulius/miniconda3/envs/rapids_v2/lib/python3.10/site-packages/sklearn/metrics/_classification.py:2922: UserWarning: The y_pred values do not sum to one. Starting from 1.5 thiswill result in an error.
  warnings.warn(
(The warnings above were emitted repeatedly, once per model configuration and fold.)
In [12]:
# full_df_model["stage"].value_counts()
In [13]:
# time series cross-validation/rolling-window cross-validation based approach
max_stage = full_df_model['stage'].max()
stage_chunk_size = 10
seasons = sorted(full_df_model['season_start_year'].unique())
stages = range(0, max_stage + 1, stage_chunk_size)

initial_seasons = [2008, 2009, 2010, 2011]
subsequent_seasons = [season for season in seasons if season not in initial_seasons]

time_based_metrics = {key: {} for key in feature_set__map}
for key, config in feature_set__map.items():
    extracted_features = get_feature_set_data(config["feature_set"], full_df_model, time_based=True)
    cap_inf(extracted_features)
    if not config["supports_nan"]:
        extracted_features = extracted_features.dropna()

    labels = extracted_features['result']

    # Define training features
    training_features = extracted_features.drop(
        columns=['result', 'season_start_year', 'match_api_id', 'team_id', 'stage'])

    # Initial training set
    initial_train_mask = extracted_features['season_start_year'].isin(initial_seasons)
    X_train_initial, y_train_initial = training_features[initial_train_mask], labels[initial_train_mask]

    # Initialize pipeline with initial training data
    pipeline = get_pipeline(config)
    pipeline.fit(X_train_initial, y_train_initial)

    # Process each subsequent season
    for season in subsequent_seasons:
        for start_stage in stages:
            end_stage = min(start_stage + stage_chunk_size, max_stage + 1)

            # Define test mask for the current chunk
            test_chunk = (extracted_features['season_start_year'] == season) & \
                         (extracted_features['stage'] >= start_stage) & \
                         (extracted_features['stage'] < end_stage)

            match_api_ids = extracted_features[test_chunk][['match_api_id', 'team_id']]
            X_test, y_test = training_features[test_chunk], labels[test_chunk]

            # computed_metrics = compute_metrics(pipeline, X_test, y_test)
            # Evaluate the model on the test set
            (
                metrics,
                predictions,
                probabilities,
                probabilities_match_id,
            ) = validation.evaluate_model(pipeline, X_test, y_test, match_api_ids)

            cv_results[f"{key}_Season_{season}_Stages_{start_stage}_to_{end_stage}"] = metrics

            # Retrain the model with data including the current chunk
            current_train_mask = (extracted_features['season_start_year'] < season) | \
                                 ((extracted_features['season_start_year'] == season) & \
                                  (extracted_features['stage'] < end_stage))
            X_train_current, y_train_current = training_features[current_train_mask], labels[current_train_mask]

            pipeline = get_pipeline(config)
            pipeline.fit(X_train_current, y_train_current)

            season_stage_key = (season, start_stage, end_stage)
            time_based_metrics[key][season_stage_key] = metrics
            time_based_metrics[key][season_stage_key]['n_samples_cumulative'] = len(y_train_current) + len(y_test)
            time_based_metrics[key][season_stage_key]['n_samples'] = len(y_test)
            
            if VERBOSE:
                print(
                    f"{season_stage_key}: {len(X_train_current)} - ({len(X_train_current) / len(extracted_features):.2%})")
/home/paulius/miniconda3/envs/rapids_v2/lib/python3.10/site-packages/sklearn/metrics/_classification.py:2922: UserWarning: The y_pred values do not sum to one. Starting from 1.5 thiswill result in an error.
  warnings.warn(
(The warning above was emitted repeatedly, once per season/stage chunk.)

Building a Classification Model¶

Model and Variable Selection¶

Currently, we have included these configurations:

  • "Naive | Home Advantage":
    • A naive (worst case) baseline model, assuming all matches are won by the home team.
  • "Naive | Average Betting Odds":
    • A (best case) baseline model based on averaged betting odds from all companies in the dataset. While we can't expect to beat this model's performance over the entire dataset, ideally our model will compete with it in a subset of matches.
  • "Logistic | Team Rating":
  • "Logistic | Team Rating + Home":
    • Simple logistic models using only the sum of player ratings and home advantage features.
  • "Logistic | Full":
    • Additionally, includes the ratios of all the rolling stats and other features included in XGBoost.
  • "Baseline | XGBoost":
    • Utilizes various derivative (mainly rolling stats) features along with player ratings.
  • "Baseline | XGBoost + Avg. Odds":
    • Also includes average betting odds.

Additionally, we have also tested SVM and Random Forest models, but they provided comparable yet slightly inferior results to similarly complex XGBoost configurations or to the much simpler logistic models.

Pipeline¶

We're using grouped cross-validation (GroupShuffleSplit) with 5 folds, grouping by match so that the same match never appears in both the training and testing sets, to evaluate the accuracy, precision and recall scores of our models; a minimal sketch of the grouping idea follows below. In parallel, we're also using the same configuration to run simple train-test split pipelines, which are used for further analysis (confusion matrices, betting strategy testing, etc.).
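
Here is a minimal sketch of the grouping idea on toy arrays (the IDs and feature values are made up); grouping by match_api_id keeps both team-perspective rows of a match on the same side of every split.

import numpy as np
from sklearn.model_selection import GroupShuffleSplit

# Two rows per match (one per team), so each match_api_id appears twice.
match_ids = np.array([101, 101, 102, 102, 103, 103])
X = np.arange(12).reshape(6, 2)
y = np.array([0, 2, 1, 1, 2, 0])

splitter = GroupShuffleSplit(n_splits=5, random_state=42)
for train_idx, test_idx in splitter.split(X, y, groups=match_ids):
    # No match ends up in both the training and the testing set.
    assert not set(match_ids[train_idx]) & set(match_ids[test_idx])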

Training and Validation¶

We have used two approaches to train and validate our models:

  1. Group Shuffle cross-validation with 5 folds
  2. Time-based (rolling-window) cross-validation

Random Sampling¶

Initially, we trained and validated the model on an entirely random sample using 5-fold CV. This means that the training set includes matches that occurred after some of the matches in the validation set. This is not necessarily a significant issue as long as the observations in the dataset can be treated as entirely independent events. Based on the features we have selected, there are good reasons to believe this assumption holds (however, additional validation is certainly necessary):

  • DATA LEAKAGE: based on the specifics of our dataset and the selected variables, direct leakage is unlikely to be a serious concern:

    • for example, if our training sample includes a match for team X at stage = 20 and the test sample includes that team's match at stage = 15, the rolling averages (goals/points/etc.) for the stage-20 game will include data from stage 15; however, it's highly unlikely that even more complex models like XGBoost can exploit this relationship because:
    • These statistics are not sufficiently unique; the variance between outcomes for football match results is limited, and there are likely to be many other rows with similar/identical stats. Since time/team_id/stage/league/etc. columns are not included, the model likely won't be affected by this.
  • HOWEVER: we have also not validated some of the underlying assumptions:

    • Our features' predictive power remained stable over time (e.g., the ratio of points/goals/etc. predicted game outcomes in 2016 as effectively as in 2008). This might not be a concern over a short period of just 5 years or so; however, if we extended our dataset to 2023, this relationship might well have changed over 10+ years.
  • PLAYER RATINGS: There are additional concerns specifically related to the values extracted from the FIFA games:

    • Their accuracy/quality might have changed significantly between 2008 and 2015.
    • Ratings can be updated multiple times per year to reflect perceived changes in player performance:
      • e.g., at the beginning of the 2015-2016 season, Leicester City F.C. was significantly underestimated: they had only finished 14th in the previous season and had been promoted to the Premier League just a year earlier. However, they ended up winning the 2015-2016 title by a relatively large margin, and during the season the ratings of some of their players increased drastically to reflect their much improved performance. While this is one of the most drastic cases, it still raises several issues:
      • The predictive power of player ratings can change significantly during the season, and we have not addressed this.
      • Additionally, this effect might vary significantly between years (e.g., if the game was updated less often, or not at all, in 2008 compared to 2015).

Due to these potential issues, we will need to compare the results from this model with the time-based validation used in the next section. If the results are comparable, we can, to some extent, assume that the discussed issues are not significant and that the randomly sampled model can be used in further analysis (this would allow us to train and test our model on much larger samples and would be less computationally intensive).

In [14]:
metrics_df
Out[14]:
accuracy precision_macro recall_macro f1_macro
Baseline |XGBoost 0.507 0.446 0.459 0.418
Naive | Average Betting Odds 0.533 0.590 0.476 0.410
Logistic | Full Ratios 0.513 0.342 0.458 0.392
Logistic | Team Rating + Home 0.507 0.338 0.454 0.388
Logistic | Team Rating 0.494 0.329 0.442 0.377
Naive | Home Advantage 0.457 0.305 0.409 0.349

Time Based Cross Validation¶

Additionally, we will train and validate our model using chronological, rather than random, match sampling. Our goal is to check whether the potential issues related to random sampling actually affect the results.

Detailed explanation of our approach:

  1. Initial Setup:
    • We select a cutoff date N and a final date Z (in this case, the 2008-2011 seasons form the initial training window).
  2. Model Training:
    • The model is trained on all the matches in the initial selection (everything before the cutoff date N).
  3. Validation:
    • We select a limited period after the cutoff date (a chunk of 10 stages), then we:
      • Validate our model using only the matches in the selected sample (e.g. the first 10 stages of the 2012 season)
  4. Iteration and Retraining:
    • We retrain the model from scratch on a combined sample: the initial training window plus the chunk(s) already validated in step 3 (i.e. the initial data plus i × 10 stages, appended sequentially).
    • We repeat the validation process on the next chunk of matches.
    • We keep appending matches from each subsequent period until the model has been trained on the full dataset between date N and date Z.
  5. Final Aggregation:
    • We combine the validation results from each iteration, weighting by chunk size (effectively performing time-based cross-validation).

The purpose of this is to simulate the way our model would actually be used in "production", i.e. deciding which matches to bet on over an extended period of time; a schematic of the expanding window is sketched below.
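
The loop below is a schematic of that expanding window on a toy frame (the season and stage values are illustrative; the real loop above uses the 2008-2011 seasons as the initial window, 10-stage chunks and the configured pipelines).

import pandas as pd

# Toy schedule with the columns the real loop relies on.
toy = pd.DataFrame({
    "season_start_year": [2011, 2012, 2012, 2012, 2013, 2013],
    "stage": [30, 5, 15, 35, 5, 25],
})

chunk_size = 10
for season in [2012, 2013]:
    for start in range(0, 40, chunk_size):
        end = min(start + chunk_size, 40)
        # Test on the current chunk only...
        test_mask = (toy["season_start_year"] == season) & \
                    (toy["stage"] >= start) & (toy["stage"] < end)
        # ...then retrain on everything up to and including this chunk
        # before moving on, so the training window keeps expanding
        # chronologically.
        retrain_mask = (toy["season_start_year"] < season) | \
                       ((toy["season_start_year"] == season) & (toy["stage"] < end))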

Results¶

These are the aggregated results for the time-series (rolling-window) cross-validation approach described above, weighted by the number of test samples in each chunk:

In [15]:
weighted_averages = {}
for key, data in time_based_metrics.items():
    metrics_sum = {metric: 0 for metric in data[next(iter(data))]}
    n_test_samples = sum(chunk['n_samples'] for chunk in data.values())
    last_chunk = next(reversed(data.values()))

    for chunk in data.values():
        for metric, value in chunk.items():
            if metric != 'n_samples':
                metrics_sum[metric] += value * chunk['n_samples']

    weighted_averages[key] = {metric: metrics_sum[metric] / n_test_samples if n_test_samples > 0 else 0 for metric in
                              metrics_sum}
    weighted_averages[key]['n_train_sample'] = last_chunk['n_samples_cumulative']
    weighted_averages[key]['n_test_samples'] = n_test_samples
    weighted_averages[key]['average_samples_per_chunk'] = n_test_samples / len(data) if data else 0

    weighted_averages[key].pop('n_samples')
    weighted_averages[key].pop('n_samples_cumulative')

df_weighted_avg = pd.DataFrame(weighted_averages).transpose().sort_values('f1', ascending=False)
column_order = ['average_samples_per_chunk', 'n_test_samples', 'n_train_sample', 'log_loss', 'f1', 'accuracy',
                'precision', 'recall']
df_weighted_avg = df_weighted_avg.reindex(columns=column_order)
round(df_weighted_avg, 3)
Out[15]:
average_samples_per_chunk n_test_samples n_train_sample log_loss f1 accuracy precision recall
Baseline |XGBoost 1617.875 25886.0 53158.0 1.019 0.410 0.497 0.424 0.450
Naive | Average Betting Odds 1405.125 22482.0 46308.0 0.973 0.404 0.528 0.363 0.471
Logistic | Full Ratios 1551.625 24826.0 49550.0 1.021 0.377 0.494 0.329 0.441
Logistic | Team Rating 1617.875 25886.0 51642.0 1.042 0.377 0.493 0.329 0.441
Logistic | Team Rating + Home 1617.875 25886.0 51642.0 1.026 0.369 0.484 0.322 0.433
Naive | Home Advantage 1617.875 25886.0 53158.0 19.889 0.342 0.448 0.299 0.401

We can see that with time-based validation the model performance is somewhat lower than with random CV.

Specifically, the XGBoost model's F1 has decreased from 0.418 to 0.410, and its accuracy from 0.51 to 0.50.

This is not a huge decrease, and it might have more to do with the sizes of the validation samples than with issues in the actual model. With random CV we have about 10,000 samples in each validation fold, whereas with the time-based approach we have about 6.2 times more folds but only ~1,600 rows in each sample.

Next, we'll investigate how the model's performance changed as more matches were added chronologically in every iteration:

In [41]:
SHOW_TIME_DETAILS = True

def calculate_summary_statistics(df, column):
    model = LinearRegression().fit(df[['n_samples']], df[column])
    r2 = r2_score(df[column], model.predict(df[['n_samples']]))

    if column != 'n_samples':
        slope, intercept, r_value, p_value, std_err = linregress(df['n_samples'], df[column])
    else:
        slope, intercept, r_value, p_value, std_err = np.nan, np.nan, np.nan, np.nan, np.nan
    # The best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). A constant model that always predicts the expected value of y, disregarding the input features, would get a  score of 0.0

    # ANOVA for metric
    # anova_result = f_oneway(*[df[df['chunk'] == chunk][column] for chunk in
    #                           df['chunk'].unique()]).pvalue  # Corrected to directly access pvalue

    # The p-value for a hypothesis test whose null hypothesis is that the slope is zero, using Wald Test with t-distribution of the test statistic. See alternative above for alternative hypotheses.

    # e.g. p ~ 0 then This means that there is a significant linear relationship between the 'n_samples' variable and the 'column' variable in your dataset
    d = {
        "slope": slope,
        "intercept": intercept,
        "r_value": r_value,
        "p_value": p_value,
        "std_err": std_err,
        # 'R2': r2,
        # # 'anova_result': anova_result,
        # 'coefficient': coeff,
        'average': df[column].mean(),
        'median': df[column].median(),
        'std.dev': df[column].std(),
        'variance': df[column].var()
    }

    d = {k: round(v, 3) for (k, v) in d.items()}
    return d


metrics_list = ['f1', 'accuracy', 'log_loss']

model_time_info = {}
for key, chunks in time_based_metrics.items():
    data_for_dfs = {metric: [] for metric in metrics_list}

    for (season, start, end), metrics in chunks.items():
        index_tuple = (season, start, end)
        n_samples = metrics.get('n_samples', np.nan)  # Get the number of samples
        n_samples_cumulative = metrics.get('n_samples_cumulative', np.nan)  # Cumulative number of training samples
        for metric in metrics_list:
            metric_value = metrics.get(metric, np.nan)
            data_for_dfs[metric].append(
                {'key': key, 'chunk': index_tuple, 'n_samples': n_samples, 'n_samples_cumulative': n_samples_cumulative,
                 metric: metric_value})
    model_time_info[key] = data_for_dfs

dfs_metrics = {}

metrics_time_viz_dfs = {}

for model_name, data_for_dfs in model_time_info.items():

    if VERBOSE or SHOW_TIME_DETAILS:
        display(Markdown(f"#### {model_name.title()}"))

    for i, metric in enumerate(metrics_list):
        df = pd.DataFrame(data_for_dfs[metric])
        dfs_metrics[metric] = df

        metric_stats = calculate_summary_statistics(df, metric)
        n_samples_stats = calculate_summary_statistics(df, 'n_samples')

        data = {
            metric: metric_stats,
            'n_samples': n_samples_stats
        }

        summary_df = pd.DataFrame(data)

        if VERBOSE or SHOW_TIME_DETAILS:
            display(Markdown(f"##### {metric.title()}"))
            display(df)
            display(summary_df)

        if metric not in metrics_time_viz_dfs:
            metrics_time_viz_dfs[metric] = []

        for index, row in df.iterrows():
            metrics_time_viz_dfs[metric].append({
                "y": row[metric],
                "x": index,
                'dataset': model_name
            })
# df

Naive | Home Advantage¶

F1¶
key chunk n_samples n_samples_cumulative f1
0 Naive | Home Advantage (2012, 0, 10) 1674 29420 0.3280
1 Naive | Home Advantage (2012, 10, 20) 1860 31466 0.3428
2 Naive | Home Advantage (2012, 20, 30) 1860 33326 0.3492
3 Naive | Home Advantage (2012, 30, 39) 1126 33718 0.3364
4 Naive | Home Advantage (2013, 0, 10) 1554 35700 0.3503
5 Naive | Home Advantage (2013, 10, 20) 1700 37546 0.3531
6 Naive | Home Advantage (2013, 20, 30) 1700 39246 0.3518
7 Naive | Home Advantage (2013, 30, 39) 1110 39766 0.3495
8 Naive | Home Advantage (2014, 0, 10) 1692 42040 0.3443
9 Naive | Home Advantage (2014, 10, 20) 1880 44108 0.3352
10 Naive | Home Advantage (2014, 20, 30) 1880 45988 0.3531
11 Naive | Home Advantage (2014, 30, 39) 1198 46504 0.3403
12 Naive | Home Advantage (2015, 0, 10) 1692 48690 0.3422
13 Naive | Home Advantage (2015, 10, 20) 1880 50758 0.3226
14 Naive | Home Advantage (2015, 20, 30) 1880 52638 0.3368
15 Naive | Home Advantage (2015, 30, 39) 1200 53158 0.3446
f1 n_samples
slope -0.000 NaN
intercept 0.348 NaN
r_value -0.108 NaN
p_value 0.691 NaN
std_err 0.000 NaN
average 0.343 1617.875
median 0.344 1696.000
std.dev 0.009 291.500
variance 0.000 84972.517
Accuracy¶
key chunk n_samples n_samples_cumulative accuracy
0 Naive | Home Advantage (2012, 0, 10) 1674 29420 0.4229
1 Naive | Home Advantage (2012, 10, 20) 1860 31466 0.4484
2 Naive | Home Advantage (2012, 20, 30) 1860 33326 0.4548
3 Naive | Home Advantage (2012, 30, 39) 1126 33718 0.4440
4 Naive | Home Advantage (2013, 0, 10) 1554 35700 0.4582
5 Naive | Home Advantage (2013, 10, 20) 1700 37546 0.4682
6 Naive | Home Advantage (2013, 20, 30) 1700 39246 0.4588
7 Naive | Home Advantage (2013, 30, 39) 1110 39766 0.4685
8 Naive | Home Advantage (2014, 0, 10) 1692 42040 0.4468
9 Naive | Home Advantage (2014, 10, 20) 1880 44108 0.4394
10 Naive | Home Advantage (2014, 20, 30) 1880 45988 0.4617
11 Naive | Home Advantage (2014, 30, 39) 1198 46504 0.4491
12 Naive | Home Advantage (2015, 0, 10) 1692 48690 0.4456
13 Naive | Home Advantage (2015, 10, 20) 1880 50758 0.4223
14 Naive | Home Advantage (2015, 20, 30) 1880 52638 0.4362
15 Naive | Home Advantage (2015, 30, 39) 1200 53158 0.4583
accuracy n_samples
slope -0.000 NaN
intercept 0.474 NaN
r_value -0.325 NaN
p_value 0.220 NaN
std_err 0.000 NaN
average 0.449 1617.875
median 0.449 1696.000
std.dev 0.014 291.500
variance 0.000 84972.517
Log_Loss¶
key chunk n_samples n_samples_cumulative log_loss
0 Naive | Home Advantage (2012, 0, 10) 1674 29420 20.799384
1 Naive | Home Advantage (2012, 10, 20) 1860 31466 19.882144
2 Naive | Home Advantage (2012, 20, 30) 1860 33326 19.649605
3 Naive | Home Advantage (2012, 30, 39) 1126 33718 20.038479
4 Naive | Home Advantage (2013, 0, 10) 1554 35700 19.529444
5 Naive | Home Advantage (2013, 10, 20) 1700 37546 19.166743
6 Naive | Home Advantage (2013, 20, 30) 1700 39246 19.505977
7 Naive | Home Advantage (2013, 30, 39) 1110 39766 19.158338
8 Naive | Home Advantage (2014, 0, 10) 1692 42040 19.939042
9 Naive | Home Advantage (2014, 10, 20) 1880 44108 20.207452
10 Naive | Home Advantage (2014, 20, 30) 1880 45988 19.402222
11 Naive | Home Advantage (2014, 30, 39) 1198 46504 19.857105
12 Naive | Home Advantage (2015, 0, 10) 1692 48690 19.981647
13 Naive | Home Advantage (2015, 10, 20) 1880 50758 20.820961
14 Naive | Home Advantage (2015, 20, 30) 1880 52638 20.322485
15 Naive | Home Advantage (2015, 30, 39) 1200 53158 19.523646
log_loss n_samples
slope 0.001 NaN
intercept 18.958 NaN
r_value 0.325 NaN
p_value 0.220 NaN
std_err 0.000 NaN
average 19.862 1617.875
median 19.870 1696.000
std.dev 0.501 291.500
variance 0.251 84972.517

Naive | Average Betting Odds¶

F1¶
key chunk n_samples n_samples_cumulative f1
0 Naive | Average Betting Odds (2012, 0, 10) 1428 25568 0.3917
1 Naive | Average Betting Odds (2012, 10, 20) 1600 27340 0.3895
2 Naive | Average Betting Odds (2012, 20, 30) 1596 28932 0.4038
3 Naive | Average Betting Odds (2012, 30, 39) 1040 29416 0.4250
4 Naive | Average Betting Odds (2013, 0, 10) 1296 30968 0.4190
5 Naive | Average Betting Odds (2013, 10, 20) 1440 32552 0.4200
6 Naive | Average Betting Odds (2013, 20, 30) 1438 33988 0.4034
7 Naive | Average Betting Odds (2013, 30, 39) 1024 34598 0.4118
8 Naive | Average Betting Odds (2014, 0, 10) 1458 36490 0.4004
9 Naive | Average Betting Odds (2014, 10, 20) 1620 38272 0.4024
10 Naive | Average Betting Odds (2014, 20, 30) 1618 39888 0.4172
11 Naive | Average Betting Odds (2014, 30, 39) 1112 40494 0.3975
12 Naive | Average Betting Odds (2015, 0, 10) 1458 42298 0.3978
13 Naive | Average Betting Odds (2015, 10, 20) 1620 44080 0.3869
14 Naive | Average Betting Odds (2015, 20, 30) 1620 45700 0.4013
15 Naive | Average Betting Odds (2015, 30, 39) 1114 46308 0.4004
f1 n_samples
slope -0.000 NaN
intercept 0.432 NaN
r_value -0.380 NaN
p_value 0.146 NaN
std_err 0.000 NaN
average 0.404 1405.125
median 0.402 1449.000
std.dev 0.011 220.274
variance 0.000 48520.517
Accuracy¶
key chunk n_samples n_samples_cumulative accuracy
0 Naive | Average Betting Odds (2012, 0, 10) 1428 25568 0.5028
1 Naive | Average Betting Odds (2012, 10, 20) 1600 27340 0.5112
2 Naive | Average Betting Odds (2012, 20, 30) 1596 28932 0.5276
3 Naive | Average Betting Odds (2012, 30, 39) 1040 29416 0.5308
4 Naive | Average Betting Odds (2013, 0, 10) 1296 30968 0.5509
5 Naive | Average Betting Odds (2013, 10, 20) 1440 32552 0.5569
6 Naive | Average Betting Odds (2013, 20, 30) 1438 33988 0.5285
7 Naive | Average Betting Odds (2013, 30, 39) 1024 34598 0.5508
8 Naive | Average Betting Odds (2014, 0, 10) 1458 36490 0.5199
9 Naive | Average Betting Odds (2014, 10, 20) 1620 38272 0.5272
10 Naive | Average Betting Odds (2014, 20, 30) 1618 39888 0.5488
11 Naive | Average Betting Odds (2014, 30, 39) 1112 40494 0.5234
12 Naive | Average Betting Odds (2015, 0, 10) 1458 42298 0.5185
13 Naive | Average Betting Odds (2015, 10, 20) 1620 44080 0.5086
14 Naive | Average Betting Odds (2015, 20, 30) 1620 45700 0.5210
15 Naive | Average Betting Odds (2015, 30, 39) 1114 46308 0.5332
accuracy n_samples
slope -0.000 NaN
intercept 0.563 NaN
r_value -0.333 NaN
p_value 0.208 NaN
std_err 0.000 NaN
average 0.529 1405.125
median 0.527 1449.000
std.dev 0.016 220.274
variance 0.000 48520.517
Log_Loss¶
key chunk n_samples n_samples_cumulative log_loss
0 Naive | Average Betting Odds (2012, 0, 10) 1428 25568 0.988740
1 Naive | Average Betting Odds (2012, 10, 20) 1600 27340 1.002891
2 Naive | Average Betting Odds (2012, 20, 30) 1596 28932 0.974403
3 Naive | Average Betting Odds (2012, 30, 39) 1040 29416 0.964199
4 Naive | Average Betting Odds (2013, 0, 10) 1296 30968 0.942838
5 Naive | Average Betting Odds (2013, 10, 20) 1440 32552 0.954418
6 Naive | Average Betting Odds (2013, 20, 30) 1438 33988 0.973905
7 Naive | Average Betting Odds (2013, 30, 39) 1024 34598 0.960735
8 Naive | Average Betting Odds (2014, 0, 10) 1458 36490 0.993039
9 Naive | Average Betting Odds (2014, 10, 20) 1620 38272 0.973704
10 Naive | Average Betting Odds (2014, 20, 30) 1618 39888 0.938789
11 Naive | Average Betting Odds (2014, 30, 39) 1112 40494 0.970429
12 Naive | Average Betting Odds (2015, 0, 10) 1458 42298 0.985381
13 Naive | Average Betting Odds (2015, 10, 20) 1620 44080 0.988616
14 Naive | Average Betting Odds (2015, 20, 30) 1620 45700 0.974605
15 Naive | Average Betting Odds (2015, 30, 39) 1114 46308 0.959291
log_loss n_samples
slope 0.000 NaN
intercept 0.933 NaN
r_value 0.340 NaN
p_value 0.197 NaN
std_err 0.000 NaN
average 0.972 1405.125
median 0.974 1449.000
std.dev 0.018 220.274
variance 0.000 48520.517

Logistic | Team Rating¶

F1¶
key chunk n_samples n_samples_cumulative f1
0 Logistic | Team Rating (2012, 0, 10) 1674 27904 0.3817
1 Logistic | Team Rating (2012, 10, 20) 1860 29950 0.3558
2 Logistic | Team Rating (2012, 20, 30) 1860 31810 0.3876
3 Logistic | Team Rating (2012, 30, 39) 1126 32202 0.3861
4 Logistic | Team Rating (2013, 0, 10) 1554 34184 0.3788
5 Logistic | Team Rating (2013, 10, 20) 1700 36030 0.3836
6 Logistic | Team Rating (2013, 20, 30) 1700 37730 0.3667
7 Logistic | Team Rating (2013, 30, 39) 1110 38250 0.4012
8 Logistic | Team Rating (2014, 0, 10) 1692 40524 0.3643
9 Logistic | Team Rating (2014, 10, 20) 1880 42592 0.3741
10 Logistic | Team Rating (2014, 20, 30) 1880 44472 0.3860
11 Logistic | Team Rating (2014, 30, 39) 1198 44988 0.3934
12 Logistic | Team Rating (2015, 0, 10) 1692 47174 0.3707
13 Logistic | Team Rating (2015, 10, 20) 1880 49242 0.3587
14 Logistic | Team Rating (2015, 20, 30) 1880 51122 0.3799
15 Logistic | Team Rating (2015, 30, 39) 1200 51642 0.3772
f1 n_samples
slope -0.000 NaN
intercept 0.416 NaN
r_value -0.560 NaN
p_value 0.024 NaN
std_err 0.000 NaN
average 0.378 1617.875
median 0.379 1696.000
std.dev 0.012 291.500
variance 0.000 84972.517
Accuracy¶
key chunk n_samples n_samples_cumulative accuracy
0 Logistic | Team Rating (2012, 0, 10) 1674 27904 0.4922
1 Logistic | Team Rating (2012, 10, 20) 1860 29950 0.4656
2 Logistic | Team Rating (2012, 20, 30) 1860 31810 0.5048
3 Logistic | Team Rating (2012, 30, 39) 1126 32202 0.5098
4 Logistic | Team Rating (2013, 0, 10) 1554 34184 0.4955
5 Logistic | Team Rating (2013, 10, 20) 1700 36030 0.5088
6 Logistic | Team Rating (2013, 20, 30) 1700 37730 0.4782
7 Logistic | Team Rating (2013, 30, 39) 1110 38250 0.5378
8 Logistic | Team Rating (2014, 0, 10) 1692 40524 0.4728
9 Logistic | Team Rating (2014, 10, 20) 1880 42592 0.4904
10 Logistic | Team Rating (2014, 20, 30) 1880 44472 0.5048
11 Logistic | Team Rating (2014, 30, 39) 1198 44988 0.5192
12 Logistic | Team Rating (2015, 0, 10) 1692 47174 0.4829
13 Logistic | Team Rating (2015, 10, 20) 1880 49242 0.4697
14 Logistic | Team Rating (2015, 20, 30) 1880 51122 0.4920
15 Logistic | Team Rating (2015, 30, 39) 1200 51642 0.5017
accuracy n_samples
slope -0.000 NaN
intercept 0.566 NaN
r_value -0.664 NaN
p_value 0.005 NaN
std_err 0.000 NaN
average 0.495 1617.875
median 0.494 1696.000
std.dev 0.019 291.500
variance 0.000 84972.517
Log_Loss¶
key chunk n_samples n_samples_cumulative log_loss
0 Logistic | Team Rating (2012, 0, 10) 1674 27904 1.046945
1 Logistic | Team Rating (2012, 10, 20) 1860 29950 1.052945
2 Logistic | Team Rating (2012, 20, 30) 1860 31810 1.052187
3 Logistic | Team Rating (2012, 30, 39) 1126 32202 1.036503
4 Logistic | Team Rating (2013, 0, 10) 1554 34184 1.043037
5 Logistic | Team Rating (2013, 10, 20) 1700 36030 1.033356
6 Logistic | Team Rating (2013, 20, 30) 1700 37730 1.048833
7 Logistic | Team Rating (2013, 30, 39) 1110 38250 1.024574
8 Logistic | Team Rating (2014, 0, 10) 1692 40524 1.051169
9 Logistic | Team Rating (2014, 10, 20) 1880 42592 1.038199
10 Logistic | Team Rating (2014, 20, 30) 1880 44472 1.036458
11 Logistic | Team Rating (2014, 30, 39) 1198 44988 1.023452
12 Logistic | Team Rating (2015, 0, 10) 1692 47174 1.040807
13 Logistic | Team Rating (2015, 10, 20) 1880 49242 1.042861
14 Logistic | Team Rating (2015, 20, 30) 1880 51122 1.047223
15 Logistic | Team Rating (2015, 30, 39) 1200 51642 1.035378
log_loss n_samples
slope 0.000 NaN
intercept 1.007 NaN
r_value 0.682 NaN
p_value 0.004 NaN
std_err 0.000 NaN
average 1.041 1617.875
median 1.042 1696.000
std.dev 0.009 291.500
variance 0.000 84972.517

Logistic | Team Rating + Home¶

F1¶
key chunk n_samples n_samples_cumulative f1
0 Logistic | Team Rating + Home (2012, 0, 10) 1674 27904 0.3479
1 Logistic | Team Rating + Home (2012, 10, 20) 1860 29950 0.3650
2 Logistic | Team Rating + Home (2012, 20, 30) 1860 31810 0.3633
3 Logistic | Team Rating + Home (2012, 30, 39) 1126 32202 0.3538
4 Logistic | Team Rating + Home (2013, 0, 10) 1554 34184 0.3759
5 Logistic | Team Rating + Home (2013, 10, 20) 1700 36030 0.3801
6 Logistic | Team Rating + Home (2013, 20, 30) 1700 37730 0.3703
7 Logistic | Team Rating + Home (2013, 30, 39) 1110 38250 0.3690
8 Logistic | Team Rating + Home (2014, 0, 10) 1692 40524 0.3725
9 Logistic | Team Rating + Home (2014, 10, 20) 1880 42592 0.3697
10 Logistic | Team Rating + Home (2014, 20, 30) 1880 44472 0.3811
11 Logistic | Team Rating + Home (2014, 30, 39) 1198 44988 0.3757
12 Logistic | Team Rating + Home (2015, 0, 10) 1692 47174 0.3889
13 Logistic | Team Rating + Home (2015, 10, 20) 1880 49242 0.3538
14 Logistic | Team Rating + Home (2015, 20, 30) 1880 51122 0.3680
15 Logistic | Team Rating + Home (2015, 30, 39) 1200 51642 0.3753
f1 n_samples
slope -0.000 NaN
intercept 0.369 NaN
r_value -0.002 NaN
p_value 0.995 NaN
std_err 0.000 NaN
average 0.369 1617.875
median 0.370 1696.000
std.dev 0.011 291.500
variance 0.000 84972.517
Accuracy¶
key chunk n_samples n_samples_cumulative accuracy
0 Logistic | Team Rating + Home (2012, 0, 10) 1674 27904 0.4486
1 Logistic | Team Rating + Home (2012, 10, 20) 1860 29950 0.4774
2 Logistic | Team Rating + Home (2012, 20, 30) 1860 31810 0.4731
3 Logistic | Team Rating + Home (2012, 30, 39) 1126 32202 0.4671
4 Logistic | Team Rating + Home (2013, 0, 10) 1554 34184 0.4916
5 Logistic | Team Rating + Home (2013, 10, 20) 1700 36030 0.5041
6 Logistic | Team Rating + Home (2013, 20, 30) 1700 37730 0.4829
7 Logistic | Team Rating + Home (2013, 30, 39) 1110 38250 0.4946
8 Logistic | Team Rating + Home (2014, 0, 10) 1692 40524 0.4835
9 Logistic | Team Rating + Home (2014, 10, 20) 1880 42592 0.4846
10 Logistic | Team Rating + Home (2014, 20, 30) 1880 44472 0.4984
11 Logistic | Team Rating + Home (2014, 30, 39) 1198 44988 0.4958
12 Logistic | Team Rating + Home (2015, 0, 10) 1692 47174 0.5065
13 Logistic | Team Rating + Home (2015, 10, 20) 1880 49242 0.4633
14 Logistic | Team Rating + Home (2015, 20, 30) 1880 51122 0.4766
15 Logistic | Team Rating + Home (2015, 30, 39) 1200 51642 0.4992
accuracy n_samples
slope -0.000 NaN
intercept 0.504 NaN
r_value -0.219 NaN
p_value 0.415 NaN
std_err 0.000 NaN
average 0.484 1617.875
median 0.484 1696.000
std.dev 0.016 291.500
variance 0.000 84972.517
Log_Loss¶
key chunk n_samples n_samples_cumulative log_loss
0 Logistic | Team Rating + Home (2012, 0, 10) 1674 27904 1.036278
1 Logistic | Team Rating + Home (2012, 10, 20) 1860 29950 1.037962
2 Logistic | Team Rating + Home (2012, 20, 30) 1860 31810 1.031053
3 Logistic | Team Rating + Home (2012, 30, 39) 1126 32202 1.027545
4 Logistic | Team Rating + Home (2013, 0, 10) 1554 34184 1.020894
5 Logistic | Team Rating + Home (2013, 10, 20) 1700 36030 1.014119
6 Logistic | Team Rating + Home (2013, 20, 30) 1700 37730 1.025690
7 Logistic | Team Rating + Home (2013, 30, 39) 1110 38250 1.011714
8 Logistic | Team Rating + Home (2014, 0, 10) 1692 40524 1.032382
9 Logistic | Team Rating + Home (2014, 10, 20) 1880 42592 1.027532
10 Logistic | Team Rating + Home (2014, 20, 30) 1880 44472 1.013194
11 Logistic | Team Rating + Home (2014, 30, 39) 1198 44988 1.011148
12 Logistic | Team Rating + Home (2015, 0, 10) 1692 47174 1.023994
13 Logistic | Team Rating + Home (2015, 10, 20) 1880 49242 1.039916
14 Logistic | Team Rating + Home (2015, 20, 30) 1880 51122 1.032673
15 Logistic | Team Rating + Home (2015, 30, 39) 1200 51642 1.022610
log_loss n_samples
slope 0.000 NaN
intercept 0.999 NaN
r_value 0.509 NaN
p_value 0.044 NaN
std_err 0.000 NaN
average 1.026 1617.875
median 1.027 1696.000
std.dev 0.009 291.500
variance 0.000 84972.517

Logistic | Full Ratios¶

F1¶
key chunk n_samples n_samples_cumulative f1
0 Logistic | Full Ratios (2012, 0, 10) 1458 26448 0.3651
1 Logistic | Full Ratios (2012, 10, 20) 1846 28682 0.3685
2 Logistic | Full Ratios (2012, 20, 30) 1846 30528 0.3732
3 Logistic | Full Ratios (2012, 30, 39) 1116 30914 0.3717
4 Logistic | Full Ratios (2013, 0, 10) 1342 32482 0.3763
5 Logistic | Full Ratios (2013, 10, 20) 1688 34516 0.3865
6 Logistic | Full Ratios (2013, 20, 30) 1684 36196 0.3772
7 Logistic | Full Ratios (2013, 30, 39) 1108 36728 0.3750
8 Logistic | Full Ratios (2014, 0, 10) 1466 38552 0.3777
9 Logistic | Full Ratios (2014, 10, 20) 1864 40814 0.3740
10 Logistic | Full Ratios (2014, 20, 30) 1858 42666 0.3944
11 Logistic | Full Ratios (2014, 30, 39) 1188 43184 0.3910
12 Logistic | Full Ratios (2015, 0, 10) 1444 44884 0.3859
13 Logistic | Full Ratios (2015, 10, 20) 1854 47148 0.3599
14 Logistic | Full Ratios (2015, 20, 30) 1872 49038 0.3750
15 Logistic | Full Ratios (2015, 30, 39) 1192 49550 0.3872
f1 n_samples
slope -0.000 NaN
intercept 0.388 NaN
r_value -0.216 NaN
p_value 0.423 NaN
std_err 0.000 NaN
average 0.377 1551.625
median 0.376 1575.000
std.dev 0.009 296.052
variance 0.000 87646.517
Accuracy¶
key chunk n_samples n_samples_cumulative accuracy
0 Logistic | Full Ratios (2012, 0, 10) 1458 26448 0.4719
1 Logistic | Full Ratios (2012, 10, 20) 1846 28682 0.4821
2 Logistic | Full Ratios (2012, 20, 30) 1846 30528 0.4870
3 Logistic | Full Ratios (2012, 30, 39) 1116 30914 0.4901
4 Logistic | Full Ratios (2013, 0, 10) 1342 32482 0.4896
5 Logistic | Full Ratios (2013, 10, 20) 1688 34516 0.5124
6 Logistic | Full Ratios (2013, 20, 30) 1684 36196 0.4923
7 Logistic | Full Ratios (2013, 30, 39) 1108 36728 0.5027
8 Logistic | Full Ratios (2014, 0, 10) 1466 38552 0.4898
9 Logistic | Full Ratios (2014, 10, 20) 1864 40814 0.4909
10 Logistic | Full Ratios (2014, 20, 30) 1858 42666 0.5161
11 Logistic | Full Ratios (2014, 30, 39) 1188 43184 0.5168
12 Logistic | Full Ratios (2015, 0, 10) 1444 44884 0.5028
13 Logistic | Full Ratios (2015, 10, 20) 1854 47148 0.4714
14 Logistic | Full Ratios (2015, 20, 30) 1872 49038 0.4856
15 Logistic | Full Ratios (2015, 30, 39) 1192 49550 0.5151
accuracy n_samples
slope -0.000 NaN
intercept 0.522 NaN
r_value -0.346 NaN
p_value 0.190 NaN
std_err 0.000 NaN
average 0.495 1551.625
median 0.490 1575.000
std.dev 0.015 296.052
variance 0.000 87646.517
Log_Loss¶
key chunk n_samples n_samples_cumulative log_loss
0 Logistic | Full Ratios (2012, 0, 10) 1458 26448 1.031347
1 Logistic | Full Ratios (2012, 10, 20) 1846 28682 1.034077
2 Logistic | Full Ratios (2012, 20, 30) 1846 30528 1.021440
3 Logistic | Full Ratios (2012, 30, 39) 1116 30914 1.016212
4 Logistic | Full Ratios (2013, 0, 10) 1342 32482 1.024963
5 Logistic | Full Ratios (2013, 10, 20) 1688 34516 1.009384
6 Logistic | Full Ratios (2013, 20, 30) 1684 36196 1.018421
7 Logistic | Full Ratios (2013, 30, 39) 1108 36728 1.007390
8 Logistic | Full Ratios (2014, 0, 10) 1466 38552 1.026080
9 Logistic | Full Ratios (2014, 10, 20) 1864 40814 1.019529
10 Logistic | Full Ratios (2014, 20, 30) 1858 42666 0.999832
11 Logistic | Full Ratios (2014, 30, 39) 1188 43184 1.005865
12 Logistic | Full Ratios (2015, 0, 10) 1444 44884 1.020990
13 Logistic | Full Ratios (2015, 10, 20) 1854 47148 1.037662
14 Logistic | Full Ratios (2015, 20, 30) 1872 49038 1.030728
15 Logistic | Full Ratios (2015, 30, 39) 1192 49550 1.014653
log_loss n_samples
slope 0.000 NaN
intercept 1.000 NaN
r_value 0.354 NaN
p_value 0.179 NaN
std_err 0.000 NaN
average 1.020 1551.625
median 1.020 1575.000
std.dev 0.011 296.052
variance 0.000 87646.517

Baseline |XGBoost¶

F1¶
key chunk n_samples n_samples_cumulative f1
0 Baseline |XGBoost (2012, 0, 10) 1674 29420 0.4117
1 Baseline |XGBoost (2012, 10, 20) 1860 31466 0.3966
2 Baseline |XGBoost (2012, 20, 30) 1860 33326 0.4258
3 Baseline |XGBoost (2012, 30, 39) 1126 33718 0.4179
4 Baseline |XGBoost (2013, 0, 10) 1554 35700 0.4215
5 Baseline |XGBoost (2013, 10, 20) 1700 37546 0.4091
6 Baseline |XGBoost (2013, 20, 30) 1700 39246 0.4326
7 Baseline |XGBoost (2013, 30, 39) 1110 39766 0.4327
8 Baseline |XGBoost (2014, 0, 10) 1692 42040 0.4012
9 Baseline |XGBoost (2014, 10, 20) 1880 44108 0.3969
10 Baseline |XGBoost (2014, 20, 30) 1880 45988 0.4275
11 Baseline |XGBoost (2014, 30, 39) 1198 46504 0.3922
12 Baseline |XGBoost (2015, 0, 10) 1692 48690 0.4184
13 Baseline |XGBoost (2015, 10, 20) 1880 50758 0.3823
14 Baseline |XGBoost (2015, 20, 30) 1880 52638 0.3965
15 Baseline |XGBoost (2015, 30, 39) 1200 53158 0.4023
f1 n_samples
slope -0.000 NaN
intercept 0.426 NaN
r_value -0.187 NaN
p_value 0.488 NaN
std_err 0.000 NaN
average 0.410 1617.875
median 0.410 1696.000
std.dev 0.015 291.500
variance 0.000 84972.517
Accuracy¶
key chunk n_samples n_samples_cumulative accuracy
0 Baseline |XGBoost (2012, 0, 10) 1674 29420 0.4809
1 Baseline |XGBoost (2012, 10, 20) 1860 31466 0.4866
2 Baseline |XGBoost (2012, 20, 30) 1860 33326 0.5022
3 Baseline |XGBoost (2012, 30, 39) 1126 33718 0.5009
4 Baseline |XGBoost (2013, 0, 10) 1554 35700 0.4942
5 Baseline |XGBoost (2013, 10, 20) 1700 37546 0.5029
6 Baseline |XGBoost (2013, 20, 30) 1700 39246 0.5159
7 Baseline |XGBoost (2013, 30, 39) 1110 39766 0.5315
8 Baseline |XGBoost (2014, 0, 10) 1692 42040 0.4781
9 Baseline |XGBoost (2014, 10, 20) 1880 44108 0.4989
10 Baseline |XGBoost (2014, 20, 30) 1880 45988 0.5266
11 Baseline |XGBoost (2014, 30, 39) 1198 46504 0.5042
12 Baseline |XGBoost (2015, 0, 10) 1692 48690 0.4941
13 Baseline |XGBoost (2015, 10, 20) 1880 50758 0.4702
14 Baseline |XGBoost (2015, 20, 30) 1880 52638 0.4819
15 Baseline |XGBoost (2015, 30, 39) 1200 53158 0.5067
accuracy n_samples
slope -0.000 NaN
intercept 0.538 NaN
r_value -0.416 NaN
p_value 0.109 NaN
std_err 0.000 NaN
average 0.498 1617.875
median 0.500 1696.000
std.dev 0.017 291.500
variance 0.000 84972.517
Log_Loss¶
key chunk n_samples n_samples_cumulative log_loss
0 Baseline |XGBoost (2012, 0, 10) 1674 29420 1.037146
1 Baseline |XGBoost (2012, 10, 20) 1860 31466 1.043895
2 Baseline |XGBoost (2012, 20, 30) 1860 33326 1.029133
3 Baseline |XGBoost (2012, 30, 39) 1126 33718 0.992766
4 Baseline |XGBoost (2013, 0, 10) 1554 35700 1.013089
5 Baseline |XGBoost (2013, 10, 20) 1700 37546 1.005648
6 Baseline |XGBoost (2013, 20, 30) 1700 39246 0.998734
7 Baseline |XGBoost (2013, 30, 39) 1110 39766 1.004826
8 Baseline |XGBoost (2014, 0, 10) 1692 42040 1.035338
9 Baseline |XGBoost (2014, 10, 20) 1880 44108 1.025898
10 Baseline |XGBoost (2014, 20, 30) 1880 45988 0.989258
11 Baseline |XGBoost (2014, 30, 39) 1198 46504 1.003848
12 Baseline |XGBoost (2015, 0, 10) 1692 48690 1.033974
13 Baseline |XGBoost (2015, 10, 20) 1880 50758 1.039431
14 Baseline |XGBoost (2015, 20, 30) 1880 52638 1.016778
15 Baseline |XGBoost (2015, 30, 39) 1200 53158 1.000190
log_loss n_samples
slope 0.000 NaN
intercept 0.963 NaN
r_value 0.537 NaN
p_value 0.032 NaN
std_err 0.000 NaN
average 1.017 1617.875
median 1.015 1696.000
std.dev 0.018 291.500
variance 0.000 84972.517
In [17]:
for metric, data in metrics_time_viz_dfs.items():
    _df = pd.DataFrame(data)

    g = sns.lmplot(
        data=_df, x="x", y="y", col="dataset", hue="dataset",
        col_wrap=3,
        palette="muted",
        # ci=25,
        # robust=True,
        height=6,
        scatter_kws={"s": 25, "alpha": 0.75}
    )
    
    if metric == "log_loss":
        g.set(ylim=(0.75, 1.25))

    
    plt.subplots_adjust(top=0.9) 
    plt.figtext(0.5, 0.95, metric, ha="center", fontsize=24)
    
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

The main thing to note is the significant variance in f1 scores, both between models and across chunks. The performance of XGBoost decreased over time, while the opposite holds for the Logistic model.
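For reference, trend statistics like the slope/r_value/p_value rows in the tables above can be obtained with a simple linear regression over the per-chunk scores. A minimal sketch, assuming the regression is run against the chunk index (the exact x variable used by the notebook is an assumption here); the `chunk_f1` values are taken from the "Baseline |XGBoost" table above:

from scipy.stats import linregress

# Per-chunk f1 scores for "Baseline |XGBoost", in chronological chunk order.
chunk_f1 = [0.4117, 0.3966, 0.4258, 0.4179, 0.4215, 0.4091, 0.4326, 0.4327,
            0.4012, 0.3969, 0.4275, 0.3922, 0.4184, 0.3823, 0.3965, 0.4023]

# Regress the metric on the chunk index to estimate the time trend.
trend = linregress(range(len(chunk_f1)), chunk_f1)
print(f"slope={trend.slope:.5f}, r_value={trend.rvalue:.3f}, p_value={trend.pvalue:.3f}")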

Comparing Model Performance¶

As baselines we can use the odds provided by betting companies (averaged for this model) and a dummy model that always predicts a win for the home team. Realistically we can't expect to beat the models used by betting companies, so the best we can hope for is that our model performs significantly better than the "Home Advantage" baseline and comes relatively close to the "Betting Odds" one.
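As an illustration of what the "Home Advantage" dummy baseline amounts to, a minimal sketch is shown below; the `is_home` column and the assumption that `result` == 1 encodes a win for the row's team are hypothetical, not the project's actual schema:

import pandas as pd

def home_advantage_accuracy(df: pd.DataFrame) -> float:
    """Accuracy of a dummy model that always predicts a win for the home team.

    Assumes a dual-row match table with an `is_home` flag and a `result`
    column where 1 encodes a win for the row's team (hypothetical encoding).
    """
    home_rows = df[df["is_home"] == 1]
    return float((home_rows["result"] == 1).mean())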

A relatively simple Logistic model, which only uses home advantage variables plus the ratio of the summed player ratings of the two teams (based on the FIFA games), performs almost as well as the XGBoost model, which uses many more features, most of them derived or combined from multiple variables in the source dataframe. Superficially this might suggest that the XGBoost model is superfluous because a much simpler model is almost as good; however, we don't think that's the case. There are two main reasons why XGBoost might be more suitable for our use case:

  • XGBoost has a better f1 score.
  • It is also much more accurate at predicting probabilities. Given the nature of our problem (building a model that lets us create a profitable betting strategy), accurately classifying wins/draws/losses is not the goal in itself, so maximizing accuracy/precision/recall matters only for initial model selection and development. Our main objective is to beat the odds provided by the betting companies by estimating the class probabilities more accurately.
    • This will be examined further below, where we'll see that XGBoost is much better at predicting probabilities than the much simpler Logistic model.
Additional Details:¶
  • XGBoost requires a significantly larger number of samples to provide reasonably good performance, e.g. cutting the number of training samples by ~80% results in a significant decrease in accuracy/F1, while Logistic models still perform reasonably well.
  • Including additional variables with relatively low predictive power can significantly decrease the performance of the Logistic model, while XGBoost is largely unaffected and generally handles them much better (so variable selection is much easier).
In [18]:
if VERBOSE:
    for k, v in confusion_matrices.items():
        display(k)
        display(confusion_matrices[k].metrics)

Next we'll look at the performance of our classifier models in more detail, including how performance differs between the predicted classes.

In [18]:
%matplotlib inline


importlib.reload(graph)
# sns.set_theme(style='dark', palette='pastel')

n = len(confusion_matrices)
columns = 2
rows = (n + 1) // columns
height = 8
width = height * columns

fig, axes = plt.subplots(
    rows, columns, figsize=(width, height * rows), constrained_layout=True
)
plt.suptitle("Confusion Matrices: Best Models based on f1", fontsize=20)

confusion_matrix_labels = ["Loss", "Draw", "Win"]
confusion_matrix_axis_label = "Match Result"


def make_annotations(cv_info: dict):
    return f"log_loss={cv_info['log_loss']:.2f}, f1={cv_info['f1']:.2f}, precision={cv_info['precision']:.2f}, recall={cv_info['recall']:.2f}, accuracy={cv_info['accuracy']:.2f}"


axes_flat = axes.flatten()
for i, (model_key, matrix_data) in enumerate(confusion_matrices.items()):
    graph.confusion_matrix_plot(
        confusion_matrices[model_key],
        title=model_key,
        axis_label=confusion_matrix_axis_label,
        ax=axes_flat[i],
        labels=confusion_matrix_labels,
        annotations=make_annotations(confusion_matrices[model_key].metrics),
    )

# Hide any unused axes
for j in range(i + 1, len(axes_flat)):
    axes_flat[j].axis("off")

plt.show()
No description has been provided for this image

Because only about 25% of all games end in a draw and there seemingly are no factors that increase the likelihood of a game ending in a draw (loss, draw, win is effectively an ordinal variable), we can't really use betting odds to predict draws: a win/loss for one of the teams is always the more likely outcome.

This is confirmed by all of our models. Interestingly, XGBoost tries to predict draws in some cases, but with very poor accuracy, while the Logistic models only ever predict wins or losses. This might suggest that the Logistic models would perform better when predicting probabilities, since their behaviour is closer to that of the betting companies' models (although later we'll see that's not necessarily the case).

In [19]:
display(Markdown("##### Probability of game ending in a draw by league:"))
overall_prob = pd.DataFrame(
    {"Probability": [full_df["result"].eq(0).mean()]}, index=["Overall"]
)
league_probs = (
    full_df.groupby("league_name")["result"]
    .apply(lambda x: (x == 0).mean())
    .to_frame("Probability")
)
output_df = round(pd.concat([overall_prob, league_probs]), 2)

with pd.option_context("display.max_rows", None, "display.max_columns", None):
    display(output_df)
Probability of game ending in a draw by league:¶
Probability
Overall 0.25
Belgium Jupiler League 0.25
England Premier League 0.26
France Ligue 1 0.28
Germany 1. Bundesliga 0.24
Italy Serie A 0.26
Netherlands Eredivisie 0.24
Poland Ekstraklasa 0.27
Portugal Liga ZON Sagres 0.26
Scotland Premier League 0.25
Spain LIGA BBVA 0.23
Switzerland Super League 0.24
In [20]:
import matplotlib.pyplot as plt

importlib.reload(graph)

n_classes = 3  # Assuming 3 classes [0, 1, 2]
fig, axes = plt.subplots(
    len(confusion_matrices),
    n_classes,
    figsize=(25, 5 * len(confusion_matrices)),
    constrained_layout=True,
)
plt.suptitle("ROC Curves for Each Model and Class", fontsize=16, y=1.02)

for i, (model_key, matrix_data) in enumerate(confusion_matrices.items()):
    y_true = matrix_data.y_test
    y_probs = matrix_data.probabilities

    if n_classes % 2 == 0:
        middle_plot_index = n_classes // 2 - 1
    else:
        middle_plot_index = n_classes // 2

    axes[i, 0].text(
        -0.1,
        1.2,
        model_key,
        ha="left",
        va="center",
        transform=axes[i, 0].transAxes,
        fontsize=22,
    )

    for class_idx in range(n_classes):
        valid_indices = y_probs.iloc[:, class_idx].dropna().index
        y_true_binary = (y_true.loc[valid_indices] == class_idx).astype(int)
        y_probs_filtered = y_probs.loc[valid_indices, class_idx]

        n = y_true_binary.shape[0]  # Row count for the class
        graph.roc_curve_plot(
            y_true=y_true_binary,
            y_probs=y_probs_filtered,
            ax=axes[i, class_idx],
            labels=confusion_matrix_labels,
            annotations=make_annotations(matrix_data.metrics),
            class_idx=class_idx,
            n=n,
        )

plt.show()
No description has been provided for this image
In [21]:
# benchmark_model_target_log = "Logistic | Team Rating + Home"
benchmark_model_target_log = "Logistic | Full Ratios"
cm_model_test_last_log: ModelTrainingResult = confusion_matrices[
    benchmark_model_target_log
]

benchmark_model_target_xgb = "Baseline |XGBoost"
cm_model_test_last_xgb: ModelTrainingResult = confusion_matrices[
    benchmark_model_target_xgb
]

odds_model_target = "Naive | Average Betting Odds"
odds_model_test_last: ModelTrainingResult = confusion_matrices[
    odds_model_target
]
In [21]:
 

One-vs-rest ROC curves (e.g. Loss vs Draw/Win) show a similar picture to the confusion matrices. AUC values of around 0.7 for Win/Loss predictions indicate moderate discriminatory ability for all of our models. For Draw the AUC is only slightly above 0.5, which means performance there is barely better than random guessing.
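A minimal sketch of how such one-vs-rest AUC values can be computed, assuming `y_true` holds the encoded classes (0 = loss, 1 = draw, 2 = win) and `y_probs` is an (n_samples, 3) array of predicted class probabilities (the notebook's own curves come from graph.roc_curve_plot):

import numpy as np
from sklearn.metrics import roc_auc_score

def one_vs_rest_auc(y_true, y_probs, labels=("Loss", "Draw", "Win")) -> dict:
    y_true = np.asarray(y_true)
    y_probs = np.asarray(y_probs)
    # For each class, score the binary problem "this class vs. the rest".
    return {
        label: roc_auc_score((y_true == idx).astype(int), y_probs[:, idx])
        for idx, label in enumerate(labels)
    }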

In [22]:
importlib.reload(graph)

feature_importances = cm_model_test_last_xgb.feature_importances
graph.render_feature_importances_chart(
    feature_importances, title="Feature Importance", subtitle=benchmark_model_target_xgb
)
/home/paulius/data/projects/football_m2_s4/workbench/src/graph.py:631: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
  ax.set_yticklabels(
No description has been provided for this image

It's somewhat disappointing, but as expected, whether the team is playing at home remains the best predictor of the match outcome. However, various other features we've built still have some predictive power. They do not seem to affect classification accuracy itself, but it's possible they helped the XGBoost model predict probabilities more accurately.
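For reference, a minimal sketch of reading the importances straight off a fitted XGBoost classifier; `fitted_xgb` and `feature_names` are hypothetical names, and the notebook itself takes the importances from the ModelTrainingResult and plots them via graph.render_feature_importances_chart:

import pandas as pd

def top_features(fitted_xgb, feature_names, n: int = 10) -> pd.Series:
    # XGBClassifier exposes importances via `feature_importances_`
    # (the importance type depends on the model's settings).
    importances = pd.Series(fitted_xgb.feature_importances_, index=feature_names)
    return importances.sort_values(ascending=False).head(n)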

In [23]:
#### TODO: just add an XGBoost config with only home/rating
#### TODO: ideally also add logistic configs with rolling goal, points etc. ratios

Measuring the Accuracy of Probability Predictions¶

Logarithmic loss (log loss) is a performance metric for evaluating the probabilistic accuracy of classifier models. It ranges from 0 to ∞: a perfect model has a log loss of 0, and higher values indicate worse performance.

Log loss not only penalizes for wrong predictions but also takes into account how confident the predictions were. If the model is highly confident (high probability) about an incorrect prediction, it incurs a higher penalty.

  • 0: perfect predictions; the predicted probability for the actual class is 1.
  • < 1: good predictions, indicating high probability assigned to the actual class.
  • > 1: poor predictions, indicating low probability for the actual class or high confidence in wrong predictions.
  • The penalty escalates rapidly with confidence in wrong predictions: because the metric is logarithmic, confidently predicting the wrong class is punished severely.
  • When comparing models, lower log loss values are better; the model with the lower log loss should generally be preferred.

Not Threshold-dependent: Unlike accuracy, which depends on a threshold to determine the class label, log loss evaluates the raw probabilities, giving a more nuanced view of the model's performance.
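A toy illustration of the penalty structure described above (not the project's own validation code): the same match, which the home team wins, scored once with a confident correct prediction and once with a confident wrong one.

from sklearn.metrics import log_loss

y_true = [2]                               # 0 = loss, 1 = draw, 2 = win
confident_right = [[0.05, 0.15, 0.80]]     # high probability on the actual class
confident_wrong = [[0.80, 0.15, 0.05]]     # high confidence on the wrong class

print(log_loss(y_true, confident_right, labels=[0, 1, 2]))  # ~0.22
print(log_loss(y_true, confident_wrong, labels=[0, 1, 2]))  # ~3.00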

In [24]:
if VERBOSE:
    full_df[[c for c in full_df.columns if "odds" in c] + ["result"]][
        "result"
    ].value_counts()
In [25]:
if VERBOSE:
    importlib.reload(validation)
    validation.calculate_brier_score(full_df)
In [26]:
if VERBOSE:
    importlib.reload(simulation)
    simulation.calculate_company_profit(full_df, 1000000)
In [27]:
log_model_probs = cm_model_test_last_log.probabilities_match_id
xgb_model_probs = cm_model_test_last_xgb.probabilities_match_id
In [28]:
if VERBOSE:
    full_df[full_df["match_api_id"] == 2060427]
In [29]:
importlib.reload(validation)
probs_benchmark_log = validation.benchmark_model_probs(log_model_probs, full_df_model)
probs_benchmark_xgb = validation.benchmark_model_probs(xgb_model_probs, full_df_model)
/home/paulius/data/projects/football_m2_s4/workbench/src/validation.py:128: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.
  combined_df[["prob_win", "prob_draw", "prob_loss"]] /= total_prob[:, None]
In [30]:
if VERBOSE:
    probs_benchmark_log
In [31]:
if VERBOSE:
    probs_benchmark_xgb
In [31]:
 
In [32]:
importlib.reload(graph)
graph.render_by_league_beanchmark_scores(
    probs_benchmark_log, metric="log_loss", model_name=benchmark_model_target_log
)
graph.render_by_league_beanchmark_scores(
    probs_benchmark_xgb, metric="log_loss", model_name=benchmark_model_target_xgb
)
No description has been provided for this image
No description has been provided for this image

We'll start by examining the performance of our models relative to the average betting odds baseline across the different leagues.

Beating the Betting Companies¶

We can see that our model generally under-performs compared to the odds offered by betting companies across all leagues. However, this does not mean that we can't build a profitable betting strategy with it, considering that:

  • We don't need to bet on every single match; we can bet only on matches where our model predicts an outcome with relatively high probability.

To achieve this, we will:

  • Select a specific subset of matches where our model demonstrates a significantly high probability for a particular outcome (above a defined threshold, T).
  • Compare our model's predicted probabilities with the implied probabilities of the odds offered by betting companies (see the sketch below).
  • Use the result to determine the optimal amount to bet on each individual match.
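A minimal sketch of the comparison step, assuming decimal-odds columns `win_odds`, `draw_odds` and `opponent_win_odds` (as used in the simulation cell later) and model probability columns `prob_win`, `prob_draw`, `prob_loss`; the exact schema is an assumption here, not the project's implementation:

import pandas as pd

def implied_probabilities(df: pd.DataFrame) -> pd.DataFrame:
    # Decimal odds -> implied probabilities, then normalise away the
    # bookmaker's margin (overround) so each row sums to 1.
    implied = 1.0 / df[["win_odds", "draw_odds", "opponent_win_odds"]]
    return implied.div(implied.sum(axis=1), axis=0)

def win_edge(df: pd.DataFrame) -> pd.Series:
    # Positive values: our model rates a win as more likely than the bookmaker does.
    return df["prob_win"] - implied_probabilities(df)["win_odds"]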
In [33]:
importlib.reload(graph)
graph.plot_threshold_metrics(
    cm_model_test_last_log, 0.3, 1.0, model_name=benchmark_model_target_log
)
No description has been provided for this image
In [33]:
 

This plot shows how the performance of a model changes if we exclude all rows where the highest predicted probability for any class is below a certain threshold T. For instance, if the model predicts loss = 32%, draw = 32%, win = 36% for a match, standard classifier metrics like accuracy throw away the information about how uncertain that prediction is, yet from our perspective betting on such a game wouldn't make much sense.

We can see that the performance of the Logistic model improves only slightly when we increase the threshold to 50-60%, and actually starts declining if we increase it further, making this model almost entirely useless for our use case.
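A minimal sketch of the thresholding idea itself (the notebook's own implementation lives in validation.filter_matches_above_threshold and graph.plot_threshold_metrics); it assumes `y_probs` and `y_true` share the same class order:

import numpy as np

def accuracy_above_threshold(y_true, y_probs, threshold: float):
    """Accuracy and coverage when only keeping predictions whose top
    class probability is at least `threshold`."""
    probs = np.asarray(y_probs)
    labels = np.asarray(y_true)
    mask = probs.max(axis=1) >= threshold
    if not mask.any():
        return float("nan"), 0.0
    preds = probs[mask].argmax(axis=1)
    accuracy = float((preds == labels[mask]).mean())
    coverage = float(mask.mean())
    return accuracy, coverage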

In [34]:
graph.plot_threshold_metrics(
    cm_model_test_last_xgb, 0.3, 1.0, model_name=benchmark_model_target_xgb
)
No description has been provided for this image
In [35]:
importlib.reload(graph)
graph.plot_threshold_metrics(
    odds_model_test_last, 0.3, 1.0, model_name=odds_model_target
)
/home/paulius/miniconda3/envs/rapids_v2/lib/python3.10/site-packages/sklearn/metrics/_classification.py:2922: UserWarning: The y_pred values do not sum to one. Starting from 1.5 thiswill result in an error.
  warnings.warn(
No description has been provided for this image

On the other hand, the more complex XGBoost model seems to be much better at predicting probabilities. If we only include matches where some class probability exceeds 0.95, the accuracy of our predictions rises to almost 90% (precision/recall scores remain low, but we might be able to mitigate that to some extent).

The number of samples at T = 0.90 seems sufficient: betting on 238 matches is probably enough to evaluate our betting strategy. However, we could experiment with slightly lower thresholds and/or a larger test sample.

In [36]:
importlib.reload(validation)
importlib.reload(graph)
T = 0.9  # Probability threshold, include only rows where any prob. is higher than T.

for _model in [
    (benchmark_model_target_log, cm_model_test_last_log),
    (benchmark_model_target_xgb, cm_model_test_last_xgb),
]:
    filtered_model_training_result: ModelTrainingResult = (
        validation.filter_matches_above_threshold(_model[1], T)
    )

    graph.confusion_matrix_plot(
        model_info=filtered_model_training_result,
        title="Confusion Matrix for High Probability Matches",
        subtitle=_model[0],
        annotations=make_annotations(filtered_model_training_result.metrics),
        include_sample_count=True,
    )

    plt.show()
<Figure size 640x480 with 0 Axes>
No description has been provided for this image
<Figure size 640x480 with 0 Axes>
No description has been provided for this image

Even with an increased threshold, the XGBoost model struggles to distinguish draws from wins/losses. On the positive side, almost no wins or losses are misclassified as losses or wins, respectively.

In [37]:
subset_probs = filtered_model_training_result.probabilities_match_id

if VERBOSE:
    validation.benchmark_model_probs(subset_probs, full_df_model, by_league=False)

In a future iteration we could experiment with a strategy that allows betting on combined outcomes such as win+draw instead of a single outcome, which might partially alleviate this issue. For now, we'll bet on a single outcome (win or loss).

Betting a fixed amount on every game, we managed to make a slight profit. However, this approach is not ideal because we're staking the same amount regardless of how much our predicted probabilities differ from the betting companies' odds. Instead, we should adjust the bet size for games where our prediction and the betting companies' odds diverge strongly.

This is a complicated problem to solve: the optimal bet size should be based on our bankroll, the odds, and our estimated probability of winning. The Kelly criterion is a commonly used formula for exactly this, so we'll try it.
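For a single bet, the Kelly fraction of the bankroll to stake is f* = (b*p - q) / b, where b is the net decimal odds (odds - 1), p our estimated win probability and q = 1 - p. A minimal sketch (the notebook's own betting logic lives in simulation.evaluate_betting_strategy):

def kelly_fraction(p: float, decimal_odds: float) -> float:
    """Fraction of bankroll to stake; returns 0 when there is no positive edge."""
    b = decimal_odds - 1.0   # net odds received on a winning bet
    q = 1.0 - p
    return max((b * p - q) / b, 0.0)

# e.g. a 60% estimated win probability at decimal odds of 2.0
# suggests staking 20% of the bankroll: kelly_fraction(0.6, 2.0) == 0.2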

In [38]:
importlib.reload(simulation)
bet_strats = {
    "Naive (fixed amount per bet)": dict(use_kelly=False),
    "Use (adjusted 0.8/1) Kelly Criterion": dict(
        use_kelly=True, min_range=0.8, max_range=1
    ),
    "Use (adjusted 0.85/1.15) Kelly Criterion": dict(
        use_kelly=True, min_range=0.85, max_range=1.15
    ),
    "Use (adjusted 0.9/1) Kelly Criterion": dict(
        use_kelly=True, min_range=0.9, max_range=1
    ),
    "Use (adjusted 0.91/1.2) Kelly Criterion": dict(
        use_kelly=True, min_range=0.91, max_range=1.2
    ),
}

for key, v in bet_strats.items():
    (
        total_profit_or_loss,
        total_bet_amount,
        total_bets,
        total_valid_matches,
    ) = simulation.evaluate_betting_strategy(
        filtered_model_training_result,
        full_df_model[
            [
                "match_api_id",
                "team_id",
                "win_odds",
                "draw_odds",
                "opponent_win_odds",
                "result",
            ]
        ],
        bankroll=1000,
        **v,
    )
    return_rate = total_profit_or_loss / total_bet_amount
    print(f"\n\n --- \n{key}:")
    print(
        f"Total bets:{total_bets}/{total_valid_matches}\nTotal bet amount: {total_bet_amount:2f}\n Profit: {total_profit_or_loss:.2f}\n Return:{return_rate:.2%}"
    )

 --- 
Naive (fixed amount per bet):
Total bets:221/221
Total bet amount: 1000.000000
 Profit: 30.86
 Return:3.09%


 --- 
Use (adjusted 0.8/1) Kelly Criterion:
Total bets:221/221
Total bet amount: 10733.832842
 Profit: 380.01
 Return:3.54%


 --- 
Use (adjusted 0.85/1.15) Kelly Criterion:
Total bets:160/221
Total bet amount: 3430.006859
 Profit: 123.87
 Return:3.61%


 --- 
Use (adjusted 0.9/1) Kelly Criterion:
Total bets:84/221
Total bet amount: 3572.333641
 Profit: 175.47
 Return:4.91%


 --- 
Use (adjusted 0.91/1.2) Kelly Criterion:
Total bets:75/221
Total bet amount: 925.719984
 Profit: 52.99
 Return:5.72%

Our rate of return improved quite a bit when using the Kelly criterion with some additional tweaking. Based on this very limited test sample, it seems possible to turn a profit using our model. However, additional testing and validation are needed, specifically:

  • The test sample is very small; it's likely that a different train-test split would produce drastically different results, so we should use cross-validation when benchmarking our betting strategy.
  • etc.

Limitations and Potential Improvements:¶

Model Training and Validation¶
  • Time-based cross-validation can be improved by adding time-based sample weights, so that more recent matches carry more weight than older ones (see the sketch after this list).
  • We need additional testing and validation to measure the impact that random sampling might have.
  • The betting strategy simulation should be expanded to use a time-based cross-validation approach.
  • Feature selection and engineering can be improved
    • Various other features should be tried:
      • Individual player ratings
      • Splitting total team rating by player position (attacker/midfield/defence)
    • Various other periods should be tested with rolling stats based features
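A minimal sketch of time-ordered cross-validation with recency weights, referenced in the first bullet above; the half-life parameter, the use of TimeSeriesSplit, and the assumption that X/y are date-sorted DataFrames are illustrative choices, not the project's implementation:

import numpy as np
from sklearn.model_selection import TimeSeriesSplit

def time_decay_weights(n_samples: int, half_life: int) -> np.ndarray:
    # Newest sample gets weight 1.0; the weight halves every `half_life` samples back.
    age = np.arange(n_samples)[::-1]
    return 0.5 ** (age / half_life)

def time_cv_scores(model, X, y, n_splits: int = 5, half_life: int = 2000) -> list:
    scores = []
    for train_idx, test_idx in TimeSeriesSplit(n_splits=n_splits).split(X):
        weights = time_decay_weights(len(train_idx), half_life)
        model.fit(X.iloc[train_idx], y.iloc[train_idx], sample_weight=weights)
        scores.append(model.score(X.iloc[test_idx], y.iloc[test_idx]))
    return scores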