SETUP, THRESHOLDS AND ENVIRONMENT


What this section does:

Before looking at a single row of data, we do three things:

First, we load every library the audit will need. This ensures every subsequent section runs without import errors.

Second, we set pass/fail thresholds for every metric we will measure. This is non-negotiable. Setting thresholds after seeing results is how bias gets hidden in audits. We define what counts as passing before we know whether we pass.

Third, we set a reproducibility seed. Anyone who runs this notebook with the same seed on the same data should get identical results. This is what makes the audit independently verifiable.


In [1]:
# ── SECTION 1: SETUP ─────────────────────────────────────────────────────────
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
from datetime import datetime
from scipy import stats
from scipy.stats import ks_2samp

# Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (
    matthews_corrcoef, log_loss,
    precision_recall_curve, auc,
    confusion_matrix, classification_report
)
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE

# Fairness
from fairlearn.metrics import (
    demographic_parity_difference,
    equalized_odds_difference
)

# Explainability
import shap

# ── REPRODUCIBILITY ───────────────────────────────────────────────────────────
# A fixed seed means anyone running this notebook gets identical results.
# Same seed, same data, same outputs. No variance between runs.
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

# ── PASS/FAIL THRESHOLDS ──────────────────────────────────────────────────────
# Set before any results are seen.
# [ClearBoxAI Standard] = internal rule
# [BoG CISD 2026]       = Bank of Ghana binding requirement

THRESHOLDS = {
    'SPD':      0.10,   # Fairness, pass if <= threshold    [ClearBoxAI Standard]
    'EOD':      0.10,   # Fairness, pass if <= threshold    [ClearBoxAI Standard]
    'MCC':      0.50,   # Performance, pass if >= threshold [ClearBoxAI Standard]
    'LOG_LOSS': 0.40,   # Performance, pass if <= threshold [ClearBoxAI Standard]
    'PR_AUC':   0.70,   # Performance, pass if >= threshold [ClearBoxAI Standard]
    'KS':       0.30,   # Performance, pass if >= threshold [Ghana banking industry standard]
}

print('=' * 58)
print('  ClearBoxAI Fraud Detection Audit Thresholds')
print('  Audit ID : CBA-2026-002')
print(f'  Run Date : {datetime.now().strftime("%Y-%m-%d %H:%M")}')
print('=' * 58)
print('\nPass/Fail Thresholds (defined before any results are seen):')
for k, v in THRESHOLDS.items():
    print(f'  {k:<12} threshold = {v}')
print('\nSetup complete. Ready to begin audit.')
==========================================================
  ClearBoxAI Fraud Detection Audit Thresholds
  Audit ID : CBA-2026-002
  Run Date : 2026-04-20 19:09
==========================================================

Pass/Fail Thresholds (defined before any results are seen):
  SPD          threshold = 0.1
  EOD          threshold = 0.1
  MCC          threshold = 0.5
  LOG_LOSS     threshold = 0.4
  PR_AUC       threshold = 0.7
  KS           threshold = 0.3

Setup complete. Ready to begin audit.
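The reproducibility claim above is easy to sanity-check: reseeding the global NumPy generator, as the setup cell does, and repeating a draw must give byte-identical output. A minimal sketch (the draws here are synthetic and not part of the audit):

```python
import numpy as np

RANDOM_STATE = 42

# Seeding the legacy global generator, as the setup cell does, makes
# subsequent draws deterministic: same seed, same sequence.
np.random.seed(RANDOM_STATE)
a = np.random.normal(size=5)

np.random.seed(RANDOM_STATE)   # reset to the same seed
b = np.random.normal(size=5)

print(np.array_equal(a, b))    # True: identical draws across "runs"
```

The same principle applies to every seeded component downstream (`train_test_split`, `XGBClassifier`, `SMOTE` all accept `random_state`), which is what makes the full audit re-runnable.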
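Later sections compare each observed metric against these pre-registered values. A minimal sketch of how such a check could look; the helper name `check_threshold` and the `LOWER_IS_BETTER` set are our assumptions (the notebook itself only defines the dict), with the conventional pass directions: lower is better for SPD, EOD and log loss, higher is better for MCC, PR-AUC and KS.

```python
# Hypothetical pass/fail helper built on the pre-registered thresholds.
THRESHOLDS = {
    'SPD': 0.10, 'EOD': 0.10, 'MCC': 0.50,
    'LOG_LOSS': 0.40, 'PR_AUC': 0.70, 'KS': 0.30,
}
# Metrics that pass when at or BELOW their threshold; the rest pass at or above.
LOWER_IS_BETTER = {'SPD', 'EOD', 'LOG_LOSS'}

def check_threshold(metric: str, value: float) -> bool:
    """Return True if the observed value passes the pre-registered threshold."""
    limit = THRESHOLDS[metric]
    if metric in LOWER_IS_BETTER:
        return value <= limit
    return value >= limit

print(check_threshold('MCC', 0.62))       # True:  0.62 >= 0.50
print(check_threshold('LOG_LOSS', 0.55))  # False: 0.55 >  0.40
```

Keeping the comparison logic in one place means a later section cannot quietly flip a direction after seeing results.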
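For concreteness, the SPD threshold refers to statistical parity difference: the gap in positive-prediction rates between demographic groups. The notebook computes it via fairlearn's `demographic_parity_difference`; the sketch below computes the same quantity by hand on made-up predictions (the labels and groups are purely illustrative):

```python
import numpy as np

# Synthetic flagged-as-fraud predictions for two demographic groups.
y_pred = np.array([1, 0, 1, 1, 0, 0, 1, 0, 0, 0])
group  = np.array(['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B'])

rate_a = y_pred[group == 'A'].mean()   # 3/5 = 0.6 flagged in group A
rate_b = y_pred[group == 'B'].mean()   # 1/5 = 0.2 flagged in group B
spd = abs(rate_a - rate_b)

print(f'SPD = {spd:.2f}')              # 0.40
print('PASS' if spd <= 0.10 else 'FAIL')
```

Here the 0.40 gap far exceeds the 0.10 threshold, so this toy model would fail the fairness check.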
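Similarly, the KS threshold refers to the Kolmogorov-Smirnov statistic between the model's score distributions for fraud and non-fraud cases, which `ks_2samp` (imported above) computes. A hedged sketch on synthetic scores (the distributions below are invented for illustration; a real model with well-separated classes behaves similarly):

```python
import numpy as np
from scipy.stats import ks_2samp

rng = np.random.default_rng(42)
# Synthetic model scores: fraud cases tend to score higher than non-fraud.
scores_fraud = rng.beta(5, 2, size=1000)   # skewed toward 1
scores_legit = rng.beta(2, 5, size=1000)   # skewed toward 0

ks_stat, p_value = ks_2samp(scores_fraud, scores_legit)
print(f'KS statistic = {ks_stat:.3f}')
print('PASS' if ks_stat >= 0.30 else 'FAIL')
```

A KS statistic at or above 0.30 indicates the score distributions of the two classes separate enough for the model to discriminate fraud from legitimate activity, which is the industry convention the threshold encodes.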