{
  "cells": [
    {
      "cell_type": "markdown",
      "id": "48e48d7f-053e-4a1b-8f95-f2c083c949cd",
      "metadata": {},
      "source": [
        "# Tutorial 03: TENAX Hindcast evaluation\n",
        "This tutorial guides you through the process of implementing the scaling of TENAX-based changes in mean temperature (𝜇\n",
        "μ) and the standard deviation of temperature (σ) during precipitation events.\n",
        "\n",
        "Although this method can also be used to project changes in extreme sub-hourly precipitation under a future warmer climate, it relies solely on climate model projections of temperatures during wet days and anticipated changes in precipitation frequency.\""
      ]
    },
    {
      "cell_type": "markdown",
      "id": "24266dc5-298b-47a7-8884-6b391dd78400",
      "metadata": {},
      "source": [
        "Import all libraries needed for running TENAX and SMEV"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "9fb356f4-980b-473d-bdc7-1d4b0a3d8d3a",
      "metadata": {},
      "outputs": [],
      "source": [
        "from importlib.resources import files\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "import matplotlib.pyplot as plt\n",
        "from scipy.stats import chi2\n",
        "# Import pyTENAX\n",
        "from pyTENAX import tenax, plotting"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "d075c82d-500c-46a9-ab7c-bdd65283d44b",
      "metadata": {},
      "source": [
        "Let's initiate TENAX class and SMEV class with given setup."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "2f471a9b-98b3-450e-b326-60ad6117cd08",
      "metadata": {},
      "outputs": [],
      "source": [
        "# Initiate TENAX class with customized setup\n",
        "S = tenax.TENAX(\n",
        "    return_period=[\n",
        "        2,\n",
        "        5,\n",
        "        10,\n",
        "        20,\n",
        "        50,\n",
        "        100,\n",
        "        200,\n",
        "    ],\n",
        "    durations=[10, 60, 180, 360, 720, 1440], #durations are in minutes and they refer to depth of rainfall within given duration\n",
        "    time_resolution=10,  # time resolution in minutes\n",
        "    left_censoring=[0, 0.90], # left censoring threshold \n",
        "    alpha=0.05, #dependence of shape on T depends on statistical significance at the alpha-level.\n",
        "    min_rain = 0.1, #minimum rainfall depth threshold\n",
        ")"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "1641d7d1-9551-4417-b039-c9b1d10a5660",
      "metadata": {},
      "source": [
        "Once again we start with same data."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "b43a02dd-66fc-4007-ba4d-f16f10247962",
      "metadata": {},
      "outputs": [],
      "source": [
        "# Load precipitation data\n",
        "# Create input path file for the test file\n",
        "file_path_input = files('pyTENAX.res').joinpath('prec_data_Aadorf.parquet')\n",
        "# Load data from csv file\n",
        "data = pd.read_parquet(file_path_input)\n",
        "# Convert 'prec_time' column to datetime, if it's not already\n",
        "data[\"prec_time\"] = pd.to_datetime(data[\"prec_time\"])\n",
        "# Set 'prec_time' as the index\n",
        "data.set_index(\"prec_time\", inplace=True)\n",
        "name_col = \"prec_values\"  # name of column containing data to extract\n",
        "\n",
        "# load temperature data\n",
        "file_path_temperature = files('pyTENAX.res').joinpath('temp_data_Aadorf.parquet')\n",
        "t_data = pd.read_parquet(file_path_temperature)\n",
        "# Convert 'temp_time' column to datetime if it's not already in datetime format\n",
        "t_data[\"temp_time\"] = pd.to_datetime(t_data[\"temp_time\"])\n",
        "# Set 'temp_time' as the index\n",
        "t_data.set_index(\"temp_time\", inplace=True)\n",
        "temp_name_col = \"temp_values\""
      ]
    },
    {
      "cell_type": "markdown",
      "id": "d716d3c5-4bff-4465-bd59-a97e9d9c242a",
      "metadata": {},
      "source": [
        "## Repeat the preprocessing to get ordinary events values and corresponding temperature.\n",
        "We once again focus only on 10-minuts rainfall depth."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "f4240b96-48dd-4767-ac21-644fd9e82fe3",
      "metadata": {},
      "outputs": [],
      "source": [
        "data = S.remove_incomplete_years(data, name_col)\n",
        "\n",
        "# get data from pandas to numpy array\n",
        "df_arr = np.array(data[name_col])\n",
        "df_dates = np.array(data.index)\n",
        "df_arr_t_data = np.array(t_data[temp_name_col])\n",
        "df_dates_t_data = np.array(t_data.index)\n",
        "\n",
        "# extract indexes of ordinary events\n",
        "# these are time-wise indexes =>returns list of np arrays with np.timeindex\n",
        "idx_ordinary = S.get_ordinary_events(data=df_arr, \n",
        "                                     dates=df_dates, \n",
        "                                     name_col=name_col,\n",
        "                                     check_gaps=False)\n",
        "\n",
        "# get ordinary events by removing too short events\n",
        "# returns boolean array, dates of OE in TO, FROM format, and count of OE in each years\n",
        "arr_vals, arr_dates, n_ordinary_per_year = S.remove_short(idx_ordinary)\n",
        "\n",
        "# assign ordinary events values by given durations, values are in depth per duration, NOT in intensity mm/h\n",
        "dict_ordinary, dict_AMS = S.get_ordinary_events_values(data=df_arr, \n",
        "                                                       dates=df_dates, \n",
        "                                                       arr_dates_oe=arr_dates)\n",
        "\n",
        "dict_ordinary, _, n_ordinary_per_year = S.associate_vars(dict_ordinary, \n",
        "                                                         df_arr_t_data, \n",
        "                                                         df_dates_t_data)\n",
        "\n",
        "# Your data (P, T arrays) and threshold thr=3.8\n",
        "P = dict_ordinary[\"10\"][\"ordinary\"].to_numpy()  # Replace with your actual data\n",
        "T = dict_ordinary[\"10\"][\"T\"].to_numpy()  # Replace with your actual data\n",
        "blocks_id = dict_ordinary[\"10\"][\"year\"].to_numpy()  # Replace with your actual data\n",
        "# Number of threshold\n",
        "thr = dict_ordinary[\"10\"][\"ordinary\"].quantile(S.left_censoring[1])\n",
        "# Exctract annual maximas\n",
        "AMS = dict_AMS[\"10\"]  # yet the annual maxima\n",
        "\n",
        "# For plotting \n",
        "# we must create range of temperature\n",
        "eT = np.arange(\n",
        "    np.min(T)-4, np.max(T) + 4, 1\n",
        ")  # define T values to calculate distributions. +4 to go beyond graph end\n",
        "\n"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "bdf70a0b-a461-4201-a6df-8378918fd4ac",
      "metadata": {},
      "source": [
        "## Hindcast evaluation\n",
        "We evaluated the ability of the TENAX model to project precipitation return levels under increased temperatures through a hindcast, by splitting the 38-year record of the climate station into two 19-year periods."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "84b1fa4f-abd1-4655-8026-f4625d9e8b11",
      "metadata": {},
      "outputs": [],
      "source": [
        "yrs = dict_ordinary[\"10\"][\"oe_time\"].dt.year\n",
        "yrs_unique = np.unique(yrs)\n",
        "midway = yrs_unique[\n",
        "    int(np.ceil(np.size(yrs_unique) / 2)) - 1\n",
        "]  # -1 to adjust indexing because this returns a sort of length\n",
        "\n",
        "# DEFINE FIRST PERIOD\n",
        "P1 = P[yrs <= midway]\n",
        "T1 = T[yrs <= midway]\n",
        "AMS1 = AMS[AMS[\"year\"] <= midway]\n",
        "n_ordinary_per_year1 = n_ordinary_per_year[n_ordinary_per_year.index <= midway]\n",
        "n1 = n_ordinary_per_year1.sum() / len(n_ordinary_per_year1)\n",
        "\n",
        "# DEFINE SECOND PERIOD\n",
        "P2 = P[yrs > midway]\n",
        "T2 = T[yrs > midway]\n",
        "AMS2 = AMS[AMS[\"year\"] > midway]\n",
        "n_ordinary_per_year2 = n_ordinary_per_year[n_ordinary_per_year.index > midway]\n",
        "n2 = n_ordinary_per_year2.sum() / len(n_ordinary_per_year2)"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "119523a5-74da-49b3-b96b-0b29f66a163f",
      "metadata": {},
      "source": [
        "## Comparing Temperature models in two periods (1981-1999 & 2000-2018)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "c65dd6b8-a3da-4c35-8c87-fcbabcb1765a",
      "metadata": {
        "trusted": true
      },
      "outputs": [],
      "source": [
        "g_phat1 = S.temperature_model(T1) #returns mu and sigma\n",
        "g_phat2 = S.temperature_model(T2) #returns mu and sigma\n",
        "\n",
        "_, _ = plotting.TNX_FIG_temp_model(\n",
        "    T=T1,\n",
        "    g_phat=g_phat1,\n",
        "    beta=4,\n",
        "    eT=eT,\n",
        "    obscol=\"b\",\n",
        "    valcol=\"b\",\n",
        "    obslabel=None,\n",
        "    vallabel=\"Temperature model \" + str(yrs_unique[0]) + \"-\" + str(midway),\n",
        ")\n",
        "_, _ = plotting.TNX_FIG_temp_model(\n",
        "    T=T2,\n",
        "    g_phat=g_phat2,\n",
        "    beta=4,\n",
        "    eT=eT,\n",
        "    obscol=\"r\",\n",
        "    valcol=\"r\",\n",
        "    obslabel=None,\n",
        "    vallabel=\"Temperature model \" + str(midway + 1) + \"-\" + str(yrs_unique[-1]),\n",
        ")  # model based on temp ave and std changes"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "e93bdb4b-2bd4-42bf-9fab-5886f59aa5cc",
      "metadata": {},
      "source": [
        "### Predicted Temperature Model: Based on Changes in μ (Mean) and σ (Standard Deviation)\n",
        "\n",
        "Increases in the mean temperature (μ) and/or the standard deviation (σ) imply a higher probability of precipitation events occurring at higher temperatures.\n",
        "\n",
        "- `mu_delta` represents the change in mean temperature during precipitation events.\n",
        "- `sigma_factor` represents the scaling factor applied to the standard deviation of temperature during precipitation events.\n",
        "\n",
        "The predicted temperature model is computed by:\n",
        "\n",
        "- Adding `mu_delta` to the original mean (μ), and  \n",
        "- Multiplying the original standard deviation (σ) by `sigma_factor`.\n",
        "\n",
        "#### Mathematical Formulation\n",
        "\n",
        "Let:\n",
        "\n",
        "- μ = original mean temperature of first model (1981-1999)\n",
        "- σ = original standard deviation of first model (1981-1999) \n",
        "- μ' = predicted mean temperature  \n",
        "- σ' = predicted standard deviation  \n",
        "- Δμ = `mu_delta`  \n",
        "- σ_factor = `sigma_factor`\n",
        "\n",
        "Then:\n",
        "\n",
        "$$\n",
        "\\mu' = \\mu + \\Delta\\mu\n",
        "$$\n",
        "\n",
        "$$\n",
        "\\sigma' = \\sigma \\times \\sigma_{\\text{factor}}\n",
        "$$"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "b32c925c-5a42-424e-94f9-29a50c9c0494",
      "metadata": {
        "trusted": true
      },
      "outputs": [],
      "source": [
        "mu_delta = np.mean(T2) - np.mean(T1)\n",
        "sigma_factor = np.std(T2) / np.std(T1)\n",
        "print(f\"Mean temperature has changed by {mu_delta}\")\n",
        "print(f\"Standart deviation has changed by factor of {sigma_factor}\")\n",
        "\n",
        "# Create a predicted temperature model\n",
        "g_phat2_predict = [g_phat1[0] + mu_delta, g_phat1[1] * sigma_factor]\n",
        "\n",
        "# Compare with Temperatude model of second period we have created before \n",
        "print(f\"Temperatude model of second period {g_phat2}\")\n",
        "print(f\"Predicted Temperatude model {g_phat2_predict}\")"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "ef46f58c-ec5a-4546-8092-4c80139d464c",
      "metadata": {},
      "source": [
        "## Create TENAX Magnitude models of two periods (1981-1999 & 2000-2018)\n",
        "A magnitude model $W(x; T)$ was fitted independently for each time period. To assess the similarity of the models, a likelihood ratio test was applied.\n",
        "\n",
        "According to the general theory of the likelihood ratio test, under the null hypothesis $H_0$, we compute the likelihood function $L(\\theta)$, where $\\theta \\in \\Theta$ and $\\Theta$ is the parameter space.\n",
        "\n",
        "The test statistic is defined as:\n",
        "\n",
        "$$\n",
        "-2 \\ln \\left( \\frac{\\sup_{\\theta \\in H_0} L(\\theta)}{\\sup_{\\theta \\in \\Theta} L(\\theta)} \\right)\n",
        "$$\n",
        "\n",
        "This statistic follows a chi-squared distribution under certain regularity conditions and can be used to determine whether the models fitted for different periods are significantly different.\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "b1c5a66a-88e1-4121-8bff-3c33a53e5f4e",
      "metadata": {
        "trusted": true
      },
      "outputs": [],
      "source": [
        "# Sampling intervals for the Montecarlo based on original temperature T\n",
        "Ts = np.arange(\n",
        "    np.min(T) - S.temp_delta, np.max(T) + S.temp_delta, S.temp_res_monte_carlo\n",
        ")\n",
        "\n",
        "# Maginitude model of original data that containst both periods (1981-2018)\n",
        "F_phat, loglik, _, _ = S.magnitude_model(P, T, thr)\n",
        "\n",
        "# Maginitude model of first period (1981-1999)\n",
        "F_phat1, loglik1, _, _ = S.magnitude_model(P1, T1, thr)\n",
        "RL1, _, _ = S.model_inversion(F_phat1, g_phat1, n1, Ts)\n",
        "\n",
        "# Maginitude model of second period  (2000-2018)\n",
        "F_phat2, loglik2, _, _ = S.magnitude_model(P2, T2, thr)\n",
        "RL2, _, _ = S.model_inversion(F_phat2, g_phat2, n2, Ts)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "f1478024-0911-498f-b8ee-20128e10e702",
      "metadata": {
        "trusted": true
      },
      "outputs": [],
      "source": [
        "if F_phat[1] == 0:  # check if b parameter is 0 (shape=shape_0*b\n",
        "    dof = 3\n",
        "    alpha1 = 1  # b parameter is not significantly different from 0; 3 degrees of freedom for the LR test\n",
        "else:\n",
        "    dof = 4\n",
        "    alpha1 = 0  # b parameter is significantly different from 0; 4 degrees of freedom for the LR test"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "3015f327-f0f6-4686-a076-aeb7cd62c086",
      "metadata": {
        "trusted": true
      },
      "outputs": [],
      "source": [
        "# check magnitude model the same in both periods\n",
        "lambda_LR = -2 * (loglik - (loglik1 + loglik2))\n",
        "pval = chi2.sf(lambda_LR, dof)\n",
        "if pval > S.alpha:\n",
        "    print(f\"p={pval}. Magnitude models not  different at {S.alpha*100}% significance.\")\n",
        "else:\n",
        "    print(f\"p={pval}. Magnitude models are different at {S.alpha*100}% significance.\")\n"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "782db30b-7b8c-4ea5-92e5-84f4ea3d1887",
      "metadata": {},
      "source": [
        "## Estimating return levels based on projected change in temperature model and by using the magnitude model of first period. \n",
        "\n",
        "We compare these return levels to annual maxima of second period and \n",
        "Note: Here we do not apply scaling of n (the average number of events per year). \n",
        "Although, this can be simply done by calculating ratio difference of events during two periods and multipying n1 by this factor."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "bcc7f1d3-bfba-440b-8548-19065c26ab65",
      "metadata": {
        "trusted": true
      },
      "outputs": [],
      "source": [
        "RL2_predict, _, _ = S.model_inversion(F_phat1, g_phat2_predict, n1, Ts)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "afbc98cf-4c99-4728-8c35-d26aeb064b04",
      "metadata": {
        "trusted": true
      },
      "outputs": [],
      "source": [
        "# Plot the results\n",
        "plotting.TNX_FIG_valid(\n",
        "    AMS1,\n",
        "    S.return_period,\n",
        "    RL1,\n",
        "    TENAXcol=\"b\",\n",
        "    obscol_shape=\"b+\",\n",
        "    TENAXlabel=\"The TENAX model \" + str(yrs_unique[0]) + \"-\" + str(midway),\n",
        "    obslabel=\"Observed annual maxima \" + str(yrs_unique[0]) + \"-\" + str(midway),\n",
        ")\n",
        "plotting.TNX_FIG_valid(\n",
        "    AMS2,\n",
        "    S.return_period,\n",
        "    RL2_predict,\n",
        "    TENAXcol=\"r\",\n",
        "    obscol_shape=\"r+\",\n",
        "    TENAXlabel=\"The predicted TENAX model \"\n",
        "    + str(midway + 1)\n",
        "    + \"-\"\n",
        "    + str(yrs_unique[-1]),\n",
        "    obslabel=\"Observed annual maxima \" + str(midway + 1) + \"-\" + str(yrs_unique[-1]),\n",
        ")\n",
        "plt.xticks(S.return_period)\n",
        "plt.gca().set_xticks(S.return_period)  # This sets the actual tick marks on log scale\n",
        "plt.gca().get_xaxis().set_major_formatter(plt.ScalarFormatter())  # Optional: shows ticks as plain numbers\n",
        "plt.legend(loc=\"upper center\", bbox_to_anchor=(0.5, -0.2))\n",
        "plt.grid(True, which='both', axis='both', linestyle='--', color='lightgray', alpha=0.7)\n",
        "plt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python (Pyodide)",
      "language": "python",
      "name": "python"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "python",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.8"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}