Spaces:

sebastiansarasti
/

TimesFM_UPalermo

Sleeping

App Files Files Community

sebastiansarasti committed on Jun 12

Commit

0b7a7fc

1 Parent(s): b44004b

First commit

Browse files

Files changed (7) hide show

.gitignore +3 -1
notebooks/01_api_bitcoin.ipynb +66 -0
notebooks/02_model_inference.ipynb +346 -0
requirements.txt +7 -0
src/app.py +71 -0
src/model.py +25 -0
src/utils.py +63 -0

.gitignore CHANGED Viewed

@@ -191,4 +191,6 @@ cython_debug/
 #  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
 #  refer to https://docs.cursor.com/context/ignore-files
 .cursorignore
-.cursorindexingignore

 #  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
 #  refer to https://docs.cursor.com/context/ignore-files
 .cursorignore
+.cursorindexingignore
+/data

notebooks/01_api_bitcoin.ipynb ADDED Viewed

	@@ -0,0 +1,66 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np \n",
+    "import pandas as pd\n",
+    "import yfinance as yf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_bitcoin_history_yf(start_date, end_date):\n",
+    "    btc = yf.Ticker(\"BTC-USD\")\n",
+    "    hist = btc.history(start=start_date, end=end_date)\n",
+    "    return hist.reset_index()[[\"Date\", \"Close\"]].rename(columns={\"Date\": \"date\", \"Close\": \"price\"})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = get_bitcoin_history_yf(start_date=\"2024-02-01\", end_date=\"2025-01-01\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# df.to_csv(\"/Users/sebastianalejandrosarastizambonino/Documents/conferences/time_series_u_palermo/data/bitcoin_history_yf.csv\", index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "tsfm",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

notebooks/02_model_inference.ipynb ADDED Viewed

	@@ -0,0 +1,346 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import timesfm\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Fetching 5 files: 100%|██████████| 5/5 [00:00<00:00, 48545.19it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "tfm = timesfm.TimesFm(\n",
+    "      hparams=timesfm.TimesFmHparams(\n",
+    "          backend=\"gpu\",\n",
+    "          per_core_batch_size=32,\n",
+    "          horizon_len=10,\n",
+    "          num_layers=50,\n",
+    "          use_positional_embedding=False,\n",
+    "          context_len=2048,\n",
+    "      ),\n",
+    "      checkpoint=timesfm.TimesFmCheckpoint(\n",
+    "          huggingface_repo_id=\"google/timesfm-2.0-500m-pytorch\"),\n",
+    "  )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create a function to process a dataframe"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def process_dataframe(df):\n",
+    "    df = df.rename(\n",
+    "        columns={\n",
+    "            \"date\": \"ds\",\n",
+    "            \"price\": \"y\"\n",
+    "        }\n",
+    "    )\n",
+    "    df['ds'] = pd.to_datetime(df['ds'])\n",
+    "    df['unique_id'] = \"bitcoin\"\n",
+    "    return df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv(\n",
+    "    \"/Users/sebastianalejandrosarastizambonino/Documents/conferences/time_series_u_palermo/data/bitcoin_history_yf.csv\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_final = process_dataframe(df)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create a function to make inference over the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Help on method forecast_on_df in module timesfm.timesfm_base:\n",
+      "\n",
+      "forecast_on_df(inputs: pandas.core.frame.DataFrame, freq: str, forecast_context_len: int = 0, value_name: str = 'values', model_name: str = 'timesfm', window_size: int | None = None, num_jobs: int = 1, normalize: bool = False, verbose: bool = True) -> pandas.core.frame.DataFrame method of timesfm.timesfm_torch.TimesFmTorch instance\n",
+      "    Forecasts on a list of time series.\n",
+      "    \n",
+      "    Args:\n",
+      "      inputs: A pd.DataFrame of all time series. The dataframe should have a\n",
+      "        `unique_id` column for identifying the time series, a `ds` column for\n",
+      "        timestamps and a value column for the time series values.\n",
+      "      freq: string valued `freq` of data. Notice this is different from the\n",
+      "        `freq` required by `forecast`. See `freq_map` for allowed values.\n",
+      "      forecast_context_len: If provided none zero, we take the last\n",
+      "        `forecast_context_len` time-points from each series as the forecast\n",
+      "        context instead of the `context_len` set by the model.\n",
+      "      value_name: The name of the value column.\n",
+      "      model_name: name of the model to be written into future df.\n",
+      "      window_size: window size of trend + residual decomposition. If None then\n",
+      "        we do not do decomposition.\n",
+      "      num_jobs: number of parallel processes to use for dataframe processing.\n",
+      "      normalize: normalize context before forecasting or not.\n",
+      "      verbose: output model states in terminal.\n",
+      "    \n",
+      "    Returns:\n",
+      "      Future forecasts dataframe.\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "help(tfm.forecast_on_df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def predict_timesfm(df, model):\n",
+    "    forecast_df = model.forecast_on_df(\n",
+    "        inputs=df,\n",
+    "        forecast_context_len=10,\n",
+    "        freq=\"D\",  # daily\n",
+    "        value_name=\"y\",\n",
+    "        num_jobs=-1,\n",
+    "    )\n",
+    "    forecast_df = forecast_df[['ds', 'unique_id', 'timesfm']]\n",
+    "    forecast_df = forecast_df.rename(\n",
+    "        columns={\n",
+    "            \"timesfm\": \"yhat\"\n",
+    "        }\n",
+    "    )\n",
+    "    return forecast_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['ds', 'y', 'unique_id'], dtype='object')"
+      ]
+     },
+     "execution_count": 57,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_final.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Processing dataframe with multiple processes.\n",
+      " See https://github.com/google-research/timesfm/blob/master/README.md for updated APIs.\n",
+      "Loaded PyTorch TimesFM, likely because python version is 3.11.13 (main, Jun  5 2025, 08:21:08) [Clang 14.0.6 ].\n",
+      "Finished preprocessing dataframe.\n",
+      "Finished forecasting.\n"
+     ]
+    }
+   ],
+   "source": [
+    "forecast = predict_timesfm(df_final)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>ds</th>\n",
+       "      <th>unique_id</th>\n",
+       "      <th>yhat</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2025-01-01 00:00:00+00:00</td>\n",
+       "      <td>bitcoin</td>\n",
+       "      <td>93821.898438</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2025-01-02 00:00:00+00:00</td>\n",
+       "      <td>bitcoin</td>\n",
+       "      <td>93758.367188</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2025-01-03 00:00:00+00:00</td>\n",
+       "      <td>bitcoin</td>\n",
+       "      <td>93707.375000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>2025-01-04 00:00:00+00:00</td>\n",
+       "      <td>bitcoin</td>\n",
+       "      <td>93779.257812</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>2025-01-05 00:00:00+00:00</td>\n",
+       "      <td>bitcoin</td>\n",
+       "      <td>93857.195312</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>2025-01-06 00:00:00+00:00</td>\n",
+       "      <td>bitcoin</td>\n",
+       "      <td>93959.531250</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>2025-01-07 00:00:00+00:00</td>\n",
+       "      <td>bitcoin</td>\n",
+       "      <td>94230.304688</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>2025-01-08 00:00:00+00:00</td>\n",
+       "      <td>bitcoin</td>\n",
+       "      <td>94447.601562</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>2025-01-09 00:00:00+00:00</td>\n",
+       "      <td>bitcoin</td>\n",
+       "      <td>94440.648438</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>2025-01-10 00:00:00+00:00</td>\n",
+       "      <td>bitcoin</td>\n",
+       "      <td>94379.914062</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                         ds unique_id          yhat\n",
+       "0 2025-01-01 00:00:00+00:00   bitcoin  93821.898438\n",
+       "1 2025-01-02 00:00:00+00:00   bitcoin  93758.367188\n",
+       "2 2025-01-03 00:00:00+00:00   bitcoin  93707.375000\n",
+       "3 2025-01-04 00:00:00+00:00   bitcoin  93779.257812\n",
+       "4 2025-01-05 00:00:00+00:00   bitcoin  93857.195312\n",
+       "5 2025-01-06 00:00:00+00:00   bitcoin  93959.531250\n",
+       "6 2025-01-07 00:00:00+00:00   bitcoin  94230.304688\n",
+       "7 2025-01-08 00:00:00+00:00   bitcoin  94447.601562\n",
+       "8 2025-01-09 00:00:00+00:00   bitcoin  94440.648438\n",
+       "9 2025-01-10 00:00:00+00:00   bitcoin  94379.914062"
+      ]
+     },
+     "execution_count": 59,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "forecast"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "tsfm",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+numpy==2.3.0
+pandas==2.3.0
+yfinance==0.2.62
+timesfm==1.2.9
+jax==0.6.1
+torch==2.7.1
+plotly==6.1.2

src/app.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import streamlit as st
+import pandas as pd
+import plotly.express as px
+from model import get_model
+from utils import get_bitcoin_history_yf, process_dataframe, predict_timesfm
+st.title("Foundational Models para Series de Tiempo", anchor=None, help=None)
+st.header("Creado por: Sebastian Sarasti", divider="gray")
+st.markdown("More about me: [LinkedIn](https://www.linkedin.com/in/sebastiansarasti/)")
+st.markdown("**Conferencia:** Universidad de Palermo")
+st.markdown("Esta aplicación permite explorar modelos de series de tiempo utilizando modelos fundacionales basados en Transformers. El modelo fundacional seleccionado es TimesFM, fue desarrollado por Google. ")
+st.markdown("**¿Cómo funciona?**")
+st.markdown("1. **Selecciona las fechas**: Elige el rango de fechas para el cual deseas predecir los precios de Bitcoin.")
+st.markdown("2. **Selecciona la ventana de forecast**: Permite configurar el modelo para predecir el horizonte de tiempo deseado.")
+st.markdown("3. **Ejecuta el modelo**: Haz clic en el botón para ejecutar el modelo y obtener las predicciones.")
+# create two columns for start date and end date
+col1, col2 = st.columns(2)
+with col1:
+    start_date = st.date_input("Fecha de Inicio", value="2025-01-31")
+with col2:
+    end_date = st.date_input("Fecha de Fin", value="2025-06-10")
+# create a slider for forecast horizon
+forecast_horizon = st.slider(
+    "Ventana de Forecast",
+    min_value=1,
+    max_value=365,
+    value=st.session_state.get("forecast_horizon", 30),
+    help="Selecciona el horizonte de tiempo para las predicciones (en días)."
+)
+# create a button to run the model
+value = st.button("Ejecutar Modelo")
+# ... después del botón "Ejecutar Modelo"
+if value:
+    assert start_date < end_date, "La fecha de inicio debe ser anterior a la fecha de fin."
+    assert forecast_horizon > 0, "La ventana de forecast debe ser mayor a 0."
+    with st.spinner("Descargando datos ..."):
+        df = get_bitcoin_history_yf(start_date, end_date)
+        df = process_dataframe(df)
+        st.session_state["df"] = df
+    with st.spinner("Ejecutando modelo ..."):
+        model = get_model(forecast_horizon)
+        forecast_df = predict_timesfm(df=df, model=model)
+        forecast_df["type"] = "Forecast"
+        st.session_state["forecast_df"] = forecast_df
+# nuevo botón separado
+if "forecast_df" in st.session_state and st.button("Graficar Predicciones"):
+    df = st.session_state["df"]
+    forecast_df = st.session_state["forecast_df"]
+    df["type"] = "Historia"
+    df_final = pd.concat([df, forecast_df], ignore_index=True)
+    fig = px.line(
+        df_final,
+        x="ds",
+        y="y",
+        color="type",
+        title="Predicciones de Bitcoin con TimesFM",
+        labels={"ds": "Fecha", "y": "Precio (USD)", "type": "Tipo"},
+    )
+    st.plotly_chart(fig)

src/model.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import timesfm
+def get_model(forecast_horizon):
+    """
+    This function initializes and returns a TimesFM model for forecasting.
+    Args:
+        forecast_horizon (int): The number of time steps to forecast.
+    Returns:
+        TimesFM: An instance of the TimesFM model configured for the specified forecast horizon.
+    """
+    model = timesfm.TimesFm(
+        hparams=timesfm.TimesFmHparams(
+            backend="cpu",
+            per_core_batch_size=32,
+            horizon_len=forecast_horizon,
+            num_layers=50,
+            use_positional_embedding=False,
+            context_len=2048,
+        ),
+        checkpoint=timesfm.TimesFmCheckpoint(
+            huggingface_repo_id="google/timesfm-2.0-500m-pytorch"),
+    )
+    return model

src/utils.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import yfinance as yf
+import pandas as pd
+def get_bitcoin_history_yf(start_date, end_date):
+    """
+    This function fetches the historical price data for Bitcoin (BTC) using the yfinance library.
+    Args:
+        start_date (str): The start date for fetching historical data in 'YYYY-MM-DD' format.
+        end_date (str): The end date for fetching historical data in 'YYYY-MM-DD'
+    Returns:
+        pandas.DataFrame: A DataFrame containing the date and closing price of Bitcoin.
+    """
+    btc = yf.Ticker("BTC-USD")
+    hist = btc.history(start=start_date, end=end_date)
+    return hist.reset_index()[["Date", "Close"]].rename(columns={"Date": "date", "Close": "price"})
+def process_dataframe(df):
+    """
+    This function processes the DataFrame to prepare it for forecasting.
+    Args:
+        df (pandas.DataFrame): The input DataFrame containing the historical data.
+    Returns:
+        pandas.DataFrame: A processed DataFrame with columns 'ds', 'y', and 'unique_id'.
+    """
+    df = df.rename(
+        columns={
+            "date": "ds",
+            "price": "y"
+        }
+    )
+    df['ds'] = pd.to_datetime(df['ds'])
+    df['unique_id'] = "bitcoin"
+    return df
+def predict_timesfm(df, model):
+    """
+    Makes predictions using a trained TimesFM model on the provided DataFrame.
+    Args:
+        df (pandas.DataFrame): The input DataFrame containing the data to be forecasted.
+        model (TimesFM): A trained TimesFM model for forecasting.
+    Returns:
+        pandas.DataFrame: A DataFrame containing the forecasted values with columns 'ds', 'unique_id', and 'yhat'.
+    """
+    forecast_df = model.forecast_on_df(
+        inputs=df,
+        freq="D",  # monthly
+        value_name="y",
+        num_jobs=2,
+    )
+    forecast_df = forecast_df[['ds', 'unique_id', 'timesfm']]
+    forecast_df = forecast_df.rename(
+        columns={
+            "timesfm": "y"
+        }
+    )
+    return forecast_df