NeuralForecast multivariate handling: training and inference
flyfish
Result after merging the two frames (the targets Y_df and the exogenous features X_df). Each unique_id is one of the seven ETTm2 series, and ex_1 through ex_4 are timestamp-level exogenous features shared across all series:
unique_id ds y ex_1 ex_2 ex_3 ex_4
0 HUFL 2016-07-01 00:00:00 -0.041413 -0.500000 0.166667 -0.500000 -0.001370
1 HUFL 2016-07-01 00:15:00 -0.185467 -0.500000 0.166667 -0.500000 -0.001370
2 HUFL 2016-07-01 00:30:00 -0.257495 -0.500000 0.166667 -0.500000 -0.001370
3 HUFL 2016-07-01 00:45:00 -0.577510 -0.500000 0.166667 -0.500000 -0.001370
4 HUFL 2016-07-01 01:00:00 -0.385501 -0.456522 0.166667 -0.500000 -0.001370
... ... ... ... ... ... ... ...
403195 OT 2018-02-20 22:45:00 -1.581325 0.456522 -0.333333 0.133333 -0.363014
403196 OT 2018-02-20 23:00:00 -1.581325 0.500000 -0.333333 0.133333 -0.363014
403197 OT 2018-02-20 23:15:00 -1.581325 0.500000 -0.333333 0.133333 -0.363014
403198 OT 2018-02-20 23:30:00 -1.562328 0.500000 -0.333333 0.133333 -0.363014
403199 OT 2018-02-20 23:45:00 -1.562328 0.500000 -0.333333 0.133333 -0.363014
import pandas as pd
from datasetsforecast.long_horizon import LongHorizon

# Change this to your own data to try the model
Y_df, X_df, _ = LongHorizon.load(directory='./', group='ETTm2')
Y_df['ds'] = pd.to_datetime(Y_df['ds'])

# X_df contains the exogenous features, which we add to Y_df
X_df['ds'] = pd.to_datetime(X_df['ds'])
Y_df = Y_df.merge(X_df, on=['unique_id', 'ds'], how='left')
print(Y_df.head())

# We make validation and test splits
n_time = len(Y_df.ds.unique())
val_size = int(.2 * n_time)
test_size = int(.2 * n_time)
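As a quick sanity check: the merged frame shown above has 403,200 rows, which across the seven ETTm2 series is 57,600 timestamps each, so both splits work out to 11,520 observations. The expected values in the comments below are derived from that arithmetic, not from re-running the download:

print(n_time)     # 57600 unique timestamps per series
print(val_size)   # 11520 (20% validation)
print(test_size)  # 11520 (20% test)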
For reference, this is how LongHorizon.load builds Y_df, X_df and S_df (excerpt from datasetsforecast.long_horizon, module-level imports omitted):

@dataclass
class LongHorizon:
    """
    This Long-Horizon datasets wrapper class provides
    utilities to download and wrangle the following datasets:
    ETT, ECL, Exchange, Traffic, ILI and Weather.
    - Each set is normalized with the train data mean and standard deviation.
    - Datasets are partitioned into train, validation and test splits.
    - For all datasets, 70%, 10%, and 20% of observations are train,
      validation, and test, except ETT, which uses 20% validation.
    """
    source_url: str = 'https://nhits-experiments.s3.amazonaws.com/datasets.zip'

    @staticmethod
    def load(directory: str,
             group: str,
             cache: bool = True) -> Tuple[pd.DataFrame,
                                          Optional[pd.DataFrame],
                                          Optional[pd.DataFrame]]:
        """
        Downloads and loads long-horizon forecasting benchmark datasets.

        Parameters
        ----------
        directory: str
            Directory where data will be downloaded.
        group: str
            Group name.
            Allowed groups: 'ETTh1', 'ETTh2',
                            'ETTm1', 'ETTm2',
                            'ECL', 'Exchange',
                            'Traffic', 'Weather', 'ILI'.
        cache: bool
            If `True`, saves and loads the data from a local pickle cache.

        Returns
        -------
        y_df: pd.DataFrame
            Target time series with columns ['unique_id', 'ds', 'y'].
        X_df: pd.DataFrame
            Exogenous time series with columns ['unique_id', 'ds']
            plus the exogenous features.
        S_df: pd.DataFrame
            Static exogenous variables with column ['unique_id']
            plus the static features.
        """
        if group not in LongHorizonInfo.groups:
            raise Exception(f'group not found {group}')
        path = f'{directory}/longhorizon/datasets'
        file_cache = f'{path}/{group}.p'

        if os.path.exists(file_cache) and cache:
            df, X_df, S_df = pd.read_pickle(file_cache)
            return df, X_df, S_df

        LongHorizon.download(directory)
        path = f'{directory}/longhorizon/datasets'
        kind = 'M' if group not in ['ETTh1', 'ETTh2'] else 'S'
        name = LongHorizonInfo[group].name
        y_df = pd.read_csv(f'{path}/{name}/{kind}/df_y.csv')
        y_df = y_df.sort_values(['unique_id', 'ds'], ignore_index=True)
        y_df = y_df[['unique_id', 'ds', 'y']]
        X_df = pd.read_csv(f'{path}/{name}/{kind}/df_x.csv')
        X_df = y_df.drop('y', axis=1).merge(X_df, how='left', on=['ds'])
        S_df = None
        if cache:
            pd.to_pickle((y_df, X_df, S_df), file_cache)
        return y_df, X_df, S_df

    @staticmethod
    def download(directory: str) -> None:
        """
        Download the long-horizon datasets.

        Parameters
        ----------
        directory: str
            Directory path to download dataset.
        """
        path = f'{directory}/longhorizon/datasets/'
        if not os.path.exists(path):
            download_file(path, LongHorizon.source_url, decompress=True)
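The step worth noting is the last merge: df_x.csv is keyed by 'ds' only, so each timestamp's exogenous values are broadcast to every series. A minimal toy sketch of that behavior (hypothetical values, not ETTm2 data):

import pandas as pd

# Exogenous features keyed only by 'ds' get attached to every unique_id.
y_df = pd.DataFrame({'unique_id': ['A', 'A', 'B', 'B'],
                     'ds': [1, 2, 1, 2],
                     'y': [1.0, 2.0, 3.0, 4.0]})
x_df = pd.DataFrame({'ds': [1, 2], 'ex_1': [0.1, 0.2]})

X_df = y_df.drop('y', axis=1).merge(x_df, how='left', on=['ds'])
print(X_df)  # series A and B share the same ex_1 at each timestamp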
Full training code: train and save the model files
import pandas as pd
from datasetsforecast.long_horizon import LongHorizon

# Change this to your own data to try the model
Y_df, X_df, _ = LongHorizon.load(directory='./', group='ETTm2')
Y_df['ds'] = pd.to_datetime(Y_df['ds'])

# X_df contains the exogenous features, which we add to Y_df
X_df['ds'] = pd.to_datetime(X_df['ds'])
Y_df = Y_df.merge(X_df, on=['unique_id', 'ds'], how='left')
print(Y_df.head())

# We make validation and test splits
n_time = len(Y_df.ds.unique())
val_size = int(.2 * n_time)
test_size = int(.2 * n_time)
from neuralforecast.core import NeuralForecast
from neuralforecast.models import VanillaTransformer
from neuralforecast.losses.pytorch import MAE

horizon = 12
input_size = 24

models = [
    VanillaTransformer(h=horizon,
                       input_size=input_size,
                       max_steps=1,  # single training step, just a smoke test
                       val_check_steps=1,
                       early_stop_patience_steps=1,
                       scaler_type='identity',
                       valid_loss=MAE(),
                       random_seed=12345678,
                       ),
]

nf = NeuralForecast(
    models=models,
    freq='15min')

# Fit and evaluate with rolling windows over the validation and test splits
Y_hat_df = nf.cross_validation(df=Y_df,
                               val_size=val_size,
                               test_size=test_size,
                               n_windows=None)
Y_hat_df = Y_hat_df.reset_index()

# Persist the fitted models (and the dataset, so predict() can run without df)
nf.save(path='./checkpoints/test_run/',
        model_index=None,
        overwrite=True,
        save_dataset=True)
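The cross-validation frame holds the ground truth next to one column per model, plus the cutoff of each rolling window, which makes it easy to score the run. A quick scoring sketch, assuming the model column is named after the class as usual:

from neuralforecast.losses.numpy import mae

print(Y_hat_df.columns.tolist())
# roughly: ['unique_id', 'ds', 'cutoff', 'y', 'VanillaTransformer']

# Test MAE of the model column against the ground truth
print('MAE:', mae(Y_hat_df['y'], Y_hat_df['VanillaTransformer']))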
Full inference code
import pandas as pd
from neuralforecast.core import NeuralForecast

# Sample data (same schema as training: target y plus exogenous ex_1..ex_4)
data = {
    'unique_id': ['HUFL'] * 5,
    'ds': [
        '2016-07-01 00:00:00', '2016-07-01 00:15:00', '2016-07-01 00:30:00',
        '2016-07-01 00:45:00', '2016-07-01 01:00:00'
    ],
    'y': [-0.041413, -0.185467, -0.257495, -0.577510, -0.385501],
    'ex_1': [-0.5, -0.5, -0.5, -0.5, -0.456522],
    'ex_2': [0.166667, 0.166667, 0.166667, 0.166667, 0.166667],
    'ex_3': [-0.5, -0.5, -0.5, -0.5, -0.5],
    'ex_4': [-0.001370, -0.001370, -0.001370, -0.001370, -0.001370]
}

# Build the DataFrame
df = pd.DataFrame(data)
df['ds'] = pd.to_datetime(df['ds'])

# Load the trained model; there is no need to re-declare the models here,
# NeuralForecast.load restores them from the checkpoint
nf = NeuralForecast.load(path='./checkpoints/test_run/')

# Data preparation: split target and exogenous columns, then merge them back
# (this mirrors the Y_df/X_df layout used at training time)
Y_df = df[['unique_id', 'ds', 'y']]
X_df = df[['unique_id', 'ds', 'ex_1', 'ex_2', 'ex_3', 'ex_4']]
Y_df = Y_df.merge(X_df, on=['unique_id', 'ds'], how='left')

# Run prediction. Note: each series must provide at least input_size
# observations (24 at training time), so extend this 5-row sample accordingly.
predictions = nf.predict(df=Y_df)

# Print the predictions
print(predictions)
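Since the training script saved with save_dataset=True, the loaded NeuralForecast object still carries the training data, so you can also forecast straight from the stored dataset without passing a frame:

# Forecast horizon steps past the end of the stored training data
predictions = nf.predict()
print(predictions)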