NeuralForecast multivariate handling: training and inference
flyfish
Result after merging the two frames (the targets Y_df and the exogenous features X_df). Each unique_id is one of the seven ETTm2 series, and ex_1 through ex_4 are timestamp-level exogenous features shared across all series:
unique_id ds y ex_1 ex_2 ex_3 ex_4
0 HUFL 2016-07-01 00:00:00 -0.041413 -0.500000 0.166667 -0.500000 -0.001370
1 HUFL 2016-07-01 00:15:00 -0.185467 -0.500000 0.166667 -0.500000 -0.001370
2 HUFL 2016-07-01 00:30:00 -0.257495 -0.500000 0.166667 -0.500000 -0.001370
3 HUFL 2016-07-01 00:45:00 -0.577510 -0.500000 0.166667 -0.500000 -0.001370
4 HUFL 2016-07-01 01:00:00 -0.385501 -0.456522 0.166667 -0.500000 -0.001370
... ... ... ... ... ... ... ...
403195 OT 2018-02-20 22:45:00 -1.581325 0.456522 -0.333333 0.133333 -0.363014
403196 OT 2018-02-20 23:00:00 -1.581325 0.500000 -0.333333 0.133333 -0.363014
403197 OT 2018-02-20 23:15:00 -1.581325 0.500000 -0.333333 0.133333 -0.363014
403198 OT 2018-02-20 23:30:00 -1.562328 0.500000 -0.333333 0.133333 -0.363014
403199 OT 2018-02-20 23:45:00 -1.562328 0.500000 -0.333333 0.133333 -0.363014
import pandas as pd
from datasetsforecast.long_horizon import LongHorizon

# Change this to your own data to try the model
Y_df, X_df, _ = LongHorizon.load(directory='./', group='ETTm2')
Y_df['ds'] = pd.to_datetime(Y_df['ds'])

# X_df contains the exogenous features, which we add to Y_df
X_df['ds'] = pd.to_datetime(X_df['ds'])
Y_df = Y_df.merge(X_df, on=['unique_id', 'ds'], how='left')
print(Y_df.head())

# We make validation and test splits
n_time = len(Y_df.ds.unique())
val_size = int(.2 * n_time)
test_size = int(.2 * n_time)
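As a quick sanity check: the merged frame shown above has 403,200 rows, which across the seven ETTm2 series is 57,600 timestamps each, so both splits work out to 11,520 observations. The expected values in the comments below are derived from that arithmetic, not from re-running the download:

print(n_time)     # 57600 unique timestamps per series
print(val_size)   # 11520 (20% validation)
print(test_size)  # 11520 (20% test)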
For reference, this is how LongHorizon.load builds Y_df, X_df and S_df (excerpt from datasetsforecast.long_horizon, module-level imports omitted):

@dataclass
class LongHorizon:
    """
    This Long-Horizon datasets wrapper class provides
    utilities to download and wrangle the following datasets:
    ETT, ECL, Exchange, Traffic, ILI and Weather.
    - Each set is normalized with the train data mean and standard deviation.
    - Datasets are partitioned into train, validation and test splits.
    - For all datasets, 70%, 10%, and 20% of observations are train,
      validation, and test, except ETT, which uses 20% validation.
    """
    source_url: str = 'https://nhits-experiments.s3.amazonaws.com/datasets.zip'

    @staticmethod
    def load(directory: str,
             group: str,
             cache: bool = True) -> Tuple[pd.DataFrame,
                                          Optional[pd.DataFrame],
                                          Optional[pd.DataFrame]]:
        """
        Downloads and loads long-horizon forecasting benchmark datasets.

        Parameters
        ----------
        directory: str
            Directory where data will be downloaded.
        group: str
            Group name.
            Allowed groups: 'ETTh1', 'ETTh2',
                            'ETTm1', 'ETTm2',
                            'ECL', 'Exchange',
                            'Traffic', 'Weather', 'ILI'.
        cache: bool
            If `True`, saves and loads the data from a local pickle cache.

        Returns
        -------
        y_df: pd.DataFrame
            Target time series with columns ['unique_id', 'ds', 'y'].
        X_df: pd.DataFrame
            Exogenous time series with columns ['unique_id', 'ds']
            plus the exogenous features.
        S_df: pd.DataFrame
            Static exogenous variables with column ['unique_id']
            plus the static features.
        """
        if group not in LongHorizonInfo.groups:
            raise Exception(f'group not found {group}')
        path = f'{directory}/longhorizon/datasets'
        file_cache = f'{path}/{group}.p'

        if os.path.exists(file_cache) and cache:
            df, X_df, S_df = pd.read_pickle(file_cache)
            return df, X_df, S_df

        LongHorizon.download(directory)
        path = f'{directory}/longhorizon/datasets'
        kind = 'M' if group not in ['ETTh1', 'ETTh2'] else 'S'
        name = LongHorizonInfo[group].name
        y_df = pd.read_csv(f'{path}/{name}/{kind}/df_y.csv')
        y_df = y_df.sort_values(['unique_id', 'ds'], ignore_index=True)
        y_df = y_df[['unique_id', 'ds', 'y']]
        X_df = pd.read_csv(f'{path}/{name}/{kind}/df_x.csv')
        X_df = y_df.drop('y', axis=1).merge(X_df, how='left', on=['ds'])
        S_df = None
        if cache:
            pd.to_pickle((y_df, X_df, S_df), file_cache)
        return y_df, X_df, S_df

    @staticmethod
    def download(directory: str) -> None:
        """
        Download the long-horizon datasets.

        Parameters
        ----------
        directory: str
            Directory path to download dataset.
        """
        path = f'{directory}/longhorizon/datasets/'
        if not os.path.exists(path):
            download_file(path, LongHorizon.source_url, decompress=True)
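The step worth noting is the last merge: df_x.csv is keyed by 'ds' only, so each timestamp's exogenous values are broadcast to every series. A minimal toy sketch of that behavior (hypothetical values, not ETTm2 data):

import pandas as pd

# Exogenous features keyed only by 'ds' get attached to every unique_id.
y_df = pd.DataFrame({'unique_id': ['A', 'A', 'B', 'B'],
                     'ds': [1, 2, 1, 2],
                     'y': [1.0, 2.0, 3.0, 4.0]})
x_df = pd.DataFrame({'ds': [1, 2], 'ex_1': [0.1, 0.2]})

X_df = y_df.drop('y', axis=1).merge(x_df, how='left', on=['ds'])
print(X_df)  # series A and B share the same ex_1 at each timestamp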
Full training code: train and save the model files
import pandas as pd
from datasetsforecast.long_horizon import LongHorizon

# Change this to your own data to try the model
Y_df, X_df, _ = LongHorizon.load(directory='./', group='ETTm2')
Y_df['ds'] = pd.to_datetime(Y_df['ds'])

# X_df contains the exogenous features, which we add to Y_df
X_df['ds'] = pd.to_datetime(X_df['ds'])
Y_df = Y_df.merge(X_df, on=['unique_id', 'ds'], how='left')
print(Y_df.head())

# We make validation and test splits
n_time = len(Y_df.ds.unique())
val_size = int(.2 * n_time)
test_size = int(.2 * n_time)
from neuralforecast.core import NeuralForecast
from neuralforecast.models import VanillaTransformer
from neuralforecast.losses.pytorch import MAE

horizon = 12
input_size = 24

models = [
    VanillaTransformer(h=horizon,
                       input_size=input_size,
                       max_steps=1,  # single training step, just a smoke test
                       val_check_steps=1,
                       early_stop_patience_steps=1,
                       scaler_type='identity',
                       valid_loss=MAE(),
                       random_seed=12345678,
                       ),
]

nf = NeuralForecast(
    models=models,
    freq='15min')

# Fit and evaluate with rolling windows over the validation and test splits
Y_hat_df = nf.cross_validation(df=Y_df,
                               val_size=val_size,
                               test_size=test_size,
                               n_windows=None)
Y_hat_df = Y_hat_df.reset_index()

# Persist the fitted models (and the dataset, so predict() can run without df)
nf.save(path='./checkpoints/test_run/',
        model_index=None,
        overwrite=True,
        save_dataset=True)
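The cross-validation frame holds the ground truth next to one column per model, plus the cutoff of each rolling window, which makes it easy to score the run. A quick scoring sketch, assuming the model column is named after the class as usual:

from neuralforecast.losses.numpy import mae

print(Y_hat_df.columns.tolist())
# roughly: ['unique_id', 'ds', 'cutoff', 'y', 'VanillaTransformer']

# Test MAE of the model column against the ground truth
print('MAE:', mae(Y_hat_df['y'], Y_hat_df['VanillaTransformer']))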
Full inference code
import pandas as pd
from neuralforecast.core import NeuralForecast

# Sample data (same schema as training: target y plus exogenous ex_1..ex_4)
data = {
    'unique_id': ['HUFL'] * 5,
    'ds': [
        '2016-07-01 00:00:00', '2016-07-01 00:15:00', '2016-07-01 00:30:00',
        '2016-07-01 00:45:00', '2016-07-01 01:00:00'
    ],
    'y': [-0.041413, -0.185467, -0.257495, -0.577510, -0.385501],
    'ex_1': [-0.5, -0.5, -0.5, -0.5, -0.456522],
    'ex_2': [0.166667, 0.166667, 0.166667, 0.166667, 0.166667],
    'ex_3': [-0.5, -0.5, -0.5, -0.5, -0.5],
    'ex_4': [-0.001370, -0.001370, -0.001370, -0.001370, -0.001370]
}

# Build the DataFrame
df = pd.DataFrame(data)
df['ds'] = pd.to_datetime(df['ds'])

# Load the trained model; there is no need to re-declare the models here,
# NeuralForecast.load restores them from the checkpoint
nf = NeuralForecast.load(path='./checkpoints/test_run/')

# Data preparation: split target and exogenous columns, then merge them back
# (this mirrors the Y_df/X_df layout used at training time)
Y_df = df[['unique_id', 'ds', 'y']]
X_df = df[['unique_id', 'ds', 'ex_1', 'ex_2', 'ex_3', 'ex_4']]
Y_df = Y_df.merge(X_df, on=['unique_id', 'ds'], how='left')

# Run prediction. Note: each series must provide at least input_size
# observations (24 at training time), so extend this 5-row sample accordingly.
predictions = nf.predict(df=Y_df)

# Print the predictions
print(predictions)
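Since the training script saved with save_dataset=True, the loaded NeuralForecast object still carries the training data, so you can also forecast straight from the stored dataset without passing a frame:

# Forecast horizon steps past the end of the stored training data
predictions = nf.predict()
print(predictions)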