-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscript.py
96 lines (83 loc) · 2.64 KB
/
script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import argparse
from math import sqrt
import numpy as np
from pandas import Series
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error as mse
from statsmodels.tsa.arima_model import ARIMA as arima
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import warnings
warnings.filterwarnings("ignore")
parser = argparse.ArgumentParser()
parser.add_argument('--filepath', required=True, help='input dataset')
parser.add_argument('--lag_order', required=False, default=1, help='Number of lag observations(AR)')
parser.add_argument('--diff_degree', required=False, default=1, help='Degree of differencing(I)')
parser.add_argument('--ma_order', required=False, default=1, help='Moving average window size(MA)')
params = parser.parse_args()
lag_order = int(params.lag_order)
diff_degree = int(params.diff_degree)
ma_order = int(params.ma_order)
savepath = 'forecast_results/MP/'
# load dataframe
df = pd.read_csv(params.filepath)
df = df['Wind Speed']
# print(df.head())
x = df.to_numpy().tolist()
# ACF and PACF plots
# plt.figure()
# plt.subplots_adjust(hspace=0.5, wspace=0.5)
# plt.subplot(211)
# plt.xlabel("Lags")
# plot_acf(df, ax=plt.gca(), lags=70)
# plt.subplot(212)
# plt.xlabel("Lags")
# plot_pacf(df, ax=plt.gca(), lags=70)
# plt.savefig(savepath+'w_stationary_acf_pacf.png')
# plt.close()
# AD Fuller Test
# print(30*"-")
# print("AD Fuller Test")
# result = adfuller(x)
# print(f"ADF Statistic: {result[0]}")
# print(f"p-value: {result[1]}")
# print("Critical Values:")
# for k, v in result[4].items():
# print(f"\t{k}: {v}")
# print(30*"-")
# KPSS Test
# print(30*"-")
# print("KPSS Test")
# result = kpss(x)
# print(f"KPSS Statistic: {result[0]}")
# print(f"p-value: {result[1]}")
# print("Critical Values:")
# for k, v in result[3].items():
# print(f"\t{k}: {v}")
# print(30*"-")
# separate out validation set
train_size = int(len(x)*0.8) # 80-20 train-val split
train, val = x[:train_size], x[train_size:]
print(len(train), len(val))
history = train
preds = list()
# forecasting
for i in range(len(val)):
model = arima(history, order=(lag_order, diff_degree, ma_order))
model_fit = model.fit(disp=False)
yhat = model_fit.forecast()[0]
preds.append(yhat)
history.append(val[i])
# print(f'Predicted = {yhat}; Expected = {val[i]}')
print(model_fit.summary())
rmse = sqrt(mse(val, preds))
print(f"RMSE: {rmse}")
plt.figure()
plt.plot(val, color='blue', label='actual')
plt.plot(preds, color='red', label='prediction')
plt.legend(loc="upper left")
plt.xlabel('Weeks')
plt.ylabel('Wind Speed(m/s)')
plt.savefig(savepath+'w_arima.png')
plt.close()