Computer Science‎ > ‎

Stats, ML, Data: Time Series Forecasting (ARIMA, Fourier, Regression methods)

Suppose you are provided with the daily number of users visiting a website. 
Such data typically follows a seasonal pattern: month-to-month trends and day-of-week trends (for example, Sunday traffic is typically lower than Wednesday traffic).


Here's a Python-based approach using Ordinary Least Squares (OLS) from statsmodels (`statsmodels.api`).

- `traffic` is the list that holds the given data
- the program forecasts the number of users for the next 30 days (the days immediately after the point where the given data ends)
import sys
import pandas as pd
import numpy as np
import statsmodels.api as sm


def sampling(data, prev):
    """Draw one random (window, target) example from a series.

    A position is drawn with ``np.random.randint`` from ``prev`` up to, but
    not including, ``len(data) - 1``.  The ``prev`` values located right
    before that position form the window and the value at the position is
    the target.

    Args:
        data: Indexable sequence of observations.
        prev: Number of preceding observations to put in the window.

    Returns:
        A ``(window, target)`` pair, where ``window`` is a list of ``prev``
        values and ``target`` is the observation that follows them.
    """
    upper = len(data) - 1
    pick = np.random.randint(prev, upper)
    window = []
    for position in np.arange(pick - prev, pick):
        window.append(data[position])
    return window, data[pick]

def weekday_design_row(t, period=7):
    """Return the regressor row for day index ``t``.

    The row holds ``t`` itself (the linear trend term) followed by ``period``
    0/1 indicators, exactly one of which is set: the one at position
    ``t % period``.
    """
    row = [t]
    row.extend(1 if slot == t % period else 0 for slot in range(period))
    return row


def build_design_matrix(start, stop, period=7):
    """Return the regressor rows for every day index in ``range(start, stop)``."""
    return [weekday_design_row(t, period) for t in range(start, stop)]


def forecast_ols(traffic, horizon=30, window=130, period=7):
    """Forecast the next ``horizon`` values with a trend + weekday OLS model.

    The model is fitted on the most recent ``window`` observations only and
    then evaluated on the ``horizon`` day indices that follow the data.

    Args:
        traffic: List of observed daily values, oldest first.
        horizon: Number of future days to predict.
        window: Maximum number of trailing observations used for the fit.
        period: Length of the seasonal cycle (7 for a weekly cycle).

    Returns:
        The predictions returned by the fitted statsmodels results object,
        one per future day.
    """
    n = len(traffic)
    # Never ask for more history than exists: otherwise the design matrix
    # would have more rows than there are observations to regress on.
    window = min(window, n)
    X = build_design_matrix(n - window, n, period)
    Y = traffic[n - window:]
    # `sm` is statsmodels.api, imported at the top of the file.  No explicit
    # constant is added: the indicators sum to one on every row.
    res = sm.OLS(Y, X).fit()
    return res.predict(build_design_matrix(n, n + horizon, period))


if __name__ == "__main__":
    # Input format: the number of observations, then one integer per line.
    n = int(input())
    traffic = [int(input()) for _ in range(n)]
    for x in forecast_ols(traffic, horizon=30, window=130, period=7):
        print(x)


R model for time series prediction using seasonal ARIMA (period = 7 corresponds to the weekly cycle; 30 is the number of data points we'd like to predict)

# Read every line from standard input and split each one on spaces.
input = strsplit(readLines('stdin', warn=FALSE), " ")
# Drop the first line (presumably the observation count) and flatten the
# remaining tokens into one numeric vector.
input = as.numeric(unlist(input[-1]))
# Fit ARIMA(2,1,0) with a seasonal (1,1,0) part of period 7, predict 30 steps
# ahead and round the predictions.
p = round(as.vector(predict(arima(input, c(2, 1, 0), seasonal=list(order=c(1,1,0),period = 7)), 30)$pred))
# Print one prediction per line.
for (i in 1:30) {
    cat(p[i])
    cat("\n")
}

R model for time series prediction based on ARIMA
# Forecast the next 30 values of a series read from standard input.
raw_lines <- readLines('stdin', warn = FALSE)
tokens <- strsplit(raw_lines, " ")
# The first line is skipped (presumably it holds the observation count);
# everything after it is flattened into one numeric vector.
series <- as.numeric(unlist(tokens[-1]))

# ARIMA(2,1,0) with a seasonal (1,1,0) component of period 7.
fit <- arima(series, order = c(2, 1, 0),
             seasonal = list(order = c(1, 1, 0), period = 7))
p <- round(as.vector(predict(fit, n.ahead = 30)$pred))

# One rounded prediction per line.
for (i in seq_len(30)) {
    cat(p[i], "\n", sep = "")
}


Python code for Time Series forecasting using EWMA from Pandas (Exponentially Weighted Moving Average)

import scipy.stats as stats
import numpy as np
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn import linear_model

def ewma_forecast(history, horizon=30, halflife=11, tail=10):
    """Forecast ``horizon`` values with an exponentially weighted mean.

    The series is padded with ``horizon`` copies of the mean of its last
    ``tail`` observations, the padded series is smoothed with an
    exponentially weighted moving average, and the smoothed values at the
    padded positions are returned as the forecast.

    Args:
        history: Sequence of observed values, oldest first.
        horizon: Number of future values to produce.
        halflife: Half-life of the exponential weights, in observations.
        tail: Number of trailing observations averaged to build the padding.

    Returns:
        NumPy array of ``horizon`` smoothed values.
    """
    baseline = np.mean(history[-tail:])
    padded = np.append(history, [baseline] * horizon)
    # pd.ewma() was removed from pandas; Series.ewm(...).mean() is its
    # replacement and uses the same defaults (adjust=True, min_periods=0).
    smoothed = pd.Series(padded).ewm(halflife=halflife).mean()
    return smoothed.values[len(history):]


if __name__ == "__main__":
    # Input format: the number of observations, then one integer per line.
    N = int(input().strip())
    x = [int(input().strip()) for _ in range(N)]
    for r in ewma_forecast(x, horizon=30, halflife=11, tail=10):
        print("{0:0.0f}".format(r))

Python code for Time Series forecasting using FFT and Fourier Extrapolation (using FFT from Numpy)

import numpy as np
from numpy import fft


def fourierExtrapolation(x, n_predict, n_harm=5):
    """Extend a series with a linear trend plus its lowest-frequency harmonics.

    The slope of a least-squares line is removed from ``x``, the remainder is
    transformed with the FFT, and the signal is rebuilt over
    ``x.size + n_predict`` time steps from the ``1 + 2 * n_harm`` components
    of lowest absolute frequency before the trend is added back.

    Args:
        x: One-dimensional NumPy array of observations.
        n_predict: Number of steps to extrapolate past the end of ``x``.
        n_harm: Number of harmonics in the model.

    Returns:
        NumPy array of length ``x.size + n_predict``.  The first ``x.size``
        entries are the model's reconstruction of the input; the remaining
        ``n_predict`` entries are the extrapolation.
    """
    n = x.size
    t = np.arange(0, n)
    p = np.polyfit(t, x, 1)  # linear trend in x; p[0] is the slope
    x_notrend = x - p[0] * t  # only the slope is removed, not the intercept
    x_freqdom = fft.fft(x_notrend)  # detrended x in the frequency domain
    f = fft.fftfreq(n)  # frequencies
    # Order the components by absolute frequency, lower -> higher.
    indexes = sorted(range(n), key=lambda i: np.absolute(f[i]))

    t = np.arange(0, n + n_predict)
    restored_sig = np.zeros(t.size)
    for i in indexes[:1 + n_harm * 2]:
        ampli = np.absolute(x_freqdom[i]) / n  # amplitude
        phase = np.angle(x_freqdom[i])  # phase
        restored_sig += ampli * np.cos(2 * np.pi * f[i] * t + phase)
    return restored_sig + p[0] * t


def weekday_fourier_forecast(x, horizon=30, period=7):
    """Forecast ``horizon`` steps by extrapolating each weekday slot separately.

    The series is split into ``period`` sub-series (every ``period``-th value
    starting at offsets 0 .. period - 1), each sub-series is extended with
    ``fourierExtrapolation``, and the extrapolated values are interleaved
    back into calendar order.

    Args:
        x: One-dimensional NumPy array of observations, oldest first.  Every
            slot needs at least one observation, because a sub-series is
            fitted per slot.
        horizon: Number of future values to produce.
        period: Length of the seasonal cycle (7 for a weekly cycle).

    Returns:
        NumPy float array with ``horizon`` forecast values.
    """
    n = x.size
    # Ceiling division: the largest number of future steps any slot needs.
    steps = -(-horizon // period)
    per_slot = [
        fourierExtrapolation(x[slot::period], steps) for slot in range(period)
    ]
    forecast = np.empty(horizon)
    for k in range(horizon):
        # q is the position inside the slot's own sub-series.  Indexing the
        # extended signal by q keeps each forecast aligned with its own time
        # step; slicing a fixed-size tail off the extrapolation instead picks
        # up in-sample values whenever the tail is longer than `steps`.
        q, slot = divmod(n + k, period)
        forecast[k] = per_slot[slot][q]
    return forecast


if __name__ == "__main__":
    # Input format: the number of observations, then one integer per line.
    N = int(input())
    x = np.array([int(input()) for _ in range(N)])
    for value in weekday_fourier_forecast(x, 30):
        # Round to nearest: truncating would print 9 for 9.999999.
        print(int(round(value)))