Data Info:

The data were scraped from Yahoo Finance.

Goal:

Which of the two models, Long Short-Term Memory (LSTM) or Gated Recurrent Unit (GRU), performs better at predicting Microsoft's closing price?

# load the data
msft.data <- read.csv(file = "~/Stat510/MSFT.csv", header = TRUE, sep = ",")

# print first 5 observations
head(msft.data,5)
##         Date  Open  High   Low Close Adj.Close   Volume
## 1 2018-04-19 96.44 97.07 95.34 96.11  90.70618 23552500
## 2 2018-04-20 95.91 96.11 94.05 95.00  89.65860 31154400
## 3 2018-04-23 95.74 96.29 94.63 95.35  89.98891 22331800
## 4 2018-04-24 96.24 96.47 92.41 93.12  87.88430 34524800
## 5 2018-04-25 93.30 93.30 90.28 92.31  87.11984 33729300
# plotting the closing price
plot(as.POSIXct(msft.data$Date), 
     msft.data$Close, 
     main = "Daily Microsoft Stock Closing Prices", 
     xlab = "Time", 
     ylab = "Stock Close Price", 
     col = "orange", type = "l", lwd = 2)

Preprocessing for Modeling

# extracting the year from the Date column, then splitting the data
# into training (pre-2023) and testing (2023) sets
msft.data$Year <- as.numeric(format(as.Date(msft.data$Date, format = "%Y-%m-%d"), "%Y"))

train.data <- msft.data[which(msft.data$Year < 2023), 1:5]
test.data <- msft.data[which(msft.data$Year >= 2023), 1:5]
# glimpse of the test data (glimpse() comes from dplyr)
library('dplyr')

glimpse(test.data)
## Rows: 73
## Columns: 5
## $ Date  <chr> "2023-01-03", "2023-01-04", "2023-01-05", "2023-01-06", "2023-01…
## $ Open  <dbl> 243.08, 232.28, 227.20, 223.00, 226.45, 227.76, 231.29, 235.26, …
## $ High  <dbl> 245.75, 232.87, 227.55, 225.76, 231.24, 231.31, 235.95, 239.90, …
## $ Low   <dbl> 237.40, 225.96, 221.76, 219.35, 226.41, 227.33, 231.11, 233.56, …
## $ Close <dbl> 239.58, 229.10, 222.31, 224.93, 227.12, 228.85, 235.77, 238.51, …
# plotting training and testing data
plot(as.POSIXct(msft.data$Date), 
     msft.data$Close, 
     main = "Daily Microsoft Stock Closing Prices", 
     xlab = "Time", 
     ylab = "Stock Close Price", 
     pch = "", 
     panel.first = grid())

lines(as.POSIXct(train.data$Date), train.data$Close, lwd = 2, col = "blue")
lines(as.POSIXct(test.data$Date), test.data$Close, lwd = 2, col = "green")
legend("topleft", c("training", "testing"), lty = 1, col = c("blue","green"))

# scaling prices to fall in [0,1] via min-max normalization
# (note: min and max are taken over the full series, so the test period
# informs the scaling; a stricter approach would use train-only min/max)
price <- msft.data$Close
price.sc <- (price - min(price))/(max(price) - min(price))
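# illustrative helpers (names scale01/unscale01 are not used elsewhere in
# this report): the forward and inverse min-max transform, handy when
# mapping predictions back to dollars later
scale01 <- function(x, lo = min(price), hi = max(price)) (x - lo) / (hi - lo)
unscale01 <- function(z, lo = min(price), hi = max(price)) z * (hi - lo) + lo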
# preparing the training matrix: each row holds nsteps consecutive
# scaled prices plus the next day's scaled price as the target

nsteps <- 73 # width of the sliding window
train.matrix <- matrix(nrow = nrow(train.data) - nsteps, ncol = nsteps + 1)
for (i in 1:(nrow(train.data) - nsteps))
  train.matrix[i,] <- price.sc[i:(i + nsteps)]

# creating train.x (3-D input array) and train.y (targets)
train.x <- array(train.matrix[, -ncol(train.matrix)], dim = c(nrow(train.matrix), nsteps, 1))
train.y <- train.matrix[, ncol(train.matrix)]
# creating test.x and test.y: each test window draws its history from
# the scaled prices immediately preceding that test day
test.matrix <- matrix(nrow = nrow(test.data), ncol = nsteps + 1)
for (i in 1:nrow(test.data)) 
  test.matrix[i,] <- price.sc[(i + nrow(train.matrix)):(i + nsteps + nrow(train.matrix))]

test.x <- array(test.matrix[, -ncol(test.matrix)], dim = c(nrow(test.matrix), nsteps, 1))
test.y <- test.matrix[, ncol(test.matrix)]
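# sanity check: recurrent layers expect input arrays with the layout
# (samples, timesteps, features)
dim(train.x)   # (nrow(train.data) - nsteps) x nsteps x 1
dim(test.x)    # nrow(test.data) x nsteps x 1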

Deep Learning Modeling

##################################################
# LSTM MODEL
##################################################
# loading machine learning libraries
library('tensorflow')
library('keras')

# use_condaenv("r-reticulate", required = TRUE)

Long Short-Term Memory Model

# setting the seed for reproducibility, then instantiating the LSTM model
set_random_seed(42)
LSTM.model <- keras_model_sequential()

# specifying model structure
LSTM.model %>% layer_lstm(input_shape = dim(train.x)[2:3], units = nsteps)
LSTM.model %>% layer_dense(units = 1, activation = "tanh") 
LSTM.model %>% compile(loss = "mean_squared_error")
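# optional: inspect layer shapes and parameter counts before training
summary(LSTM.model)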
# training model
epochs <- 5  
for (i in 1:epochs) {
  LSTM.model %>% fit(train.x, train.y, batch_size = 32, epochs = 1)
  LSTM.model %>% reset_states() # clears the hidden states after each one-epoch fit
}
# predicting for testing data
pred.y <- LSTM.model %>% predict(test.x, batch_size = 32)
# rescaling test.y and pred.y back to the original scale
test.y.re <- test.y*(max(price) - min(price)) + min(price)
pred.y.re <- pred.y*(max(price) - min(price)) + min(price)
# LSTM Metrics

# mean absolute error
mae <- mean(abs(pred.y.re - test.y.re))
# mean squared error
mse <- mean((pred.y.re - test.y.re)^2)

print("LSTM Metrics:")
## [1] "LSTM Metrics:"
cat("Mean Absolute Error:",mae,"\n")
## Mean Absolute Error: 15.46492
cat("Mean Squared Error:",mse,"\n")
## Mean Squared Error: 314.6245
cat("Root Mean Squared Error:",sqrt(mse))
## Root Mean Squared Error: 17.73766
# computing prediction accuracy: the share of test days whose prediction
# falls within 10%, 15%, and 20% of the actual closing price
accuracy10 <- ifelse(abs(test.y.re - pred.y.re) < 0.10*test.y.re, 1, 0) 
accuracy15 <- ifelse(abs(test.y.re - pred.y.re) < 0.15*test.y.re, 1, 0) 
accuracy20 <- ifelse(abs(test.y.re - pred.y.re) < 0.20*test.y.re, 1, 0)
print(paste("accuracy within 10%:", round(mean(accuracy10),4)))
## [1] "accuracy within 10%: 0.9452"
print(paste("accuracy within 15%:", round(mean(accuracy15),4)))
## [1] "accuracy within 15%: 1"
print(paste("accuracy within 20%:", round(mean(accuracy20),4)))
## [1] "accuracy within 20%: 1"
# plotting actual and predicted values for testing data
plot(as.POSIXct(test.data$Date), test.y.re, type = "l", lwd = 2, col = "green", 
     main = "Daily Microsoft Stock Actual and Predicted Prices - LSTM Model", 
     xlab = "Time", ylab = "Stock Price", panel.first = grid())
lines(as.POSIXct(test.data$Date), pred.y.re, lwd = 2, col = "orange")
legend("topright", c("actual", "predicted"), lty = 1, lwd = 2,
       col = c("green","orange"))

Gated Recurrent Unit Model

##################################################
# GRU MODEL
##################################################

# setting the same seed for comparability, then instantiating the model
set_random_seed(42)
GRU.model <- keras_model_sequential()
# specifying model structure
GRU.model %>% layer_gru(input_shape = dim(train.x)[2:3], units = nsteps)
GRU.model %>% layer_dense(units = 1, activation = "tanh") 
GRU.model %>% compile(loss = "mean_squared_error")
# training model
epochs <- 5  
for (i in 1:epochs) {
  GRU.model %>% fit(train.x, train.y, batch_size = 32, epochs = 1)
  GRU.model %>% reset_states() 
}
# predicting for testing data
pred.y <- GRU.model %>% predict(test.x, batch_size = 32)
# rescaling pred.y back to the original scale
pred.y.re <- pred.y*(max(price) - min(price)) + min(price)
# GRU Metrics

# mean absolute error
mae <- mean(abs(pred.y.re - test.y.re))
# mean squared error
mse <- mean((pred.y.re - test.y.re)^2)

print("GRU Metrics:")
## [1] "GRU Metrics:"
cat("Mean Absolute Error:",mae,"\n")
## Mean Absolute Error: 8.10126
cat("Mean Squared Error:",mse,"\n")
## Mean Squared Error: 92.75793
cat("Root Mean Squared Error:",sqrt(mse))
## Root Mean Squared Error: 9.631092
# computing prediction accuracy with the same tolerance bands as the LSTM
accuracy10 <- ifelse(abs(test.y.re - pred.y.re) < 0.10*test.y.re, 1, 0) 
accuracy15 <- ifelse(abs(test.y.re - pred.y.re) < 0.15*test.y.re, 1, 0) 
accuracy20 <- ifelse(abs(test.y.re - pred.y.re) < 0.20*test.y.re, 1, 0)
print(paste("accuracy within 10%:", round(mean(accuracy10),4)))
## [1] "accuracy within 10%: 1"
print(paste("accuracy within 15%:", round(mean(accuracy15),4)))
## [1] "accuracy within 15%: 1"
print(paste("accuracy within 20%:", round(mean(accuracy20),4)))
## [1] "accuracy within 20%: 1"
# plotting actual and predicted values for testing data
plot(as.POSIXct(test.data$Date), test.y.re, type = "l", lwd = 2, col = "green", 
     main = "Daily Microsoft Stock Actual and Predicted Prices - GRU Model", 
     xlab = "Time", ylab = "Stock Price", panel.first = grid())
lines(as.POSIXct(test.data$Date), pred.y.re, lwd = 2, col = "orange")
legend("topright", c("actual", "predicted"), lty = 1, lwd = 2,
       col = c("green","orange"))
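
On these runs the GRU answers the goal question: it beats the LSTM on every metric (MAE 8.10 vs. 15.46, RMSE 9.63 vs. 17.74, and 100% of its predictions fall within 10% of the actual price). Since mae and mse are overwritten between the two sections above, a side-by-side table needs per-model copies of the metrics first; a minimal sketch, assuming lstm.mae, lstm.mse, gru.mae, and gru.mse were saved right after each metrics chunk:

# hypothetical comparison table; assumes per-model copies of the metrics
# were stored (e.g. lstm.mae <- mae right after the LSTM metrics chunk)
results <- data.frame(Model = c("LSTM", "GRU"),
                      MAE = c(lstm.mae, gru.mae),
                      RMSE = c(sqrt(lstm.mse), sqrt(gru.mse)))
print(results)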