# Import the libraries
import numpy as np
import matplotlib.pyplot as plt  # for 畫圖用
import pandas as pd
from sklearn.preprocessing import MinMaxScaler  # Feature Scaling
# 建立 長短期記憶 LSTM 模型 Import the Keras libraries and packages
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

# Load the training data
dataset_train = pd.read_csv('D:/googl_stock_prices_train.csv')  # training set
# Select the "Open" column by name and keep it 2-D (rows, 1).  The test inputs
# are later built from dataset_train['Open'] / dataset_test['Open'], so picking
# the column by name here guarantees training and testing use the same column
# even if the CSV column order differs.
training_set = dataset_train[['Open']].values
# Load the test data
dataset_test = pd.read_csv('D:/googl_stock_prices_test.csv')    # test set
real_stock_price = dataset_test[['Open']].values  # "Open" column, kept 2-D

# Scale the training prices into the (0, 1) range; the scaler is fitted on the
# training set only and reused for the test inputs.
sc = MinMaxScaler(feature_range=(0, 1))
training_set_scaled = sc.fit_transform(training_set)

m = 60                 # number of preceding days used to predict one point
n = len(dataset_test)  # number of days to predict (size of the test set)

print(len(dataset_train))
# Without more than m rows no (window, target) pair can be built; fail early
# with a clear message instead of an IndexError on the empty array's shape.
if len(dataset_train) <= m:
    raise ValueError(
        f"training set has {len(dataset_train)} rows; "
        f"more than m={m} rows are required to build training windows"
    )

# Each sample is the m scaled prices preceding day i; its target is day i.
X_train = np.array(
    [training_set_scaled[i - m:i, 0] for i in range(m, len(dataset_train))]
)
y_train = training_set_scaled[m:len(dataset_train), 0].copy()
print(X_train.shape[0], X_train.shape[1])

# X_train is 2-D (samples, timesteps); the LSTM input needs a third axis:
#    (samples, timesteps, indicators) with a single indicator here.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)

# Build the LSTM (long short-term memory) model as a linear stack of layers
regressor = Sequential()

# First LSTM layer: declares the input shape (timesteps, 1 indicator) and
# passes the whole sequence on to the next LSTM layer; Dropout regularises it
regressor.add(
    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1))
)
regressor.add(Dropout(0.2))

# Second and third LSTM layers, each followed by Dropout regularisation
for _ in range(2):
    regressor.add(LSTM(units=50, return_sequences=True))
    regressor.add(Dropout(0.2))

# Fourth LSTM layer (no return_sequences) followed by Dropout
regressor.add(LSTM(units=50))
regressor.add(Dropout(0.2))

# Output layer: a single value, the predicted scaled price
regressor.add(Dense(units=1))

# Compile with the Adam optimizer and mean-squared-error loss
regressor.compile(loss='mean_squared_error', optimizer='adam')

# Train the model
regressor.fit(X_train, y_train, batch_size=32, epochs=100)

# Testing
# Join the "Open" prices of both sets so every test day has its m preceding
# days available, then keep only the last m + len(dataset_test) values.
dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis=0)
window_start = len(dataset_total) - len(dataset_test) - m
inputs = dataset_total[window_start:].values
inputs = sc.transform(inputs.reshape(-1, 1))  # same feature scaling as training

# One window of m scaled prices per test day (m earlier days + n test days)
X_test = np.array([inputs[end - m:end, 0] for end in range(m, m + n)])
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)  # make it 3-D

# Predict, then map the predictions back to the original price scale
predicted_stock_price = sc.inverse_transform(regressor.predict(X_test))

# Both the real and the predicted prices are 2-D arrays at this point
print(real_stock_price[0:n])
print(predicted_stock_price)

# Comparison of results for different training-set sizes:
#
# [figure: 20240321_100005_11zon.jpg]

# Visualise the results: real prices in red, predicted prices in blue
series_to_plot = (
    (real_stock_price[0:n], 'red', 'Real Google Stock Price'),
    (predicted_stock_price, 'blue', 'Predict Google Stock Price'),
)
for prices, line_color, line_label in series_to_plot:
    plt.plot(prices, color=line_color, label=line_label)

plt.title('Google Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Google Stock Price')
plt.legend()
plt.show()
from scipy.stats import pearsonr
# Flatten both 2-D price arrays into 1-D arrays
real_stock_price = np.array(real_stock_price[0:n]).ravel()
predicted_stock_price = np.array(predicted_stock_price).ravel()

# Build a DataFrame with two columns:
# "Real Stock Price" and "Predicted Stock Price"
price_columns = {
    "Real Stock Price": real_stock_price,
    "Predicted Stock Price": predicted_stock_price,
}
data = pd.DataFrame(price_columns)

# Pearson correlation matrix of the two columns
corr = data.corr(method="pearson")
print(corr)

# Correlation test -- H0: correlation = 0, H1: correlation != 0.
# pearsonr gives the Pearson correlation coefficient and the p-value.
t = pearsonr(real_stock_price, predicted_stock_price)
print(t)

# Downloading historical stock-price data
#
# Install the dependency first: pip install yfinance

import pandas as pd
import yfinance as yf

# Ticker symbol and the date range to download
symbol, start_date, end_date = "GOOG", "2024-01-01", "2024-02-29"

# Download the price history
# NOTE(review): yfinance documents `end` as exclusive, so 2024-02-29 itself
# may not be part of the result -- confirm against the installed version.
stock_data = yf.download(symbol, start_date, end_date)

# Keep only the columns that are needed
wanted_columns = ["Open", "High", "Low", "Close", "Volume"]
stock_data = stock_data[wanted_columns]

# Save the data to a CSV file
stock_data.to_csv("test.csv")

# [figure: 20240321_095712_11zon.jpg]