I implemented a 4-layer neural network with backpropagation and sigmoid activations for the Iris dataset. It has 4 input units, two hidden layers of 8 units each, and an output layer of 3 units. The code is written with numpy because I wanted to practice implementing it myself. The problem is that after training and running it on the test set, it produces nearly identical outputs for every example, so every test case gets classified as the second class. The fact that the outputs are not exactly identical confuses me even more.
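For context, these are the standard backpropagation equations for sigmoid units that the code below is meant to implement (the delta entry for the bias unit is dropped before accumulating the gradient):

$$\delta^{(4)} = a^{(4)} - y, \qquad \delta^{(l)} = \bigl(W^{(l)}\bigr)^{\top}\delta^{(l+1)} \odot a^{(l)}\bigl(1 - a^{(l)}\bigr), \qquad \Delta^{(l)} \leftarrow \Delta^{(l)} + \delta^{(l+1)}\bigl(a^{(l)}\bigr)^{\top}$$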
Here are my imports and preprocessing code:
# the dimensions of each matrix are commented next to it
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
X, y = load_iris(return_X_y=True) # base data frame
m = np.size(X[:,0]) # number of examples: 150
n_labels = np.size(np.unique(y)) # number of classes or labels : 3
# Making a new array of labels to indicate them as follows:
# 0: [1,0,0]
# 1: [0,1,0]
# 2: [0,0,1]
y_new = np.zeros((m,n_labels)) # matrix of dimension: 150 * 3
for index, value in enumerate(y):
    y_new[index][value] = 1
X_train, X_test, y_train, y_test = train_test_split(X,y_new,test_size=0.1)
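As an aside, the same one-hot encoding can be written in one line with np.eye (just an equivalent sketch; the loop above produces the same matrix):
# row `value` of the 3 * 3 identity matrix is the one-hot vector for class `value`
y_new = np.eye(n_labels)[y] # 150 * 3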
Here is my main code:
def random_init(n_in, n_out, epsilon):
    # randomly initializing weights to avoid symmetry
    W = np.random.rand(n_out, n_in + 1) * (2 * epsilon) - epsilon
    return W

def sigmoid(z):
    # sigmoid function
    return 1/(1 + np.exp(-z))

def forward_propagation(X, y, dict2):
    dict1 = {} # declaring a hashmap to save values
    # Assigning values from dict2
    weight1 = dict2["weight1"] # 8 * 5
    weight2 = dict2["weight2"] # 8 * 9
    weight3 = dict2["weight3"] # 3 * 9
    # forward propagate
    a1 = np.asarray(X).reshape((4,1)) # 4 * 1
    a1 = np.append(np.ones(1), a1).reshape((5,1)) # 5 * 1
    z2 = np.dot(weight1, a1) # 8 * 1
    a2 = sigmoid(z2) # 8 * 1
    a2 = np.append(np.ones(1), a2).reshape((9,1)) # 9 * 1
    z3 = np.dot(weight2, a2) # 8 * 1
    a3 = sigmoid(z3) # 8 * 1
    a3 = np.append(np.ones(1), a3).reshape((9,1)) # 9 * 1
    z4 = np.dot(weight3, a3) # 3 * 1
    a4 = sigmoid(z4) # 3 * 1
    # assigning to dict1
    dict1["a2"] = a2
    dict1["a3"] = a3
    dict1["a4"] = a4
    dict1["z2"] = z2
    dict1["z3"] = z3
    dict1["z4"] = z4
    return dict1

def back_propagation(X, y, dict1, dict2, dict3):
    # assigning values from dict1 and dict2
    a1 = np.append(np.ones(1), X).reshape((5,1)) # 5 * 1
    a2 = dict1["a2"] # 9 * 1
    a3 = dict1["a3"] # 9 * 1
    a4 = dict1["a4"] # 3 * 1
    z2 = dict1["z2"] # 8 * 1
    z3 = dict1["z3"] # 8 * 1
    z4 = dict1["z4"] # 3 * 1
    weight1 = dict2["weight1"] # 8 * 5
    weight2 = dict2["weight2"] # 8 * 9
    weight3 = dict2["weight3"] # 3 * 9
    delta4 = a4 - y # 3 * 1
    delta3 = np.dot(weight3.T, delta4) * (a3 * (1-a3)) # 9 * 1
    delta3 = delta3[1:] # 8 * 1, dropping the bias row
    delta2 = np.dot(weight2.T, delta3) * (a2 * (1-a2)) # 9 * 1
    delta2 = delta2[1:] # 8 * 1, dropping the bias row
    dict3['Delta3'] += np.dot(delta4,a3.T) # 3 * 9
    dict3['Delta2'] += np.dot(delta3,a2.T) # 8 * 9
    dict3['Delta1'] += np.dot(delta2,a1.T) # 8 * 5

def cost_function(X, y, dict2):
    m = np.size(X[:,0]) # y: m * 3
    h_x = predict(X, dict2) # m * 3
    J = (1/m) * np.sum(np.sum((-y) * np.log(h_x) - (1-y) * np.log(1-h_x)))
    return J

def fit(X, y, alpha, num_iter, dict2):
    m = np.size(X[:,0]) # y: m * 3
    weight1 = dict2["weight1"] # 8 * 5
    weight2 = dict2["weight2"] # 8 * 9
    weight3 = dict2["weight3"] # 3 * 9
    dict3 = {}
    dict3['Cost'] = []
    for _ in range(num_iter):
        dict3['Delta1'] = np.zeros((8,5)) # 8 * 5
        dict3['Delta2'] = np.zeros((8,9)) # 8 * 9
        dict3['Delta3'] = np.zeros((3,9)) # 3 * 9
        for i in range(m):
            curr_x = X[i,:].reshape((4,1)) # 4 * 1
            curr_y = y[i,:].reshape((3,1)) # 3 * 1
            d_1 = forward_propagation(curr_x, curr_y, dict2)
            back_propagation(curr_x, curr_y, d_1, dict2, dict3)
        dict2["weight1"] -= alpha * dict3['Delta1']
        dict2["weight2"] -= alpha * dict3['Delta2']
        dict2["weight3"] -= alpha * dict3['Delta3']
        dict3['Cost'].append(cost_function(X, y, dict2))
    return dict3

def predict(X, dict2):
    m = np.size(X[:,0]) # number of examples
    weight1 = dict2["weight1"] # 8 * 5
    weight2 = dict2["weight2"] # 8 * 9
    weight3 = dict2["weight3"] # 3 * 9
    a1 = np.asarray(X).reshape((4,m)) # 4 * m
    a1 = np.append(np.ones((1,m)), a1).reshape((5,m)) # 5 * m
    z2 = np.dot(weight1, a1) # 8 * m
    a2 = sigmoid(z2) # 8 * m
    a2 = np.append(np.ones((1,m)), a2).reshape((9,m)) # 9 * m
    z3 = np.dot(weight2, a2) # 8 * m
    a3 = sigmoid(z3) # 8 * m
    a3 = np.append(np.ones((1,m)), a3).reshape((9,m)) # 9 * m
    z4 = np.dot(weight3, a3) # 3 * m
    a4 = sigmoid(z4) # 3 * m
    return a4.T # m * 3
And here is the training part:
epsilon = 0.012
dict2 = {}
# Randomly initializing the weights
dict2['weight1'] = random_init(4, 8, epsilon)
dict2['weight2'] = random_init(8, 8, epsilon)
dict2['weight3'] = random_init(8, 3, epsilon)
alpha = 0.0001 # learning rate
num_iter = 300
d_3 = fit(X_train, y_train, alpha, num_iter, dict2)
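Since fit appends the cost of every iteration to dict3['Cost'], a quick way to check whether training moves at all is to compare the first and last values (purely a diagnostic print, not part of the model):
print(d_3['Cost'][0], d_3['Cost'][-1]) # cost after the first vs. the last iteration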
So running it on the test set produces results like this:
pred = predict(X_test, dict2)
for index in range(len(pred)):
    print(f'Predicted:{pred[index]}, Actual:{y_test[index]}')
Output:
Predicted:[0.33530749 0.34205935 0.34909822], Actual:[0. 0. 1.]
Predicted:[0.33529658 0.34204889 0.34908868], Actual:[0. 0. 1.]
Predicted:[0.33530866 0.34206106 0.34910007], Actual:[1. 0. 0.]
Predicted:[0.33530336 0.34205595 0.34909549], Actual:[0. 1. 0.]
Predicted:[0.33530603 0.34205787 0.34909684], Actual:[0. 0. 1.]
Predicted:[0.33529737 0.34204983 0.3490895 ], Actual:[0. 1. 0.]
Predicted:[0.33531099 0.34206343 0.34910228], Actual:[1. 0. 0.]
Predicted:[0.33530516 0.34205763 0.34909703], Actual:[1. 0. 0.]
Predicted:[0.33531216 0.34206447 0.34910327], Actual:[1. 0. 0.]
Predicted:[0.3353009 0.34205322 0.34909274], Actual:[0. 1. 0.]
Predicted:[0.3353099 0.34206301 0.34910225], Actual:[0. 0. 1.]
Predicted:[0.33530988 0.34206279 0.34910207], Actual:[0. 1. 0.]
Predicted:[0.335315 0.34206706 0.34910561], Actual:[1. 0. 0.]
Predicted:[0.33530064 0.34205326 0.34909282], Actual:[0. 1. 0.]
Predicted:[0.33530877 0.34206142 0.34910049], Actual:[1. 0. 0.]
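For completeness, converting those probability rows to class labels with np.argmax (using only pred and y_test from above) gives an accuracy number:
# compare the index of the largest output with the index of the 1 in the one-hot label
accuracy = np.mean(np.argmax(pred, axis=1) == np.argmax(y_test, axis=1))
print(f'Test accuracy: {accuracy:.2f}')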
I'm really stuck, so I'd like to know where I went wrong.