I'm trying to use an MLP on the Kaggle Titanic dataset, but when I train it I always get an accuracy of 1.0, even if I drop the epochs to 1 or shrink the layers down to almost nothing (4, 1, 4). The dtypes of the inputs are all fine. Here is the code:
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import data_engine as de
import time
train = pd.read_csv("train.csv")
# test = pd.read_csv("test.csv")
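# Kaggle's test.csv has no Survived labels, so hold out the tail of train.csv as a test set instead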
test = train.iloc[700:]
train = train.iloc[:700]
#encoding
sex_lables = {'female': 0, 'male': 1}
train.Sex = train.Sex.replace(sex_lables)
test.Sex = test.Sex.replace(sex_lables)
#(C = Cherbourg; Q = Queenstown; S = Southampton)
embarked = {'C': 0, 'Q': 1, 'S': 2}
train.Embarked = train.Embarked.replace(embarked)
test.Embarked = test.Embarked.replace(embarked)
# selecting features
train_x = train[['Pclass','Sex','SibSp','Parch','Fare','Embarked']]
train_y = train[['Survived']]
test_x = test[['Pclass','Sex','SibSp','Parch','Fare','Embarked']]
test_y = test[['Survived']]
# convert to numpy arrays and convert Y (labels) to one-hot
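# (np.eye(n)[labels] is the usual one-hot trick: row i of the result is the one-hot vector
#  for labels[i], e.g. np.eye(2)[[0, 1, 1]] -> [[1., 0.], [0., 1.], [0., 1.]])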
train_x_np = train_x.values.astype('float32')
train_y_np = np.eye(np.max(train_y.Survived.values.flatten())+1)[train_y.values.flatten()]
test_x_np = test_x.values.astype('float32')
test_y_np = np.eye(np.max(train_y.Survived.values.flatten())+1)[test_y.values.flatten()]
# the number of nodes in each hidden layer
node_layer_1 = 64
node_layer_2 = 64
node_layer_3 = 64
# hyperparameters
classes = 2
batch_size = 100
hm_epochs = 10
step_numer = 1
input_parameters = 6
# for ploting
epoch_cost_values = []
epoch_accuracy_values = []
# placeholders: x takes a batch of the 6 selected features, y the matching one-hot labels
x = tf.placeholder('float', [None, input_parameters])
y = tf.placeholder('float')
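# a plain MLP: 6 input features -> three ReLU hidden layers (64, 64, 64) -> 2 output logits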
def neural_network_model(data):
    hidden_1_layer = {'weight': tf.Variable(tf.random_normal([input_parameters, node_layer_1])),
                      'biases': tf.Variable(tf.random_normal([node_layer_1]))}
    hidden_2_layer = {'weight': tf.Variable(tf.random_normal([node_layer_1, node_layer_2])),
                      'biases': tf.Variable(tf.random_normal([node_layer_2]))}
    hidden_3_layer = {'weight': tf.Variable(tf.random_normal([node_layer_2, node_layer_3])),
                      'biases': tf.Variable(tf.random_normal([node_layer_3]))}
    output_layer = {'weight': tf.Variable(tf.random_normal([node_layer_3, classes])),
                    'biases': tf.Variable(tf.random_normal([classes]))}
    l1 = tf.matmul(data, hidden_1_layer['weight']) + hidden_1_layer['biases']
    l1 = tf.nn.relu(l1)
    l2 = tf.matmul(l1, hidden_2_layer['weight']) + hidden_2_layer['biases']
    l2 = tf.nn.relu(l2)
    l3 = tf.matmul(l2, hidden_3_layer['weight']) + hidden_3_layer['biases']
    l3 = tf.nn.relu(l3)
    output = tf.matmul(l3, output_layer['weight']) + output_layer['biases']
    return output
now = time.time()
prediction = neural_network_model(x)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
optimizer = tf.train.AdamOptimizer().minimize(cost)
# training loop: feed forward + backprop
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(hm_epochs):
        epoch_loss = 0
        # step through the training set one batch at a time
        for batch_num in range(int(len(train_y) / batch_size)):
            epoch_x, epoch_y = de.get_patch(train_x_np, train_y_np, batch_num, batch_size)
            _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
            epoch_loss += c
        print('Epoch', epoch, 'completed out of ', hm_epochs, 'loss:', epoch_loss)
        epoch_cost_values.append(epoch_loss)
    # test the accuracy of the model (note: this feeds the raw label column, not the one-hot test_y_np)
    correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
    print('Accuracy: ', accuracy.eval({x: test_x.values, y: test_y.values}))
    print(int(time.time() - now), "sec")
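(The claim above that the input dtypes are fine comes from quick checks along these lines, just a sketch:)

print(train_x_np.dtype, train_x_np.shape)  # expecting float32 (700, 6)
print(train_y_np.dtype, train_y_np.shape)  # expecting float64 (700, 2), one-hot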
And here is the get_patch function:
def chunker_mono(seq, size):
    # yield seq in consecutive slices of length `size`
    return (seq[pos:pos + size] for pos in np.arange(0, len(seq), size))

def get_patch(x, y, num, batch_size):
    # return the num-th batch of x and y
    x_sub = x
    y_sub = y
    for indx, i in enumerate(chunker_mono(x, batch_size)):
        if indx == num:
            x_sub = i
    for indx, i in enumerate(chunker_mono(y, batch_size)):
        if indx == num:
            y_sub = i
    return x_sub, y_sub
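For what it's worth, chunker_mono just walks the array in consecutive slices of batch_size, so as far as I can tell get_patch is equivalent to this direct slice (a sketch for clarity, not the code I actually run):

def get_patch_sliced(x, y, num, batch_size):
    # hypothetical equivalent: take the num-th consecutive slice of length batch_size
    start = num * batch_size
    return x[start:start + batch_size], y[start:start + batch_size]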
And these are the results:
Epoch 0 completed out of 10 loss: 835.817965508
Epoch 1 completed out of 10 loss: 75.1456642151
Epoch 2 completed out of 10 loss: 60.268289566
Epoch 3 completed out of 10 loss: 45.3410954475
Epoch 4 completed out of 10 loss: 30.482026577
Epoch 5 completed out of 10 loss: 15.8394477367
Epoch 6 completed out of 10 loss: 5.79651939869
Epoch 7 completed out of 10 loss: 7.72154080868
Epoch 8 completed out of 10 loss: 5.14520382881
Epoch 9 completed out of 10 loss: 5.37735944986
Accuracy: 1.0
4 sec