私は、人気のある皮膚がん画像のデータセットに基づいて開発しているモデルに苦境に陥っています。いくつかのガイダンスが必要な点があります -
A.
元のデータセットは 10,000 枚以上の画像で、7000 近くの画像が 7 つのクラスのいずれかに属しています。4948 個のランダムな画像のサブセットを作成し、画像をリストのリストに変換する関数を実行しました。最初のリストには画像が含まれ、後者にはクラスが含まれ、クラス (5 - +6800K 画像のクラス)。思考プロセスは、クラス全体の分布を正規化することでした。
元のモデルを出力 (7 ではなく 6 ニューロンの高密度層) で再実行すると、エラーが返されます。
可能なクラスが 6 つしかないことをモデルに「示す」手順がありませんか? モデルは、出力層に 7 つのニューロンがある場合にのみ実行されます。
エラー:
Train on 1245 samples, validate on 312 samples
Epoch 1/30
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-138-8a3b40a69e37> in <module>
25 metrics=["accuracy"])
26
---> 27 model.fit(X_train, y_train, batch_size=32, epochs=30, validation_split=0.2)
/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
778 validation_steps=validation_steps,
779 validation_freq=validation_freq,
--> 780 steps_name='steps_per_epoch')
781
782 def evaluate(self,
/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_arrays.py in model_iteration(model, inputs, targets, sample_weights, batch_size, epochs, verbose, callbacks, val_inputs, val_targets, val_sample_weights, shuffle, initial_epoch, steps_per_epoch, validation_steps, validation_freq, mode, validation_in_fit, prepared_feed_values_from_dataset, steps_name, **kwargs)
361
362 # Get outputs.
--> 363 batch_outs = f(ins_batch)
364 if not isinstance(batch_outs, list):
365 batch_outs = [batch_outs]
/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/backend.py in __call__(self, inputs)
3290
3291 fetched = self._callable_fn(*array_vals,
-> 3292 run_metadata=self.run_metadata)
3293 self._call_fetch_callbacks(fetched[-len(self._fetches):])
3294 output_structure = nest.pack_sequence_as(
/anaconda3/lib/python3.7/site-packages/tensorflow/python/client/session.py in __call__(self, *args, **kwargs)
1456 ret = tf_session.TF_SessionRunCallable(self._session._session,
1457 self._handle, args,
-> 1458 run_metadata_ptr)
1459 if run_metadata:
1460 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
InvalidArgumentError: Received a label value of 6 which is outside the valid range of [0, 6). Label values: 1 1 2 4 2 1 2 1 2 1 2 2 4 2 2 1 3 1 4 6 0 2 4 2 0 4 2 4 4 0 2 4
[[{{node loss_15/activation_63_loss/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]]
B.
クラスの数とクラス全体の画像のまばらさを考慮して、データセットが比較的小さいため、データ拡張を追加しようとしています。ジェネレーターを実行しようとすると、validation_data
タプル内の変数の 1 つに問題があることを示す以下のエラー メッセージが表示されます。何が問題なのか理解できません。
テスト セットの値の例は次のようになります。
[[[[0.41568627]
[0.4 ]
[0.43137255]
...
[0.54509804]
[0.54901961]
[0.54509804]]
[[0.42352941]
[0.43137255]
[0.43921569]
...
[0.56078431]
[0.54117647]
[0.55294118]]
[[0.41960784]
[0.41960784]
[0.45490196]
...
[0.51764706]
[0.57254902]
[0.50588235]]
...
[[0.30980392]
[0.36470588]
[0.36470588]
...
[0.47058824]
[0.44705882]
[0.41960784]]
[[0.29803922]
[0.31764706]
[0.34509804]
...
[0.45098039]
[0.43921569]
[0.4 ]]
[[0.25882353]
[0.30196078]
[0.31764706]
...
[0.45490196]
[0.42745098]
[0.36078431]]]
[[[0.60784314]
[0.59215686]
[0.56862745]
...
[0.59607843]
[0.63921569]
[0.63529412]]
[[0.6627451 ]
[0.63137255]
[0.62352941]
...
[0.67843137]
[0.60784314]
[0.63529412]]
[[0.62745098]
[0.65098039]
[0.6 ]
...
[0.61568627]
[0.63921569]
[0.67058824]]
...
[[0.62352941]
[0.6 ]
[0.59607843]
...
[0.6627451 ]
[0.71372549]
[0.6745098 ]]
[[0.61568627]
[0.58431373]
[0.61568627]
...
[0.67058824]
[0.65882353]
[0.68235294]]
[[0.61176471]
[0.60392157]
[0.61960784]
...
[0.65490196]
[0.6627451 ]
[0.66666667]]]]
[2, 1, 4, 4, 2]
エラー:
Epoch 1/10
1/155 [..............................] - ETA: 11s - loss: 1.7916 - acc: 0.3000
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-139-8f19a958861f> in <module>
12 history = model.fit_generator(trainAug.flow(X_train, y_train, batch_size=batch_size)
13 ,epochs = 10, validation_data = (X_test, y_test),
---> 14 steps_per_epoch= X_train.shape[0]// batch_size
15 )
16 #epochs = epochs, validation_data = (X_test, y_test),
/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
1431 shuffle=shuffle,
1432 initial_epoch=initial_epoch,
-> 1433 steps_name='steps_per_epoch')
1434
1435 def evaluate_generator(self,
/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_generator.py in model_iteration(model, data, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch, mode, batch_size, steps_name, **kwargs)
262
263 is_deferred = not model._is_compiled
--> 264 batch_outs = batch_function(*batch_data)
265 if not isinstance(batch_outs, list):
266 batch_outs = [batch_outs]
/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in train_on_batch(self, x, y, sample_weight, class_weight, reset_metrics)
1173 self._update_sample_weight_modes(sample_weights=sample_weights)
1174 self._make_train_function()
-> 1175 outputs = self.train_function(ins) # pylint: disable=not-callable
1176
1177 if reset_metrics:
/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/backend.py in __call__(self, inputs)
3290
3291 fetched = self._callable_fn(*array_vals,
-> 3292 run_metadata=self.run_metadata)
3293 self._call_fetch_callbacks(fetched[-len(self._fetches):])
3294 output_structure = nest.pack_sequence_as(
/anaconda3/lib/python3.7/site-packages/tensorflow/python/client/session.py in __call__(self, *args, **kwargs)
1456 ret = tf_session.TF_SessionRunCallable(self._session._session,
1457 self._handle, args,
-> 1458 run_metadata_ptr)
1459 if run_metadata:
1460 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
InvalidArgumentError: Received a label value of 6 which is outside the valid range of [0, 6). Label values: 0 1 6 4 2 4 2 0 1 2
[[{{node loss_15/activation_63_loss/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]]
コード:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
import os
import cv2
DATA_DIR = "/Users/namefolder/PycharmProjects/skin-cancer/HAM10000_images_part_1"
metadata = pd.read_csv(os.path.join(DATA_DIR, 'HAM10000_metadata.csv'))
lesion_type_dict = {'nv': 'Melanocytic nevi',
'mel': 'Melanoma',
'bkl': 'Benign keratosis-like lesions ',
'bcc': 'Basal cell carcinoma',
'akiec': 'Actinic keratoses',
'vasc': 'Vascular lesions',
'df': 'Dermatofibroma'}
metadata['cell_type'] = metadata['dx'].map(lesion_type_dict.get)
metadata['dx_code'] = pd.Categorical(metadata['dx']).codes
# save array of image-id and diagnosis-type (categorical)
metadata = metadata[['image_id', 'dx', 'dx_type', 'dx_code']]
training_data = []
IMG_SIZE=50
# preparing training data
def creating_training_data(path):
for img in os.listdir(path):
try:
img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
for index, row in metadata.iterrows():
if (img == row['image_id']+'.jpg') & (row['dx_code'] != 5):
try:
training_data.append([new_array, row['dx_code']])
except Exception as ee:
pass
except Exception as e:
pass
return training_data
training_data = creating_training_data(DATA_DIR)
import random
random.shuffle(training_data)
# Splitting data into X features and Y label
X_train = []
y_train = []
for features, label in training_data:
X_train.append(features)
y_train.append(label)
# Reshaping of the data - required by Tensorflow and Keras (*necessary step of deep-learning using these repos)
X_train = np.array(X_train).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
# Normalize data - to reduce processing requirements
X_train = X_train/255.0
# model configuration
model = Sequential()
model.add(Conv2D(64, (3,3), input_shape = X_train.shape[1:]))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3,3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64))
model.add(Dense(6))
model.add(Activation("softmax"))
model.compile(loss="mean_squared_error",
optimizer="adam",
metrics=["accuracy"])
# Data Augmentation - Repo enabler
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
# initialize the training training data augmentation object
trainAug = ImageDataGenerator(
rescale=1 / 255.0,
rotation_range=20,
zoom_range=0.05,
width_shift_range=0.05,
height_shift_range=0.05,
shear_range=0.05,
horizontal_flip=True,
fill_mode="nearest")
# initialize the validation (and testing) data augmentation object
valAug = ImageDataGenerator(rescale=1 / 255.0)
#set a leraning rate annealer
learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc',
patience=3,
verbose=1,
factor=0.5,
min_lr=0.00001)
#Augmented Images model development
)
trainAug.fit(X_train)
#Fit the model
epochs = 10
batch_size= 10
history = model.fit_generator(trainAug.flow(X_train, y_train, batch_size=batch_size),epochs = 10, validation_data = (X_test, y_test), steps_per_epoch= X_train.shape[0]// batch_size)