このチュートリアルに従って https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html
具体的には、ワンホット エンコードではなく整数シーケンスのデータの使用に関するセクションです。
- 以下のエラー メッセージから、次元の不一致に関する問題であることがわかります。元のチュートリアルに従ったとき(ワンホット ベクトルでデータを前処理するため、エンコーダー入力・デコーダー入力・decoder_outputs の形状が一致していた場合)には、このような問題は発生しませんでした。このことから、keras.models.Model に渡す入力の次元が原因だと考えています。ただし、埋め込みレイヤーでマスキングを適用したいので、整数シーケンス データを使用する必要があります。
encoder_inputs shape: TensorShape([None, None])
decoder_inputs shape: TensorShape([None, None])
decoder_outputs shape: TensorShape([None, None, 99])
私が見つけたすべてのチュートリアルは、これが問題ではないことを示しているようです(?)。
Traceback (most recent call last):
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1619, in _create_c_op
c_op = c_api.TF_FinishOperation(op_desc)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Dimensions must be equal, but are 160 and 99 for 'loss/dense_loss/mul' (op: 'Mul') with input shapes: [?,160], [?,160,99].
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:/Users/bakes/OneDrive - The Pennsylvania State University/devops/repos/thesis_math_language_processing/main.py", line 18, in <module>
network.train()
File "C:\Users\bakes\OneDrive - The Pennsylvania State University\devops\repos\thesis_math_language_processing\architectures\test.py", line 90, in train
model = keras.models.Model([encoder_inputs, decoder_inputs], decoder_outputs)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\keras\engine\training.py", line 819, in fit
use_multiprocessing=use_multiprocessing)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py", line 342, in fit
total_epochs=epochs)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py", line 128, in run_one_epoch
batch_outs = execution_function(iterator)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py", line 98, in execution_function
distributed_function(input_fn))
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\eager\def_function.py", line 568, in __call__
result = self._call(*args, **kwds)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\eager\def_function.py", line 615, in _call
self._initialize(args, kwds, add_initializers_to=initializers)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\eager\def_function.py", line 497, in _initialize
*args, **kwds))
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\eager\function.py", line 2389, in _get_concrete_function_internal_garbage_collected
graph_function, _, _ = self._maybe_define_function(args, kwargs)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\eager\function.py", line 2703, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\eager\function.py", line 2593, in _create_graph_function
capture_by_value=self._capture_by_value),
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\framework\func_graph.py", line 978, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\eager\def_function.py", line 439, in wrapped_fn
return weak_wrapped_fn().__wrapped__(*args, **kwds)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py", line 85, in distributed_function
per_replica_function, args=args)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py", line 763, in experimental_run_v2
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py", line 1819, in call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py", line 2164, in _call_for_each_replica
return fn(*args, **kwargs)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\autograph\impl\api.py", line 292, in wrapper
return func(*args, **kwargs)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py", line 433, in train_on_batch
output_loss_metrics=model._output_loss_metrics)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py", line 312, in train_on_batch
output_loss_metrics=output_loss_metrics))
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py", line 253, in _process_single_batch
training=training))
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py", line 167, in _model_loss
per_sample_losses = loss_fn.call(targets[i], outs[i])
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\keras\losses.py", line 221, in call
return self.fn(y_true, y_pred, **self._fn_kwargs)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\keras\losses.py", line 971, in categorical_crossentropy
return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\keras\backend.py", line 4495, in categorical_crossentropy
return -math_ops.reduce_sum(target * math_ops.log(output), axis)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\ops\math_ops.py", line 902, in binary_op_wrapper
return func(x, y, name=name)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\ops\math_ops.py", line 1201, in _mul_dispatch
return gen_math_ops.mul(x, y, name=name)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\ops\gen_math_ops.py", line 6125, in mul
"Mul", x=x, y=y, name=name)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\framework\op_def_library.py", line 742, in _apply_op_helper
attrs=attr_protos, op_def=op_def)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\framework\func_graph.py", line 595, in _create_op_internal
compute_device)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\framework\ops.py", line 3322, in _create_op_internal
op_def=op_def)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1786, in __init__
control_input_ops)
File "C:\Users\bakes\Anaconda3\envs\math_env\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1622, in _create_c_op
raise ValueError(str(e))
ValueError: Dimensions must be equal, but are 160 and 99 for 'loss/dense_loss/mul' (op: 'Mul') with input shapes: [?,160], [?,160,99].
def train(self):
    """Build and train a seq2seq (encoder-decoder) LSTM on integer sequences.

    The encoder and decoder both consume padded integer token sequences via
    an ``Embedding`` layer with ``mask_zero=True`` (index 0 is reserved for
    padding). Targets are therefore integer class indices of shape
    ``(batch, seq_len)``, NOT one-hot tensors.

    Returns:
        The Keras ``History`` object produced by ``model.fit``.
    """
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=definitions.LOGDIR)
    processor = preprocessing.processor()
    train_x, train_y, test_x, test_y = processor.get_data(n_data=self.n_train)
    encoder_input_data, decoder_input_data, decoder_target_data = \
        processor.preprocess_sequence([train_x, train_y])

    latent_dim = p.hidden_size
    # +1 keeps index 0 free for padding, required by mask_zero=True below.
    num_decoder_tokens = p.vocab_size + 1
    num_encoder_tokens = p.vocab_size + 1

    # Encoder: integer sequence -> embedding -> LSTM. We discard the
    # encoder's sequence output and keep only its final states.
    encoder_inputs = keras.layers.Input(shape=(None,))
    x = keras.layers.Embedding(num_encoder_tokens, latent_dim, mask_zero=True)(encoder_inputs)
    x, state_h, state_c = keras.layers.LSTM(latent_dim, return_state=True)(x)
    encoder_states = [state_h, state_c]

    # Decoder, initialized from the encoder states.
    decoder_inputs = keras.layers.Input(shape=(None,))
    # FIX: size the decoder embedding by the *decoder* vocabulary. The two
    # sizes happen to be equal here, but using num_encoder_tokens was a
    # latent bug waiting for the vocabularies to diverge.
    x = keras.layers.Embedding(num_decoder_tokens, latent_dim, mask_zero=True)(decoder_inputs)
    x, _, _ = keras.layers.LSTM(latent_dim, return_sequences=True,
                                return_state=True)(x, initial_state=encoder_states)
    decoder_outputs = keras.layers.Dense(num_decoder_tokens, activation='softmax')(x)

    # Map [encoder_input_data, decoder_input_data] -> decoder_target_data.
    model = keras.models.Model([encoder_inputs, decoder_inputs], decoder_outputs)

    # FIX: the targets are integer indices of shape (batch, seq_len), while
    # 'categorical_crossentropy' expects one-hot targets matching the
    # (batch, seq_len, vocab) softmax output — that mismatch is exactly the
    # "Dimensions must be equal, but are 160 and 99 ... [?,160], [?,160,99]"
    # error. 'sparse_categorical_crossentropy' consumes integer class
    # indices directly, so no one-hot encoding of the targets is needed.
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['sparse_categorical_accuracy'])
    model.summary()

    history = model.fit([encoder_input_data, decoder_input_data],
                        decoder_target_data,
                        batch_size=64,
                        epochs=self.n_epochs,
                        callbacks=[tensorboard_callback],
                        validation_split=0.2)
    return history