I am solving a simple ordinary differential equation (ODE):
y'(x) = 2 * x
y(0) = 1
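Integrating y'(x) = 2 * x gives y(x) = x**2 + C, and the condition y(0) = 1 fixes C = 1, so the exact solution is y(x) = x**2 + 1; I use it as the reference solution below.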
I am using TensorFlow with the following code:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
alphaAdam = 0.01 # --- Learning rate. float >= 0.
betaAdam = 0.99 # --- Exponential decay rate for the 1st moment estimates. float, 0 < beta < 1. Generally close to 1.
epsilonAdam = 1e-1 # --- Fuzz factor. float >= 0.
numIter = 20000 # --- Number of gradient descent iterations
skipIter = 100 # --- Number of iterations to skip for presenting the results
f0 = 1 # --- Boundary condition
xEpsilon = np.sqrt(np.finfo(np.float32).eps)
N = 20 # --- Number of sampling points
x = tf.cast(tf.reshape(tf.linspace(0, 1, N), (N, 1)), dtype = tf.float32) # --- Independent variable
numNeuronsInputLayer = 1 # --- Number of neurons of the input layer
numNeuronsLayer1 = 32 # --- Number of neurons of the first layer
numNeuronsLayer2 = 32 # --- Number of neurons of the second layer
numNeuronsLayer3 = 32 # --- Number of neurons of the third layer
numNeuronsOutputLayer = 1 # --- Number of neurons of the output layer
weights = {
    'h1': tf.Variable(tf.random.normal([numNeuronsInputLayer, numNeuronsLayer1])),
    'h2': tf.Variable(tf.random.normal([numNeuronsLayer1, numNeuronsLayer2])),
    'h3': tf.Variable(tf.random.normal([numNeuronsLayer2, numNeuronsLayer3])),
    'out': tf.Variable(tf.random.normal([numNeuronsLayer3, numNeuronsOutputLayer]))
}
biases = {
    'b1': tf.Variable(tf.random.normal([numNeuronsLayer1])),
    'b2': tf.Variable(tf.random.normal([numNeuronsLayer2])),
    'b3': tf.Variable(tf.random.normal([numNeuronsLayer3])),
    'out': tf.Variable(tf.random.normal([numNeuronsOutputLayer]))
}
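# --- Fully connected network 1 -> 32 -> 32 -> 32 -> 1; sigmoid activation on every layer, including the output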
def model(x):
    x = np.array([[[x]]], dtype='float32')
    layer1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer1 = tf.nn.sigmoid(layer1)
    layer2 = tf.add(tf.matmul(layer1, weights['h2']), biases['b2'])
    layer2 = tf.nn.sigmoid(layer2)
    layer3 = tf.add(tf.matmul(layer2, weights['h3']), biases['b3'])
    layer3 = tf.nn.sigmoid(layer3)
    output = tf.matmul(layer3, weights['out']) + biases['out']
    return tf.nn.sigmoid(output)
# --- Auxiliary function
def g(x):
    return x * model(x) + f0
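# --- Note: g(0) = f0 by construction, so the boundary condition y(0) = 1 is satisfied exactly for any network output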
# --- ODE function
def f(x):
    return 2 * x
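# --- Cost function: sum of the squared ODE residuals dNN - f(x) over the N sampling points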
def costFunction(x):
    with tf.GradientTape(persistent = True) as tape:
        tape.watch(x)
        y = g(x)
    #dNN = tape.gradient(y, x)
    dNN = (g(x + xEpsilon) - g(x)) / xEpsilon
    return tf.reduce_sum(tf.square(tf.abs(dNN - f(x))))
optimizer = tf.keras.optimizers.Adam(learning_rate = alphaAdam, beta_1 = betaAdam, epsilon = epsilonAdam)
def optimizationStep():
    with tf.GradientTape() as tape:
        costFunctionValue = costFunction(x)
    trainable_variables = list(weights.values()) + list(biases.values())
    gradients = tape.gradient(costFunctionValue, trainable_variables)
    optimizer.apply_gradients(zip(gradients, trainable_variables))
for i in range(numIter):
    optimizationStep()
    if i % skipIter == 0:
        print("Cost function: %f " % (costFunction(x)))
def referenceSolution(x):
    return x**2 + 1
figure(figsize = (10, 10))
yref = referenceSolution(x)
y = g(x)
plt.plot(x, yref, label = "Reference")
plt.plot(x, y, label = "Approximation")
plt.legend(loc = 2, prop = {'size': 20})
plt.show()
If I compute the derivative y'(x) with a finite difference, i.e., a straight-line (secant) approximation, via the line
dNN = (g(x + xEpsilon) - g(x)) / xEpsilon
the code works fine. However, if I use automatic differentiation instead, i.e., the lines
with tf.GradientTape(persistent = True) as tape:
    tape.watch(x)
    y = g(x)
dNN = tape.gradient(y, x)
the code does not work.
What am I doing wrong?