4

Geoffrey Hinton と Ruslan Salakhutdinov によるSemantic Hashing の記事 ( http://www.cs.toronto.edu/~hinton/absps/sh.pdf )のディープ ビリーフ ネットワークを実装しようとしています。制限付きボルツマン マシン (RBM) で制限付きポアソン モデルを実装する方法を理解するのに苦労しています。その結果、私のモデルは実数値の単語数ベクトルを取り、重みを正しく更新します。

以下に、私の RBM の重要なコードを示します。

for epoch in range(epochs):
        errsum = 0
        batch_index = 0
        for _ in self.batches:

            # Positive phase - generate data from visible to hidden units.
            pos_vis = self.__get_input_data__(batch_index)
            N = sum(pos_vis, axis = 1)[newaxis].T

            batch_size = len(pos_vis)


            if self.final_layer:
                pos_hid_prob = dot(pos_vis,self.weights) + tile(self.hidden_biases,(batch_size,1))
            elif self.first_layer:
                pos_hid_prob = dbn.bernoulli(dot(pos_vis,self.weights) + tile(self.hidden_biases,(batch_size,1)))
            else:
                pos_hid_prob = dbn.bernoulli(dot(pos_vis,self.weights) + tile(self.hidden_biases,(batch_size,1)))

            pos_values = dot(pos_vis.T,pos_hid_prob)
            pos_hid_act = sum(pos_hid_prob,axis = 0)
            pos_vis_act = sum(pos_vis,axis = 0)

            # If probabilities are higher than random generated, the states are 1 
            if self.final_layer:
                pos_hid = pos_hid_prob + randn(batch_size,self.num_hid)
            else:
                randoms = rand(batch_size,self.num_hid)
                pos_hid = array(randoms < pos_hid_prob,dtype = int)

            # Negative phase - generate data from hidden to visible units and then again to hidden units.                
            if self.first_layer: # To represent the count data in the visible units for the first layer RBM we use the constrained poisson model.
                neg_vis = dbn.constrained_poisson(pos_hid, self.visible_biases, self.weights.T, batch_size, pos_vis)                   
            else: # For all RBM's besides the first where the data are binary.
                neg_vis = dbn.bernoulli(dot(pos_hid,self.weights.T) + tile(self.visible_biases,(batch_size,1)))

            if self.final_layer:
                neg_hid_prob = dot(neg_vis,self.weights) + tile(self.hidden_biases,(batch_size,1))    
            else:
                neg_hid_prob = dbn.bernoulli(dot(neg_vis,self.weights) + tile(self.hidden_biases,(batch_size,1)))

            neg_values = dot(neg_vis.T, neg_hid_prob)
            neg_hid_act = sum(neg_hid_prob,axis=0)
            neg_vis_act = sum(neg_vis,axis=0)

            # Set the error
            errsum += sum((pos_vis-neg_vis)**2)

            # Update weights and biases
            self.delta_weights = self.learning_momentum * self.delta_weights + self.epsilon_weights*((pos_values-neg_values)/batch_size - self.weight_cost*self.weights)
            self.delta_visible_biases = self.learning_momentum * self.delta_visible_biases + (self.epsilon_visibleBiases/batch_size) * (pos_vis_act-neg_vis_act)
            self.delta_hidden_biases = self.learning_momentum * self.delta_hidden_biases + (self.epsilon_hiddenBiases/batch_size) * (pos_hid_act-neg_hid_act)

            self.weights += self.delta_weights
            self.visible_biases += self.delta_visible_biases
            self.hidden_biases += self.delta_hidden_biases

            batch_index += 1
        print 'Epoch ',epoch+1,' Error ',errsum

ベルヌーイ分布と制約付きポアソン モデルの方法を以下に示します。

def constrained_poisson(states_hidden_units, biases_visible_units,weights, number_of_docs_in_batch,states_visible_units):
    '''
    Calculating the probabilities according to the constrained
    Poisson model.

    Parameters
    ----------
    states_hidden_units: the binary values of the hidden units.
    weights: the weight matrices.
    number_of_docs_in_batch: number of documents in the batch.

    Returns
    __________
    The batch after calculating the contrained poisson values.
    '''

    _lambda = dot(states_hidden_units,weights)+tile(biases_visible_units,(number_of_docs_in_batch,1))
    # If overflow is encountered in exp.
    m = amax(_lambda,axis=1)[newaxis].T
    _lambda = exp(_lambda-m)

    # Divide each element by the sum of the words in the document.
    num_of_rows = sum(_lambda,axis = 1)
    for i in range(len(_lambda)):
        for j in range(len(_lambda[i])):
            _lambda[i,j] = _lambda[i,j]/num_of_rows[i]

    # Compute the sum of each row in the initial input data.        
    N = array(sum(states_visible_units,axis = 1))[newaxis].T
    _lambda = _lambda * N

    _lambda_power = zeros((len(_lambda),len(_lambda[0])))
    for (i,j),value in ndenumerate(_lambda):
        _lambda_power[i,j] = power(value,states_visible_units[i,j])

    return (exp(-_lambda)*(_lambda_power))/sc.factorial(states_visible_units,exact=False)

def bernoulli(x):
    return 1./(1+exp(-x))
4

0 に答える 0