I'm implementing an LSTM language model with Chainer 1.22.0. My code works on the CPU but not on the GPU... more precisely, the objective function converges quite quickly on the CPU but does not converge on the GPU. Any idea what might be going on?
Thanks in advance for your help!
Output:
$ python debug.py --gpu -1
cpu mode
objective in epoch 0 : 14.8049154282
objective in epoch 1 : 11.7126655579
objective in epoch 2 : 10.6166152954
objective in epoch 3 : 9.81489753723
objective in epoch 4 : 8.90626144409
objective in epoch 5 : 7.73007297516
objective in epoch 6 : 6.31889343262
objective in epoch 7 : 4.83179998398
objective in epoch 8 : 3.52315592766
objective in epoch 9 : 2.58598852158
$ python debug.py --gpu 0
gpu mode 0
objective in epoch 0 : 14.8049144745
objective in epoch 1 : 14.3081817627
objective in epoch 2 : 14.0404243469
objective in epoch 3 : 13.8618173599
objective in epoch 4 : 13.7236022949
objective in epoch 5 : 13.6082553864
objective in epoch 6 : 13.5111179352
objective in epoch 7 : 13.4323377609
objective in epoch 8 : 13.3735141754
objective in epoch 9 : 13.3361949921
Environment:
- Python 2.7.13 (Anaconda)
- Chainer 1.22.0
- CUDA 8.0
My model:
minimize  -sum_t log P(s_t | s_{<t})
where P(s_t | s_{<t}) = LSTM(s_{<t})
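(Just to make the notation concrete, here is a tiny NumPy-only sketch of what this objective computes for a single sequence; the function name and shapes are only for illustration and are not part of the code below.)

import numpy as np

def neg_log_likelihood(log_probs, seq):
    # log_probs[t] is the predicted log-distribution over the word that follows seq[t],
    # given the prefix seq[0..t]; every word except the first one is scored
    return -sum(log_probs[t][seq[t + 1]] for t in range(len(seq) - 1))

# toy check: voc_size = 5, T = 4, uniform predictions -> 3 * log(5) ~ 4.83
log_probs = np.log(np.full((3, 5), 0.2))
print neg_log_likelihood(log_probs, [1, 2, 3, 4])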
Full code:
'''
minimal example for this question
'''
import numpy as np
try:
    import cupy as xp
except ImportError:
    pass
import sys
import chainer as ch
import chainer.links as L
import chainer.functions as F
INT = "int32"
FLOAT="float32"
BOOLEAN='bool'
class LSTM(ch.Chain):
    def __init__(self, voc_size, in_size, out_size, batch_size):
        np.random.seed(0)
        w1 = np.random.normal(size=[voc_size, in_size])
        super(LSTM, self).__init__(
            emb=L.EmbedID(voc_size, in_size, initialW=w1),    # word embedding
            enc=L.LSTM(in_size=in_size, out_size=out_size),   # LSTM cell
            scores=L.Linear(out_size, voc_size)               # output transformation
        )
        self.batch_size = batch_size
        self.out_size = out_size
        self.gpu_idx = -1

    # put links on GPU
    def to_gpu(self, device_idx):
        self.gpu_idx = device_idx
        self.emb.to_gpu(device_idx)
        self.scores.to_gpu(device_idx)
        self.enc.to_gpu(device_idx)
    def obj(self, seq):
        # objective function: negative log-likelihood summed over every word in the batch
        return -F.sum(self.logL(seq))
    def logL(self, seq):
        '''
        seq    : batch of source sequences of length T : List<List<int>>
        RETURN : R^{batch_size x T} : computation-graph node
        '''
        T = xp if self.gpu_idx >= 0 else np
        padded = T.transpose(T.array(seq, dtype=INT))  # Z^{T x batch_size}
        # reset LSTM cell
        self.enc.reset_state()
        logL = []
        # logL for each time step except the first input
        for i in range(0, len(padded) - 1):
            # get LSTM output
            h = self.enc(self.emb(padded[i]))   # R^{batch_size x hidden_size}
            # probability distribution over the vocabulary
            s = self.scores(F.tanh(h))          # R^{batch_size x voc_size}
            s = F.transpose(F.log_softmax(s))   # R^{voc_size x batch_size}
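            # embed_id below looks up row padded[i+1][j] of the transposed log-probabilities
            # for every batch element j, so entry (j, j) holds log P(next word | history);
            # the identity mask and the sum over axis 0 extract that diagonal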
            # likelihood of the next word
            l = F.embed_id(padded[i + 1], s)    # R^{batch_size x batch_size}
            l = F.sum(l * T.identity(self.batch_size), axis=0)  # R^{batch_size}
            logL += [l]
        return F.transpose(F.stack(logL))
GPU_TAG = "--gpu"
if __name__ == "__main__":
    args = sys.argv
    gpu_idx = -1
    i = 0
    # arguments
    while i < len(args):
        if args[i] == GPU_TAG:
            i += 1
            gpu_idx = int(args[i])
        i += 1
    # hyperparameters
    voc_size = 5
    batch_size = 3
    in_size = 5
    out_size = 2
    # instantiate model
    model = LSTM(voc_size, in_size, out_size, batch_size)
    # GPU mode or CPU mode
    if gpu_idx >= 0:
        print "gpu mode ", gpu_idx
        ch.cuda.get_device(gpu_idx).use()
        model.to_gpu(gpu_idx)
    else:
        print "cpu mode"
    # prepare optimizer
    trainer = ch.optimizers.sgd.SGD(lr=0.3)
    trainer.setup(model)
    # seq to train
    x = [[1, 2, 3, 4]] * batch_size
    # main training loop
    for epoch in range(10):
        obj = model.obj(x)   # forward pass
        model.cleargrads()   # clear gradients before the backward pass
        obj.backward()       # backward pass
        print "objective in epoch ", epoch, ": ", obj.data
        trainer.update()     # update parameters