7.3 seq2seqの実装

足し算データセットを読み出します。

from ivory.utils.repository import import_module

# Read the addition dataset and its character vocabulary.
sequence = import_module("scratch2/dataset/sequence")
train_set = sequence.load_data("addition.txt")[0]
x_train, t_train = train_set
char_to_id, id_to_char = sequence.get_vocab()

[1] 2019-08-30 08:17:38 (283ms) python3 (36.0s)

モデルの形状を設定します。

# Model dimensions.
# NOTE(review): "hideen_size" is a misspelling of "hidden_size"; the
# misspelled name is kept as an alias because later cells reference it.
vocab_size = len(char_to_id)  # number of distinct characters in the dataset
wordvec_size = 16  # embedding dimension
hidden_size = 128  # LSTM hidden-state dimension
hideen_size = hidden_size  # backward-compatible misspelled alias

[2] 2019-08-30 08:17:38 (30.2ms) python3 (36.1s)

「ゼロから作るDeep Learning ❷」のモデルを読み込みます。

# Load the reference Seq2seq implementation and show its weight shapes.
sequence = import_module("scratch2/ch07/seq2seq")
seq2seq = sequence.Seq2seq(vocab_size, wordvec_size, hideen_size)
print(*(param.shape for param in seq2seq.params), sep="\n")

[3] 2019-08-30 08:17:38 (46.9ms) python3 (36.1s)

(13, 16)
(16, 512)
(128, 512)
(512,)
(13, 16)
(16, 512)
(128, 512)
(512,)
(128, 13)
(13,)

モデルを作成します。

from ivory.core.model import branch, Model

# Layer specifications: (layer_type, size, [trailing layer name]).
encoder_spec = [
    ("input", vocab_size),
    ("embedding", wordvec_size),
    ("lstm", hideen_size, "select"),
]
decoder_spec = [
    ("input", vocab_size),
    ("embedding", wordvec_size),
    ("lstm", hideen_size),
    ("affine", vocab_size, "softmax_cross_entropy"),
]

encoder, decoder = branch(encoder_spec), branch(decoder_spec)

[4] 2019-08-30 08:17:38 (15.5ms) python3 (36.1s)

エンコーダの出力をデコーダのLSTMレイヤに入力します。

# Route the encoder's final output into the decoder LSTM's initial state.
hidden = encoder[-1].y.set_variable()
decoder[1].h.set_variable(hidden)
print(hidden)

[5] 2019-08-30 08:17:38 (15.6ms) python3 (36.2s)

<Variable(['Select.1.y', 'LSTM.2.h'], (128,)) at 0x271a1d357f0>

デコーダの損失パラメータを起点にしてモデルを構築します。

# Build the model by tracing back from the decoder's loss parameter.
model = Model([decoder[-1].loss])
for index, layer in enumerate(model.layers):
    print(f"layers[{index}]", layer)
for index, variable in enumerate(model.data_input_variables):
    print(f"inputs[{index}]", variable)

[6] 2019-08-30 08:17:38 (15.7ms) python3 (36.2s)

layers[0] <Embedding('Embedding.1', (13, 16)) at 0x271a1d35b38>
layers[1] <LSTM('LSTM.1', (16, 128)) at 0x271a1d35cc0>
layers[2] <Select('Select.1', (128,)) at 0x271a1d35e48>
layers[3] <Embedding('Embedding.2', (13, 16)) at 0x271a1d35f28>
layers[4] <LSTM('LSTM.2', (16, 128)) at 0x271a1d280b8>
layers[5] <Affine('Affine.1', (128, 13)) at 0x271a1d28240>
layers[6] <SoftmaxCrossEntropy('SoftmaxCrossEntropy.1', (13,)) at 0x271a1d28400>
inputs[0] <Variable(['Embedding.1.x'], ()) at 0x271a1d289e8>
inputs[1] <Variable(['Embedding.2.x'], ()) at 0x271a1d28a20>
inputs[2] <Variable(['SoftmaxCrossEntropy.1.t'], ()) at 0x271a1d28a90>

重みの初期値を「ゼロから作るDeep Learning」と同じにします。

# Copy the reference model's initial weights into our model.
for weight, source in zip(model.weights, seq2seq.params):
    weight.variable.data = source.copy()
    print(weight.layer.name, weight.name, weight.d.shape, weight.d.dtype)

[7] 2019-08-30 08:17:38 (15.6ms) python3 (36.2s)

Embedding.1 W (13, 16) float32
LSTM.1 W (16, 512) float32
LSTM.1 U (128, 512) float32
LSTM.1 b (512,) float32
Embedding.2 W (13, 16) float32
LSTM.2 W (16, 512) float32
LSTM.2 U (128, 512) float32
LSTM.2 b (512,) float32
Affine.1 W (128, 13) float32
Affine.1 b (13,) float32

エンコーダのstatefulをFalseに設定します。

# Disable statefulness on the encoder's LSTM (encoder[1]) so each forward
# pass starts from a fresh hidden state instead of carrying it over.
encoder[1].stateful.d = False

[8] 2019-08-30 08:17:38 (15.7ms) python3 (36.2s)

データを用意します。

# Take a small batch; the encoder input is reversed along the time axis.
batch_size = 2
x = x_train[:batch_size, ::-1]
t = t_train[:batch_size]

[9] 2019-08-30 08:17:38 (15.6ms) python3 (36.2s)

モデルに代入し、「ゼロから作るDeep Learning」の結果と比較します。

順伝搬

import numpy as np

# Forward pass: run our model layer by layer and compare each intermediate
# result against the reference implementation.
model.reset_state()
model.set_data(x, t[:, :-1], t[:, 1:])  # decoder input/target are t shifted by one
for layer in encoder:
    layer.clear_data()
    layer.forward()

# Reference encoder forward (per sub-layer, then end to end).
xs = seq2seq.encoder.embed.forward(x)
hs = seq2seq.encoder.lstm.forward(xs)
h = seq2seq.encoder.forward(x)
print(np.allclose(xs, encoder[0].y.d))
print(np.allclose(hs, encoder[1].y.d))
print(np.allclose(h, encoder[2].y.d))

for layer in decoder:
    layer.clear_data()
    layer.forward()

# Reference decoder forward, seeded with the encoder's final state h.
seq2seq.decoder.lstm.set_state(h)
out = seq2seq.decoder.embed.forward(t[:, :-1])
out2 = seq2seq.decoder.lstm.forward(out)
score = seq2seq.decoder.affine.forward(out2)
loss = seq2seq.softmax.forward(score, t[:, 1:])
print(np.allclose(out, decoder[0].y.d))
print(np.allclose(out2, decoder[1].y.d))
print(np.allclose(score, decoder[2].y.d))
print(np.allclose(loss, model.loss))

[10] 2019-08-30 08:17:38 (109ms) python3 (36.3s)

TypeError: Cannot convert numpy.ndarray to cupy.core.core.ndarray
TypeError                                 Traceback (most recent call last)
<ipython-input-55-dafc7b6b0081> in <module>
      5 for layer in encoder:
      6     layer.clear_data()
----> 7     layer.forward()
      8 
      9 xs = seq2seq.encoder.embed.forward(x)

~\Documents\GitHub\ivory\ivory\layers\recurrent.py in forward(self)
     85         for t in range(T):
     86             h = self.h_prev if t == 0 else y[:, t - 1]
---> 87             a = x[:, t] + h @ self.U.d + self.b.d
     88             a[:, :M] = sigmoid(a[:, :M])  # f
     89             a[:, M : 2 * M] = np.tanh(a[:, M : 2 * M])  # g

cupy\core\core.pyx in cupy.core.core.ndarray.__matmul__()

逆伝搬

# Backward pass: propagate gradients through the decoder, then the encoder,
# comparing each gradient against the reference implementation.
for layer in decoder[::-1]:
    layer.clear_grad()
    layer.backward()

dscore = seq2seq.softmax.backward()
dout2 = seq2seq.decoder.affine.backward(dscore)
dout = seq2seq.decoder.lstm.backward(dout2)
seq2seq.decoder.embed.backward(dout)
dh = seq2seq.decoder.lstm.dh  # gradient flowing back into the encoder state
print(np.allclose(dout, decoder[1].x.g))
print(np.allclose(dout2, decoder[2].x.g))
print(np.allclose(dscore, decoder[3].x.g))
print(np.allclose(dh, decoder[1].h.g))
print(np.allclose(dh, encoder[2].y.g))

for layer in encoder[::-1]:
    layer.clear_grad()
    layer.backward()

# The reference encoder receives a gradient only at the last time step.
dhs = np.zeros_like(seq2seq.encoder.hs)
dhs[:, -1, :] = dh
dout = seq2seq.encoder.lstm.backward(dhs)
seq2seq.encoder.embed.backward(dout)
print(np.allclose(dout, encoder[1].x.g))
print(np.allclose(dhs, encoder[2].x.g))

[11] 2019-08-30 08:17:38 (137ms) python3 (36.5s)

AttributeError: 'SoftmaxCrossEntropy' object has no attribute 'y_2d'
AttributeError                            Traceback (most recent call last)
<ipython-input-56-c100d3f306c5> in <module>
      1 for layer in decoder[::-1]:
      2     layer.clear_grad()
----> 3     layer.backward()
      4 
      5 dscore = seq2seq.softmax.backward()

~\Documents\GitHub\ivory\ivory\layers\loss.py in backward(self)
     34 
     35     def backward(self):
---> 36         self.y_2d[np.arange(self.size), self.t_1d] -= 1
     37         self.x.g = self.y_2d.reshape(*self.x.d.shape) / self.size
     38

勾配を比較します。

# Compare each weight's gradient with the reference implementation.
for weight, ref_grad in zip(model.weights, seq2seq.grads):
    print(weight.layer.name, weight.name, np.allclose(weight.variable.grad, ref_grad))

[12] 2019-08-30 08:17:38 (156ms) python3 (36.6s)

TypeError: ufunc 'isfinite' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
TypeError                                 Traceback (most recent call last)
<ipython-input-57-edd9775e1fe5> in <module>
      1 for p, grad in zip(model.weights, seq2seq.grads):
----> 2     print(p.layer.name, p.name, np.allclose(p.variable.grad, grad))

c:\users\daizu\miniconda3\envs\daizu\lib\site-packages\numpy\core\numeric.py in allclose(a, b, rtol, atol, equal_nan)
   2421 
   2422     """
-> 2423     res = all(isclose(a, b, rtol=rtol, atol=atol, equal_nan=equal_nan))
   2424     return bool(res)
   2425 

c:\users\daizu\miniconda3\envs\daizu\lib\site-packages\numpy\core\numeric.py in isclose(a, b, rtol, atol, equal_nan)
   2519     y = array(y, dtype=dt, copy=False, subok=True)
   2520 
-> 2521     xfin = isfinite(x)
   2522     yfin = isfinite(y)
   2523     if all(xfin) and all(yfin):

重みの更新

from ivory.core.optimizer import Adam

# Update the reference model: clip gradients, then apply Adam.
optim = import_module("scratch2/common/optimizer")
util = import_module("scratch2/common/util")
max_grad = 5.0
util.clip_grads(seq2seq.grads, max_grad)
adam_scratch = optim.Adam()
adam_scratch.update(seq2seq.params, seq2seq.grads)

# Apply the same steps to our model via the model API.
model.clip_grads(max_grad)
adam = Adam()
adam.set_model(model)
adam.update()

[13] 2019-08-30 08:17:38 (141ms) python3 (36.8s)

TypeError: unsupported operand type(s) for ** or pow(): 'NoneType' and 'int'
TypeError                                 Traceback (most recent call last)
<ipython-input-58-55954568ec66> in <module>
      9 from ivory.core.optimizer import Adam
     10 
---> 11 model.clip_grads(max_grad)
     12 adam = Adam()
     13 adam.set_model(model)

~\Documents\GitHub\ivory\ivory\core\model.py in clip_grads(self, max_grad)
    187         total_norm = 0.0
    188         for grad in grads:
--> 189             total_norm += np.sum(grad ** 2)  # type:ignore
    190         total_norm = np.sqrt(total_norm)
    191         rate = max_grad / (total_norm + 1e-6)

更新された重みを比較します。

# Confirm both models arrived at the same updated weights.
for weight, ref_data in zip(model.weights, seq2seq.params):
    print(weight.layer.name, weight.name, np.allclose(weight.variable.data, ref_data))

[14] 2019-08-30 08:17:39 (141ms) python3 (36.9s)

Embedding.1 W True
LSTM.1 W True
LSTM.1 U True
LSTM.1 b True
Embedding.2 W True
LSTM.2 W True
LSTM.2 U True
LSTM.2 b True
Affine.1 W True
Affine.1 b True

モデル経由で訓練を実施します。

# Re-initialize both models identically, then run a few training iterations
# side by side, comparing gradients and weights after every step.
seq2seq = sequence.Seq2seq(vocab_size, wordvec_size, hideen_size)
for weight, source in zip(model.weights, seq2seq.params):
    weight.variable.data = source.copy()

data_size = len(x_train)
batch_size = 128

for iters in range(5):
    model.reset_state()
    batch = slice(iters * batch_size, (iters + 1) * batch_size)
    batch_x = x_train[batch]
    batch_t = t_train[batch]

    # Reference model: forward, backward, clip, update.
    seq2seq.forward(batch_x, batch_t)
    seq2seq.backward()
    util.clip_grads(seq2seq.grads, max_grad)
    adam_scratch.update(seq2seq.params, seq2seq.grads)

    # Our model: the same steps through the model API.
    model.set_data(batch_x, batch_t[:, :-1], batch_t[:, 1:])
    model.forward()
    model.backward()
    model.clip_grads(max_grad)
    adam.update()

    print('grad ', end='')
    for weight, ref_grad in zip(model.weights, seq2seq.grads):
        print(np.allclose(weight.variable.grad, ref_grad), end=', ')
    print('\ndata ', end='')
    for weight, ref_data in zip(model.weights, seq2seq.params):
        print(np.allclose(weight.variable.data, ref_data), end=', ')
    print()

[15] 2019-08-30 08:17:39 (93.7ms) python3 (37.0s)

TypeError: Cannot convert numpy.ndarray to cupy.core.core.ndarray
TypeError                                 Traceback (most recent call last)
<ipython-input-60-42f6fb1ac084> in <module>
     17 
     18     model.set_data(batch_x, batch_t[:, :-1], batch_t[:, 1:])
---> 19     model.forward()
     20     model.backward()
     21     model.clip_grads(max_grad)

~\Documents\GitHub\ivory\ivory\core\model.py in forward(self, predict, start)
     38             if predict and isinstance(layer, LossLayer):
     39                 continue
---> 40             layer.forward()
     41 
     42     def backward(self):

~\Documents\GitHub\ivory\ivory\layers\recurrent.py in forward(self)
     85         for t in range(T):
     86             h = self.h_prev if t == 0 else y[:, t - 1]
---> 87             a = x[:, t] + h @ self.U.d + self.b.d
     88             a[:, :M] = sigmoid(a[:, :M])  # f
     89             a[:, M : 2 * M] = np.tanh(a[:, M : 2 * M])  # g

cupy\core\core.pyx in cupy.core.core.ndarray.__matmul__()