Ok, I think my problem is that I don't know how to write code in the TensorFlow style, so I kind of brute-forced it.
(lines marked with * are the ones I changed)
In python/ops/seq2seq.py, model_with_buckets():
outputs = []
*states = []
losses = []
with ops.op_scope(all_inputs, name, "model_with_buckets"):
  for j in xrange(len(buckets)):
    if j > 0:
      vs.get_variable_scope().reuse_variables()
    bucket_encoder_inputs = [encoder_inputs[i]
                             for i in xrange(buckets[j][0])]
    bucket_decoder_inputs = [decoder_inputs[i]
                             for i in xrange(buckets[j][1])]
*   bucket_outputs, _, bucket_states = seq2seq(bucket_encoder_inputs,
                                               bucket_decoder_inputs)
    outputs.append(bucket_outputs)
*   states.append(bucket_states)
    bucket_targets = [targets[i] for i in xrange(buckets[j][1])]
    bucket_weights = [weights[i] for i in xrange(buckets[j][1])]
    losses.append(sequence_loss(
        outputs[-1], bucket_targets, bucket_weights, num_decoder_symbols,
        softmax_loss_function=softmax_loss_function))
*return outputs, losses, states
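To make the intent of this change easier to follow without the TensorFlow plumbing, here is a minimal pure-Python sketch of the bucketing pattern; fake_seq2seq is a made-up stand-in for the modified seq2seq that now returns three values:

# Pure-Python sketch only; fake_seq2seq is a hypothetical stand-in for the
# modified seq2seq above, which returns (outputs, decoder_states, encoder_state).
def fake_seq2seq(enc_inputs, dec_inputs):
    return ["out"] * len(dec_inputs), ["dec_state"] * len(dec_inputs), "enc_state"

def model_with_buckets_sketch(encoder_inputs, decoder_inputs, buckets):
    outputs, states = [], []
    for enc_len, dec_len in buckets:
        # Truncate the full input lists to this bucket's size, as above.
        bucket_outputs, _, bucket_state = fake_seq2seq(
            encoder_inputs[:enc_len], decoder_inputs[:dec_len])
        outputs.append(bucket_outputs)
        states.append(bucket_state)
    return outputs, states

outs, sts = model_with_buckets_sketch(range(10), range(10), [(5, 5), (10, 10)])
assert len(sts) == 2  # one saved encoder state per bucket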
In python/ops/seq2seq.py, modify embedding_attention_seq2seq():
if isinstance(feed_previous, bool):
* outputs, states = embedding_attention_decoder(
      decoder_inputs, encoder_states[-1], attention_states, cell,
      num_decoder_symbols, num_heads, output_size, output_projection,
      feed_previous)
* return outputs, states, encoder_states[-1]
else:
  outputs1, states1 = embedding_attention_decoder(
      decoder_inputs, encoder_states[-1], attention_states, cell,
      num_decoder_symbols, num_heads, output_size, output_projection, True)
  vs.get_variable_scope().reuse_variables()
  outputs2, states2 = embedding_attention_decoder(
      decoder_inputs, encoder_states[-1], attention_states, cell,
      num_decoder_symbols, num_heads, output_size, output_projection, False)
  outputs = control_flow_ops.cond(feed_previous,
                                  lambda: outputs1, lambda: outputs2)
  states = control_flow_ops.cond(feed_previous,
                                 lambda: states1, lambda: states2)
* return outputs, states, encoder_states[-1]
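One thing worth spelling out: when feed_previous is a Python bool, the decoder choice is made while the graph is built, but when it is a tensor both decoders are built and control_flow_ops.cond picks one at run time, which is why both paths have to return the same number of values. A toy illustration of the dispatch (plain Python, hypothetical names):

# Toy dispatch: a Python bool is resolved at graph-construction time;
# anything else (standing in for a tf.bool tensor) defers the choice.
def build(feed_previous):
    if isinstance(feed_previous, bool):
        return ("decoder(feed_previous=%s)" % feed_previous, "states", "enc_state")
    # Build both decoders, select later -- must match the tuple arity above.
    return ("cond(outputs1, outputs2)", "cond(states1, states2)", "enc_state")

assert len(build(True)) == len(build(object())) == 3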
In /rnn/translate/seq2seq_model.py, __init__():
if forward_only:
* self.outputs, self.losses, self.states = seq2seq.model_with_buckets(
      self.encoder_inputs, self.decoder_inputs, targets,
      self.target_weights, buckets, self.target_vocab_size,
      lambda x, y: seq2seq_f(x, y, True),
      softmax_loss_function=softmax_loss_function)
  # If we use output projection, we need to project outputs for decoding.
  if output_projection is not None:
    for b in xrange(len(buckets)):
      self.outputs[b] = [tf.nn.xw_plus_b(output, output_projection[0],
                                         output_projection[1])
                         for output in self.outputs[b]]
else:
* self.outputs, self.losses, _ = seq2seq.model_with_buckets(
      self.encoder_inputs, self.decoder_inputs, targets,
      self.target_weights, buckets, self.target_vocab_size,
      lambda x, y: seq2seq_f(x, y, False),
      softmax_loss_function=softmax_loss_function)
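For context, the projection loop above just applies an affine map to each decoder output to get vocabulary logits; a small numpy sketch of what xw_plus_b computes (sizes made up for illustration):

import numpy as np

batch, cell_size, vocab = 2, 4, 6              # illustrative sizes only
output = np.random.randn(batch, cell_size)     # one decoder output
w = np.random.randn(cell_size, vocab)          # output_projection[0]
b = np.random.randn(vocab)                     # output_projection[1]
logits = output.dot(w) + b                     # what xw_plus_b(output, w, b) computes
assert logits.shape == (batch, vocab)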
In /rnn/translate/seq2seq_model.py, step():
if not forward_only:
* return outputs[1], outputs[2], None, None  # Gradient norm, loss, no outputs, no states.
else:
* return None, outputs[0], outputs[1:-1], outputs[-1]  # No norm, loss, outputs, states.
Then, wherever step() is called with forward_only=True, unpack the extra return value:

_, _, _, states = model.step(all_other_arguments, forward_only=True)
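If it helps, with the translate tutorial's step() signature the call would look roughly like this (argument names are taken from the tutorial; treating the encoder state as a sentence representation is just one possible use):

# Assumes the modified step() above; argument names follow the tutorial's
# step(session, encoder_inputs, decoder_inputs, target_weights,
#      bucket_id, forward_only).
_, loss, output_logits, states = model.step(
    sess, encoder_inputs, decoder_inputs, target_weights,
    bucket_id, forward_only=True)
# 'states' is the encoder's final state for this bucket, e.g. usable as a
# fixed-length representation of the input sentence.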