TensorFlow seq2seq: get the sequence of hidden states

I recently started working with TensorFlow. I am working on a seq2seq model and have more or less learned how to use it, but I am stuck on getting the hidden state of each sentence.

As far as I understand, the seq2seq model takes an input sequence and generates a hidden state for it through an RNN. The model then uses that hidden state to generate a new sequence of data.

My question is: what should I do if I want to use the hidden state of the input sequence directly? For example, given a trained model, how can I get the final hidden state of the input sequence [token1, token2, ..., tokenN]?

I have been stuck on this for two days. I have tried many different approaches, but none of them work.

3 answers

In the seq2seq model, the encoder is always an RNN, invoked through rnn.rnn.

The rnn.rnn call returns outputs and a state, so to get only the state you can do this:

_, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)

This is also how it is done in the seq2seq module: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/seq2seq.py#L103
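
For completeness, here is a minimal self-contained sketch of that pattern. This is only a sketch: it assumes the 0.x-era static-RNN API, where the RNN is exposed as tf.nn.rnn and cells as tf.nn.rnn_cell (in the oldest releases the same functions live under tensorflow.models.rnn).

import tensorflow as tf

# Toy encoder: 10 time steps, unknown batch size, 64 features per step.
encoder_inputs = [tf.placeholder(tf.float32, [None, 64], name="enc%d" % i)
                  for i in range(10)]
encoder_cell = tf.nn.rnn_cell.GRUCell(128)

# The static RNN returns (outputs, final_state); keep only the final state.
_, encoder_state = tf.nn.rnn(encoder_cell, encoder_inputs, dtype=tf.float32)

# encoder_state is a [batch_size, 128] tensor that can be fetched with session.run().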


OK, I think my problem is that I don't know how to write code in the TensorFlow style, so I more or less brute-forced it.

(* marks the lines to change)

python/ops/seq2seq.py, model_with_buckets()

outputs = []
losses = []
*states = []
with ops.op_scope(all_inputs, name, "model_with_buckets"):
  for j in xrange(len(buckets)):
    if j > 0:
      vs.get_variable_scope().reuse_variables()
    bucket_encoder_inputs = [encoder_inputs[i]
                             for i in xrange(buckets[j][0])]
    bucket_decoder_inputs = [decoder_inputs[i]
                             for i in xrange(buckets[j][1])]
    *bucket_outputs, _, bucket_states = seq2seq(bucket_encoder_inputs,
                                                bucket_decoder_inputs)
    outputs.append(bucket_outputs)
    *states.append(bucket_states)
    bucket_targets = [targets[i] for i in xrange(buckets[j][1])]
    bucket_weights = [weights[i] for i in xrange(buckets[j][1])]
    losses.append(sequence_loss(
        outputs[-1], bucket_targets, bucket_weights, num_decoder_symbols,
        softmax_loss_function=softmax_loss_function))

*return outputs, losses, states
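
The idea is to thread a states list through model_with_buckets in parallel with the existing outputs list, so that each bucket's encoder state becomes a graph op the caller can fetch.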

python/ops/seq2seq.py, modify embedding_attention_seq2seq()

if isinstance(feed_previous, bool):
  *outputs, states = embedding_attention_decoder(
      decoder_inputs, encoder_states[-1], attention_states, cell,
      num_decoder_symbols, num_heads, output_size, output_projection,
      feed_previous)
  # Return the tensor directly; tf.constant() only accepts constant values,
  # not tensors.
  *return outputs, states, encoder_states[-1]
else:  # If feed_previous is a Tensor, we construct 2 graphs and use cond.
  outputs1, states1 = embedding_attention_decoder(
      decoder_inputs, encoder_states[-1], attention_states, cell,
      num_decoder_symbols, num_heads, output_size, output_projection, True)
  vs.get_variable_scope().reuse_variables()
  outputs2, states2 = embedding_attention_decoder(
      decoder_inputs, encoder_states[-1], attention_states, cell,
      num_decoder_symbols, num_heads, output_size, output_projection, False)

  outputs = control_flow_ops.cond(feed_previous,
                                  lambda: outputs1, lambda: outputs2)
  states = control_flow_ops.cond(feed_previous,
                                 lambda: states1, lambda: states2)

  *return outputs, states, encoder_states[-1]

/rnn/translate/seq2seq_model.py, __init__()

if forward_only:
  *self.outputs, self.losses, self.states = seq2seq.model_with_buckets(
      self.encoder_inputs, self.decoder_inputs, targets,
      self.target_weights, buckets, self.target_vocab_size,
      lambda x, y: seq2seq_f(x, y, True),
      softmax_loss_function=softmax_loss_function)
  # If we use output projection, we need to project outputs for decoding.
  if output_projection is not None:
    for b in xrange(len(buckets)):
      self.outputs[b] = [tf.nn.xw_plus_b(output, output_projection[0],
                                         output_projection[1])
                         for output in self.outputs[b]]
else:
  *self.outputs, self.losses, _ = seq2seq.model_with_buckets(
      self.encoder_inputs, self.decoder_inputs, targets,
      self.target_weights, buckets, self.target_vocab_size,
      lambda x, y: seq2seq_f(x, y, False),
      softmax_loss_function=softmax_loss_function)

/rnn/translate/seq2seq_model.py, step()

if not forward_only:
  return outputs[1], outputs[2], None  # Gradient norm, loss, no outputs.
else:
  *return None, outputs[0], outputs[1:-1], outputs[-1]  # No gradient norm; loss, outputs, states.

Finally, call step() with forward_only=True to get the states:

_, _, _, states = model.step(all_other_arguments, forward_only=True)
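
Note that with this change step() returns four values when forward_only is true, so any existing call sites (for example in translate.py) must be updated to unpack the extra value.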


Bearsteak's answer above is great, but it is based on tensorflow-0.6, which is quite out of date. Below is the same approach updated for tensorflow-0.8; the code is similar but not identical.

(* marks the lines to change)

losses = []
outputs = []
*states = []
with ops.op_scope(all_inputs, name, "model_with_buckets"):
  for j, bucket in enumerate(buckets):
    with variable_scope.variable_scope(variable_scope.get_variable_scope(),
                                       reuse=True if j > 0 else None):
      *bucket_outputs, _, bucket_states = seq2seq(encoder_inputs[:bucket[0]],
                                                  decoder_inputs[:bucket[1]])
      outputs.append(bucket_outputs)
      *states.append(bucket_states)
      if per_example_loss:
        losses.append(sequence_loss_by_example(
            outputs[-1], targets[:bucket[1]], weights[:bucket[1]],
            softmax_loss_function=softmax_loss_function))
      else:
        losses.append(sequence_loss(
            outputs[-1], targets[:bucket[1]], weights[:bucket[1]],
            softmax_loss_function=softmax_loss_function))

*return outputs, losses, states

python/ops/seq2seq.py, modify embedding_attention_seq2seq()

if isinstance(feed_previous, bool):
  *outputs, states = embedding_attention_decoder(
      decoder_inputs, encoder_state, attention_states, cell,
      num_decoder_symbols, embedding_size, num_heads=num_heads,
      output_size=output_size, output_projection=output_projection,
      feed_previous=feed_previous,
      initial_state_attention=initial_state_attention)
  *return outputs, states, encoder_state

# If feed_previous is a Tensor, we construct 2 graphs and use cond.
def decoder(feed_previous_bool):
  reuse = None if feed_previous_bool else True
  with variable_scope.variable_scope(variable_scope.get_variable_scope(),
                                     reuse=reuse):
    outputs, state = embedding_attention_decoder(
        decoder_inputs, encoder_state, attention_states, cell,
        num_decoder_symbols, embedding_size, num_heads=num_heads,
        output_size=output_size, output_projection=output_projection,
        feed_previous=feed_previous_bool,
        update_embedding_for_previous=False,
        initial_state_attention=initial_state_attention)
    return outputs + [state]

outputs_and_state = control_flow_ops.cond(feed_previous,
                                          lambda: decoder(True),
                                          lambda: decoder(False))
*return outputs_and_state[:-1], outputs_and_state[-1], encoder_state
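
The outputs + [state] packing is needed because control_flow_ops.cond must return the same flat list of tensors from both branches; the decoder state rides along at the end of the outputs list and is split off again in the final return.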

/rnn/translate/seq2seq_model.py, __init__()

if forward_only:
  *self.outputs, self.losses, self.states = tf.nn.seq2seq.model_with_buckets(
      self.encoder_inputs, self.decoder_inputs, targets,
      self.target_weights, buckets, lambda x, y: seq2seq_f(x, y, True),
      softmax_loss_function=softmax_loss_function)
  # If we use output projection, we need to project outputs for decoding.
  if output_projection is not None:
    for b in xrange(len(buckets)):
      self.outputs[b] = [
          tf.matmul(output, output_projection[0]) + output_projection[1]
          for output in self.outputs[b]
      ]
else:
  *self.outputs, self.losses, _ = tf.nn.seq2seq.model_with_buckets(
      self.encoder_inputs, self.decoder_inputs, targets,
      self.target_weights, buckets,
      lambda x, y: seq2seq_f(x, y, False),
      softmax_loss_function=softmax_loss_function)

/rnn/translate/seq2seq_model.py, step()

if not forward_only:
  return outputs[1], outputs[2], None  # Gradient norm, loss, no outputs.
else:
  *return None, outputs[0], outputs[1:], outputs[-1]  # No gradient norm; loss, outputs, states.

Then, to get the states:

_, _, output_logits, states = model.step(sess, encoder_inputs, decoder_inputs,
                                         target_weights, bucket_id, True)
print(states)
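
Assuming the wiring above, states should come back as the fetched encoder state for the chosen bucket: for a GRU-style cell, an array of shape [batch_size, cell.state_size] (an old-style LSTM cell concatenates c and h along the second dimension).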

A good place to try this is translate.py.
