, . , .
TensorFlow ( ).
( [3]):
import tensorflow as tf
lines = tf.constant([
'Hello',
'my name is also Mark',
'Are there any other Marks here ?'])
vocabulary = ['Hello', 'my', 'name', 'is', 'also', 'Mark', 'Are', 'there', 'any', 'other', 'Marks', 'here', '?']
, , ( .)
words = tf.string_split(lines," ")
( [3,7]). - [ , ]. :
indices values
0 0 'hello'
1 0 'my'
1 1 'name'
1 2 'is'
...
:
table = tf.contrib.lookup.index_table_from_tensor(vocabulary)
word_indices = table.lookup(words)
, .
, :
line_number = word_indices.indices[:,0]
line_position = word_indices.indices[:,1]
lengths = tf.segment_max(data = line_position,
segment_ids = line_number)+1
, , , lstm... - ( ):
EMBEDDING_DIM = 100
dense_word_indices = tf.sparse_tensor_to_dense(word_indices)
e_layer = tf.contrib.keras.layers.Embedding(len(vocabulary), EMBEDDING_DIM)
embedded = e_layer(dense_word_indices)
[3,7,100], [lines, words, embedding_dim].
lstm:
LSTM_SIZE = 50
lstm = tf.nn.rnn_cell.BasicLSTMCell(LSTM_SIZE)
, .
outputs, final_state = tf.nn.dynamic_rnn(
cell=lstm,
inputs=embedded,
sequence_length=lengths,
dtype=tf.float32)
[3,7,50] [line, word, lstm_size]. , (! !) select_last_activations:
from tensorflow.contrib.learn.python.learn.estimators.rnn_common import select_last_activations
final_output = select_last_activations(outputs,tf.cast(lengths,tf.int32))
, . [3,50] [line, lstm_size]
init_t = tf.tables_initializer()
init = tf.global_variables_initializer()
with tf.Session() as sess:
init_t.run()
init.run()
print(final_output.eval().shape())
, , , , tf.contrib.learn.DynamicRnnEstimator.