How to get global_step when restoring checkpoints in Tensorflow?

I save the session state as follows:

self._saver = tf.saver()
self._saver.save(self._session, '/network', global_step=self._time)

When I restore later, I want to get the global_step value of the checkpoint I restore from, in order to set some hyperparameters from it.

The hacky way to do this would be to scan and parse the file names in the checkpoint directory. But surely there has to be a better, built-in way to do this?

+16
8 answers

The general pattern is to have a global_step variable to keep track of steps:

global_step = tf.Variable(0, name='global_step', trainable=False)
train_op = optimizer.minimize(loss, global_step=global_step)

Then you can save with

saver.save(sess, save_path, global_step=global_step)

When you restore, the value of global_step is restored as well.
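
For illustration, a minimal restore sketch under this pattern (checkpoint_dir is a hypothetical directory holding the files written by saver.save above):

saver = tf.train.Saver()
with tf.Session() as sess:
    # restoring brings the saved global_step value back with the other variables
    saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir))
    print('resumed at step %d' % sess.run(global_step))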

+24

This is a bit of a hack, but the other answers did not work for me at all:

ckpt = tf.train.get_checkpoint_state(checkpoint_dir) 

#Extract from checkpoint filename
step = int(os.path.basename(ckpt.model_checkpoint_path).split('-')[1])

Update 9/2017

I'm not sure whether this started working because of updates, but the following approach seems to be effective at getting global_step to update and load properly.

Create two ops, one to hold global_step and another to increment it:

global_step = tf.Variable(0, trainable=False, name='global_step')
increment_global_step = tf.assign_add(global_step, 1,
                                      name='increment_global_step')

Now, in your training loop, run the increment op every time you run your training op:

sess.run([train_op, increment_global_step], feed_dict=feed_dict)

If you ever want to retrieve the global step value as an integer after loading the model, just use:

sess.run(global_step)

This is useful for creating filenames or calculating your current epoch without needing a second tensorflow Variable to hold that value. For instance, calculating the current epoch on loading would be something like:

loaded_epoch = sess.run(global_step)//(batch_size*num_train_records)
+4

Like Lawrence Du, I could not find a way to get global_step by restoring the model, so I applied his hack to the Inception v3 training code in the Tensorflow/models github repo that I'm using. The code below also contains a fix related to pretrained_model_checkpoint_path.

If you have a better solution, or know what I'm missing, please leave a comment!

In any case, this code works for me:

...

# When not restoring start at 0
last_step = 0
if FLAGS.pretrained_model_checkpoint_path:
    # A model consists of three files, use the base name of the model in
    # the checkpoint path. E.g. my-model-path/model.ckpt-291500
    #
    # Because we need to give the base name you can't assert (will always fail)
    # assert tf.gfile.Exists(FLAGS.pretrained_model_checkpoint_path)

    variables_to_restore = tf.get_collection(
        slim.variables.VARIABLES_TO_RESTORE)
    restorer = tf.train.Saver(variables_to_restore)
    restorer.restore(sess, FLAGS.pretrained_model_checkpoint_path)
    print('%s: Pre-trained model restored from %s' %
          (datetime.now(), FLAGS.pretrained_model_checkpoint_path))

    # HACK : global step is not restored for some unknown reason
    last_step = int(os.path.basename(FLAGS.pretrained_model_checkpoint_path).split('-')[1])

    # assign to global step
    sess.run(global_step.assign(last_step))

...

for step in range(last_step + 1, FLAGS.max_steps):

  ...
+1

TL;DR

As a tensorflow variable (evaluated in the session):

global_step = tf.train.get_or_create_global_step()
# use global_step variable to calculate your hyperparameter 
# this variable will be evaluated later in the session
saver = tf.train.Saver()
with tf.Session() as sess:
    # restore all variables from checkpoint
    saver.restore(sess, checkpoint_path)
    # then init tables and local variables and start training/evaluation ...

Or as a numpy integer (without any session):

reader = tf.train.NewCheckpointReader(absolute_checkpoint_path)
global_step = reader.get_tensor('global_step')


Long answer: there are at least two ways of retrieving the global step from a checkpoint, as a tensorflow variable or as a numpy integer. Note that parsing the filename will not work if global_step was not passed as a parameter to the Saver's save method. For pretrained models, see the remark at the end of this answer.

As a Tensorflow variable

If you need the global_step variable to calculate some hyperparameters, just use tf.train.get_or_create_global_step(). This returns a tensorflow variable. Because the variable is only evaluated later in the session, you can only use tensorflow operations to calculate your hyperparameters. For example, max(global_step, 100) will not work; you have to use the tensorflow equivalent, tf.maximum(global_step, 100), which can be evaluated later in the session.

Within the session, you can initialize the global_step variable from a checkpoint with saver.restore(sess, checkpoint_path):

global_step = tf.train.get_or_create_global_step()
# use global_step variable to calculate your hyperparameter 
# this variable will be evaluated later in the session
hyper_parameter = tf.maximum(global_step, 100) 
saver = tf.train.Saver()
with tf.Session() as sess:
    # restore all variables from checkpoint
    saver.restore(sess, checkpoint_path)
    # then init tables and local variables and start training/evaluation ...

    # for verification you can print the global step and your hyper parameter
    print(sess.run([global_step, hyper_parameter]))

As a numpy integer (without a session)

If you need the global step as a scalar without starting a session, you can read it directly from the checkpoint file(s) with a NewCheckpointReader. Because of a bug in older tensorflow versions, you should convert the checkpoint path to an absolute path. The reader lets you get any tensor of the model as a numpy value. The name of the global step variable is the constant string tf.GraphKeys.GLOBAL_STEP, defined as 'global_step'.

absolute_checkpoint_path = os.path.abspath(checkpoint_path)
reader = tf.train.NewCheckpointReader(absolute_checkpoint_path)
global_step = reader.get_tensor(tf.GraphKeys.GLOBAL_STEP)

A remark on pretrained models: in most pretrained models available online, the global step is reset to zero, so these models can be used to initialize the model parameters for fine-tuning without overwriting the global step.

+1

The 0.10rc0 version seems to be different: there's no tf.saver() any more, it's now tf.train.Saver(). Also, save appends the global_step to the save_path filename, so we can't just call restore on the same save_path, since that is not the actual model file.

The easiest way I see right now is to use a SessionManager along with a saver, like this:

my_checkpoint_dir = "/tmp/checkpoint_dir"
# make a saver to use with SessionManager for restoring
saver = tf.train.Saver()
# Build an initialization operation to run below.
init = tf.initialize_all_variables()
# use a SessionManager to help with automatic variable restoration
sm = tf.train.SessionManager()
# try to find the latest checkpoint in my_checkpoint_dir, then create a session with that restored
# if no such checkpoint, then call the init_op after creating a new session
sess = sm.prepare_session("", init_op=init, saver=saver, checkpoint_dir=my_checkpoint_dir)

That's it. Now you have a session that is either restored from my_checkpoint_dir (make sure the directory exists before calling this), or, if there is no checkpoint there, a new session whose variables have been initialized by init_op.

When you want to save, you just save to any name you want in that directory and pass the global_step in. Here's an example where I save the loop's step variable as the global_step, so it comes back as the global_step if you kill the program, restart it, and it restores the checkpoint:

checkpoint_path = os.path.join(my_checkpoint_dir, 'model.ckpt')
saver.save(sess, checkpoint_path, global_step=step)

my_checkpoint_dir, "model.ckpt-1000", 1000 - global_step. , "model.ckpt-2000". SessionManager . checkpoint_path , checkpoint_dir. Save() global_step ( ). " ", SessionManager .

0

Just to note my solution for global step saving and restoring.

Save:

global_step = tf.Variable(0, trainable=False, name='global_step')
saver.save(sess, model_path + model_name, global_step=global_step)

Restore:

if os.path.exists(model_path):
    saver.restore(sess, tf.train.latest_checkpoint(model_path))
    print("Model restore finished, current globle step: %d" % global_step.eval())
0

You can use a global_step variable to keep track of steps, but if in your code you are initializing or assigning its value to another step variable, it may not be consistent.

For instance, you define your global_step using:

global_step = tf.Variable(0, name='global_step', trainable=False)

Then assign it to your training operation:

train_op = optimizer.minimize(loss, global_step=global_step)

Save it in your checkpoint:

saver.save(sess, checkpoint_path, global_step=global_step)

Restore from your checkpoint:

saver.restore(sess, checkpoint_path) 

The value of global_step is restored as well, but if you are assigning it to another variable, say step (as above), then you must do something like:

step = global_step.eval(session=sess)

The variable step then contains the latest restored global_step value.

It is nicer not to define global_step from scratch, but instead to use tensorflow's built-in:

global_step = tf.train.get_or_create_global_step()

This will get your latest global_step if one exists, or create one if it does not.
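
A small sketch of that behavior (my own check, not from the answer above): calling the helper twice reuses the existing variable rather than creating a second one:

gs1 = tf.train.get_or_create_global_step()
gs2 = tf.train.get_or_create_global_step()
print(gs1.name, gs2.name)  # both print 'global_step:0'; the second call found the first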

0

The reason a variable is not restored as expected is most likely that it was created after your tf.Saver() object was created.

The place where you create the tf.Saver() object matters when you don't explicitly specify a var_list, or when you pass None for var_list. Many programmers expect all variables in the graph to be saved when save() is called, but that is not the case, and it should perhaps be documented as such: a snapshot of the graph's variables is taken at the time the Saver object is created.

Unless you're having performance issues, it's safest to create the saver object right when you decide to save your progress; otherwise, make sure to create it after you have created all your variables (see the sketch below).

Also, the global_step you pass to saver.save(sess, save_path, global_step=global_step) is merely a counter used for building the file name and has nothing to do with whether it will be restored as a global_step variable. That parameter is a misnomer IMO; if you're saving your progress at the end of each epoch, it's probably best to pass your epoch number there.
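
A minimal sketch of the creation-order pitfall described above (variable names are hypothetical): a variable created after the Saver never makes it into the checkpoint:

import tensorflow as tf

a = tf.Variable(1, name='a')
saver = tf.train.Saver()      # snapshots the graph's variables now: only 'a'
b = tf.Variable(2, name='b')  # created after the Saver, so not in its var_list

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.save(sess, '/tmp/demo.ckpt')  # the checkpoint contains 'a' but not 'b'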

0
