Skip to content

Commit

Permalink
Merges TPU-TC optimizations into HEAD. (tensorflow#5635)
Browse files: browse the repository at this point in the history.
* Merges TPU-TC optimizations into HEAD.

* Split a line that exceeded 80 characters because of a tab.

* Remove trailing whitespace.
  • Loading branch information
tayo authored and Taylor Robie committed Oct 30, 2018
1 parent 0c0860e commit b8318fd
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 44 deletions.
20 changes: 12 additions & 8 deletions official/recommendation/ncf_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,14 @@
FLAGS = flags.FLAGS


def construct_estimator(num_gpus, model_dir, params, batch_size,
def construct_estimator(num_gpus, model_dir, iterations, params, batch_size,
eval_batch_size):
"""Construct either an Estimator or TPUEstimator for NCF.
Args:
num_gpus: The number of gpus (Used to select distribution strategy)
model_dir: The model directory for the estimator
iterations: The number of iterations per TPU loop (passed to TPUConfig as iterations_per_loop).
params: The params dict for the estimator
batch_size: The mini-batch size for training.
eval_batch_size: The batch size used during evaluation.
Expand All @@ -79,12 +80,13 @@ def construct_estimator(num_gpus, model_dir, params, batch_size,
tf.Session.reset(tpu_cluster_resolver.get_master())

tpu_config = tf.contrib.tpu.TPUConfig(
iterations_per_loop=100,
iterations_per_loop=iterations,
num_shards=8)

run_config = tf.contrib.tpu.RunConfig(
cluster=tpu_cluster_resolver,
model_dir=model_dir,
save_checkpoints_secs=600,
session_config=tf.ConfigProto(
allow_soft_placement=True, log_device_placement=False),
tpu_config=tpu_config)
Expand All @@ -95,12 +97,13 @@ def construct_estimator(num_gpus, model_dir, params, batch_size,
model_fn=neumf_model.neumf_model_fn,
use_tpu=True,
train_batch_size=batch_size,
eval_batch_size=eval_batch_size,
params=tpu_params,
config=run_config)

eval_estimator = tf.contrib.tpu.TPUEstimator(
model_fn=neumf_model.neumf_model_fn,
use_tpu=False,
use_tpu=True,
train_batch_size=1,
eval_batch_size=eval_batch_size,
params=tpu_params,
Expand Down Expand Up @@ -204,7 +207,8 @@ def run_ncf(_):
}
if FLAGS.use_estimator:
train_estimator, eval_estimator = construct_estimator(
num_gpus=num_gpus, model_dir=FLAGS.model_dir, params=params,
num_gpus=num_gpus, model_dir=FLAGS.model_dir,
iterations=num_train_steps, params=params,
batch_size=flags.FLAGS.batch_size, eval_batch_size=eval_batch_size)
else:
runner = model_runner.NcfModelRunner(ncf_dataset, params)
Expand All @@ -231,7 +235,7 @@ def run_ncf(_):
test_id=FLAGS.benchmark_test_id)


pred_input_fn = None
eval_input_fn = None
total_training_cycle = FLAGS.train_epochs // FLAGS.epochs_between_evals
target_reached = False
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.TRAIN_LOOP)
Expand Down Expand Up @@ -260,8 +264,8 @@ def run_ncf(_):
tf.gfile.DeleteRecursively(train_record_dir)

tf.logging.info("Beginning evaluation.")
if pred_input_fn is None:
pred_input_fn, _, eval_batch_count = data_preprocessing.make_input_fn(
if eval_input_fn is None:
eval_input_fn, _, eval_batch_count = data_preprocessing.make_input_fn(
ncf_dataset=ncf_dataset, is_training=False)

if eval_batch_count != num_eval_steps:
Expand All @@ -272,7 +276,7 @@ def run_ncf(_):

mlperf_helper.ncf_print(key=mlperf_helper.TAGS.EVAL_START,
value=cycle_index)
eval_results = eval_estimator.evaluate(pred_input_fn,
eval_results = eval_estimator.evaluate(eval_input_fn,
steps=num_eval_steps)
tf.logging.info("Evaluation complete.")
else:
Expand Down
103 changes: 67 additions & 36 deletions official/recommendation/neumf_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,9 @@ def construct_model(users, items, params):
Raises:
ValueError: if the first model layer is not even.
Returns:
logits: network logits
"""

num_users = params["num_users"]
Expand All @@ -193,46 +196,74 @@ def construct_model(users, items, params):
# Input variables
user_input = tf.keras.layers.Input(tensor=users)
item_input = tf.keras.layers.Input(tensor=items)
batch_size = user_input.get_shape()[0]

if params["use_tpu"]:
with tf.variable_scope("embed_weights", reuse=tf.AUTO_REUSE):
cmb_embedding_user = tf.get_variable(
name="embeddings_mf_user",
shape=[num_users, mf_dim + model_layers[0] // 2],
initializer=tf.glorot_uniform_initializer())
cmb_embedding_item = tf.get_variable(
name="embeddings_mf_item",
shape=[num_items, mf_dim + model_layers[0] // 2],
initializer=tf.glorot_uniform_initializer())

cmb_user_latent = tf.gather(cmb_embedding_user, user_input)
cmb_item_latent = tf.gather(cmb_embedding_item, item_input)

mlp_user_latent = tf.slice(cmb_user_latent, [0, 0],
[batch_size, model_layers[0] // 2])
mlp_item_latent = tf.slice(cmb_item_latent, [0, 0],
[batch_size, model_layers[0] // 2])
mlp_vector = tf.keras.layers.concatenate([mlp_user_latent,
mlp_item_latent])
mf_user_latent = tf.slice(cmb_user_latent, [0, model_layers[0] // 2],
[batch_size, mf_dim])
mf_item_latent = tf.slice(cmb_item_latent, [0, model_layers[0] // 2],
[batch_size, mf_dim])
else:
# Initializer for embedding layers
embedding_initializer = "glorot_uniform"

# Embedding layers of GMF and MLP
mf_embedding_user = tf.keras.layers.Embedding(
num_users,
mf_dim,
embeddings_initializer=embedding_initializer,
embeddings_regularizer=tf.keras.regularizers.l2(mf_regularization),
input_length=1)
mf_embedding_item = tf.keras.layers.Embedding(
num_items,
mf_dim,
embeddings_initializer=embedding_initializer,
embeddings_regularizer=tf.keras.regularizers.l2(mf_regularization),
input_length=1)

mlp_embedding_user = tf.keras.layers.Embedding(
num_users,
model_layers[0]//2,
embeddings_initializer=embedding_initializer,
embeddings_regularizer=tf.keras.regularizers.l2(mlp_reg_layers[0]),
input_length=1)
mlp_embedding_item = tf.keras.layers.Embedding(
num_items,
model_layers[0]//2,
embeddings_initializer=embedding_initializer,
embeddings_regularizer=tf.keras.regularizers.l2(mlp_reg_layers[0]),
input_length=1)

# GMF part
mf_user_latent = mf_embedding_user(user_input)
mf_item_latent = mf_embedding_item(item_input)

# MLP part
mlp_user_latent = mlp_embedding_user(user_input)
mlp_item_latent = mlp_embedding_item(item_input)

# Initializer for embedding layers
embedding_initializer = "glorot_uniform"

# Embedding layers of GMF and MLP
mf_embedding_user = tf.keras.layers.Embedding(
num_users,
mf_dim,
embeddings_initializer=embedding_initializer,
embeddings_regularizer=tf.keras.regularizers.l2(mf_regularization),
input_length=1)
mf_embedding_item = tf.keras.layers.Embedding(
num_items,
mf_dim,
embeddings_initializer=embedding_initializer,
embeddings_regularizer=tf.keras.regularizers.l2(mf_regularization),
input_length=1)

mlp_embedding_user = tf.keras.layers.Embedding(
num_users,
model_layers[0]//2,
embeddings_initializer=embedding_initializer,
embeddings_regularizer=tf.keras.regularizers.l2(mlp_reg_layers[0]),
input_length=1)
mlp_embedding_item = tf.keras.layers.Embedding(
num_items,
model_layers[0]//2,
embeddings_initializer=embedding_initializer,
embeddings_regularizer=tf.keras.regularizers.l2(mlp_reg_layers[0]),
input_length=1)

# GMF part
mf_user_latent = mf_embedding_user(user_input)
mf_item_latent = mf_embedding_item(item_input)
# Element-wise multiply
mf_vector = tf.keras.layers.multiply([mf_user_latent, mf_item_latent])

# MLP part
mlp_user_latent = mlp_embedding_user(user_input)
mlp_item_latent = mlp_embedding_item(item_input)
# Concatenation of two latent features
mlp_vector = tf.keras.layers.concatenate([mlp_user_latent, mlp_item_latent])

Expand Down

0 comments on commit b8318fd

Please sign in to comment.