6 years ago · 9492dc7cb7
--- a/TensorFlow/Recommendation/WideAndDeep/README.md
+++ b/TensorFlow/Recommendation/WideAndDeep/README.md
@@ -25,7 +25,6 @@ This repository provides a script and recipe to train the Wide and Deep Recommen
 
				     * [Getting the data](#getting-the-data)
			
 
				         * [Dataset guidelines](#dataset-guidelines)
			
 
				     * [Training process](#training-process)
			
 
				-    * [Deploying the Wide & Deep model using Triton Inference Server](#deploying-the-wide-deep-model-using-triton-inference-server)
			
 
				 - [Performance](#performance)
			
 
				     * [Benchmarking](#benchmarking)
			
 
				         * [Training performance benchmark](#training-performance-benchmark)
			
@@ -181,7 +180,7 @@ To train your model using mixed precision with Tensor Cores or using FP32, perfo
 
				 
			
 
				 ```
			
 
				 git clone https://github.com/NVIDIA/DeepLearningExamples
			
 
				-cd DeepLearningExamples/TensorFlow/Recommendation/WideDeep
			
 
				+cd DeepLearningExamples/TensorFlow/Recommendation/WideAndDeep
			
 
				 ```
			
 
				 
			
 
				 2.  Download the Outbrain dataset.
			
@@ -326,10 +325,6 @@ The training log will contain information about:
 
				 
			
 
				 Checkpoints are stored at the end of every `--save_checkpoints_steps` at the `--model_dir` location.
			
 
				 
			
 
				-### Deploying the Wide & Deep model using Triton Inference Server
			
 
				-
			
 
				-This repository does not contain code for deploying the model using Triton Inference Server. The details of such deployment together with obtained performance numbers was discussed on the [blog post](https://devblogs.nvidia.com/accelerating-wide-deep-recommender-inference-on-gpus/).
			
 
				-
			
 
				 ## Performance
			
 
				 
			
 
				 ### Benchmarking
			
--- a/TensorFlow/Recommendation/WideAndDeep/dataflow_preprocess.py
+++ b/TensorFlow/Recommendation/WideAndDeep/dataflow_preprocess.py
@@ -18,14 +18,9 @@ from __future__ import print_function
 
				 
			
 
				 import argparse
			
 
				 import datetime
			
 
				-import os
			
 
				-import random
			
 
				-import subprocess
			
 
				 import sys
			
 
				-from joblib import Parallel, delayed
			
 
				 
			
 
				 import outbrain_transform
			
 
				-import path_constants
			
 
				 
			
 
				 import tensorflow as tf
			
 
				 import glob
			
--- a/TensorFlow/Recommendation/WideAndDeep/outbrain_transform.py
+++ b/TensorFlow/Recommendation/WideAndDeep/outbrain_transform.py
@@ -22,9 +22,8 @@ from tensorflow_transform.tf_metadata import dataset_schema
 
				 from tensorflow_transform.tf_metadata import dataset_metadata
			
 
				 from tensorflow_transform.tf_metadata import metadata_io
			
 
				 import numpy as np
			
 
				-import pandas as pd
			
 
				 
			
 
				-from trainer.features import LABEL_COLUMN, DISPLAY_ID_COLUMN, AD_ID_COLUMN, IS_LEAK_COLUMN, DISPLAY_ID_AND_IS_LEAK_ENCODED_COLUMN, CATEGORICAL_COLUMNS, DOC_CATEGORICAL_MULTIVALUED_COLUMNS, BOOL_COLUMNS, INT_COLUMNS, FLOAT_COLUMNS, FLOAT_COLUMNS_LOG_BIN_TRANSFORM, FLOAT_COLUMNS_SIMPLE_BIN_TRANSFORM
			
 
				+from trainer.features import LABEL_COLUMN, DISPLAY_ID_COLUMN, IS_LEAK_COLUMN, DISPLAY_ID_AND_IS_LEAK_ENCODED_COLUMN, CATEGORICAL_COLUMNS, DOC_CATEGORICAL_MULTIVALUED_COLUMNS, BOOL_COLUMNS, INT_COLUMNS, FLOAT_COLUMNS, FLOAT_COLUMNS_LOG_BIN_TRANSFORM, FLOAT_COLUMNS_SIMPLE_BIN_TRANSFORM
			
 
				 
			
 
				 RENAME_COLUMNS = False
			
 
				 
			
@@ -95,42 +94,6 @@ def make_spec(output_dir, batch_size=None):
 
				 	
			
 
				   metadata_io.write_metadata(metadata, output_dir)
			
 
				 
			
 
				-def make_input_schema(mode=tf.contrib.learn.ModeKeys.TRAIN, batch_size=None):
			
 
				-  """Input schema definition.
			
 
				-
			
 
				-  Args:
			
 
				-    mode: tf.contrib.learn.ModeKeys specifying if the schema is being used for
			
 
				-      train/eval or prediction.
			
 
				-    batch_size: None if not explicitly batched (for FixedLenFeature size of []), 
			
 
				-      otherwise the number of elements to assume will be grouped (size of [batch_size])
			
 
				-  Returns:
			
 
				-    A `Schema` object.
			
 
				-  """
			
 
				-  fixed_shape = [batch_size] if batch_size is not None else []
			
 
				-  result = {}
			
 
				-  result[LABEL_COLUMN] = tf.FixedLenFeature(shape=fixed_shape, dtype=tf.int64)
			
 
				-  result[DISPLAY_ID_COLUMN] = tf.FixedLenFeature(shape=fixed_shape, dtype=tf.float32)
			
 
				-  #result[AD_ID_COLUMN] = tf.VarLenFeature(dtype=tf.float32)
			
 
				-  result[IS_LEAK_COLUMN] = tf.FixedLenFeature(shape=fixed_shape, dtype=tf.int64)
			
 
				-  for name in BOOL_COLUMNS:
			
 
				-    #result[name] = tf.VarLenFeature(dtype=tf.int64)
			
 
				-    result[name] = tf.FixedLenFeature(shape=fixed_shape, dtype=tf.int64, default_value=0.0)
			
 
				-  #TODO: Create dummy features that indicates whether any of the numeric features is null 
			
 
				-  #(currently default 0 value might introduce noise)
			
 
				-  for name in FLOAT_COLUMNS_LOG_BIN_TRANSFORM+FLOAT_COLUMNS_SIMPLE_BIN_TRANSFORM:
			
 
				-    result[name] = tf.FixedLenFeature(shape=fixed_shape, dtype=tf.float32, default_value=0.0)  
			
 
				-  for name in INT_COLUMNS:
			
 
				-    result[name] = tf.FixedLenFeature(shape=fixed_shape, dtype=tf.float32, default_value=0.0)
			
 
				-  for name in CATEGORICAL_COLUMNS:
			
 
				-    result[name] = tf.FixedLenFeature(shape=fixed_shape, dtype=tf.float32, default_value=0.0)
			
 
				-    #result[name] = tf.VarLenFeature(dtype=tf.float32)
			
 
				-  for multi_category in DOC_CATEGORICAL_MULTIVALUED_COLUMNS:
			
 
				-    for category in DOC_CATEGORICAL_MULTIVALUED_COLUMNS[multi_category]:
			
 
				-      result[category] = tf.FixedLenFeature(shape=fixed_shape, dtype=tf.float32, default_value=0.0)
			
 
				-      #result[category] = tf.VarLenFeature(dtype=tf.float32)
			
 
				-
			
 
				-  return dataset_schema.from_feature_spec(result)
			
 
				-
			
 
				 def tf_log2_1p(x):
			
 
				   return tf.log1p(x) / tf.log(2.0)
			
 
				 
			
@@ -163,9 +126,6 @@ def scale_to_0_1(val, minv, maxv):
 
				   return (val - minv) / (maxv - minv)
			
 
				 
			
 
				 def create_tf_example(df, min_logs, max_logs):
			
 
				-  names = CSV_ORDERED_COLUMNS
			
 
				-  #columns_dict = dict(zip(names, row))
			
 
				-  
			
 
				   result = {}
			
 
				   result[LABEL_COLUMN] = tf.train.Feature(int64_list=tf.train.Int64List(value=df[LABEL_COLUMN].to_list()))
			
 
				   result[DISPLAY_ID_COLUMN] = tf.train.Feature(int64_list=tf.train.Int64List(value=df[DISPLAY_ID_COLUMN].to_list()))
			
--- a/TensorFlow/Recommendation/WideAndDeep/path_constants.py
+++ b/TensorFlow/Recommendation/WideAndDeep/path_constants.py
@@ -1,47 +0,0 @@
 
				-# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#     http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				-# limitations under the License.
			
 
				-
			
 
				-
			
 
				-# Copyright 2016 Google Inc. All Rights Reserved.
			
 
				-#
			
 
				-# Licensed under the Apache License, Version 2.0 (the "License");
			
 
				-# you may not use this file except in compliance with the License.
			
 
				-# You may obtain a copy of the License at
			
 
				-#
			
 
				-#      http://www.apache.org/licenses/LICENSE-2.0
			
 
				-#
			
 
				-# Unless required by applicable law or agreed to in writing, software
			
 
				-# distributed under the License is distributed on an "AS IS" BASIS,
			
 
				-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				-# See the License for the specific language governing permissions and
			
 
				-# limitations under the License.
			
 
				-"""File paths for the Criteo Classification pipeline.
			
 
				-"""
			
 
				-
			
 
				-from __future__ import absolute_import
			
 
				-from __future__ import division
			
 
				-from __future__ import print_function
			
 
				-
			
 
				-
			
 
				-TEMP_DIR = 'tmp'
			
 
				-TRANSFORM_FN_DIR = 'transform_fn'
			
 
				-RAW_METADATA_DIR = 'raw_metadata'
			
 
				-TRANSFORMED_METADATA_DIR = 'transformed_metadata'
			
 
				-TRANSFORMED_TRAIN_DATA_FILE_PREFIX = 'features_train'
			
 
				-TRANSFORMED_EVAL_DATA_FILE_PREFIX = 'features_eval'
			
 
				-TRANSFORMED_PREDICT_DATA_FILE_PREFIX = 'features_predict'
			
 
				-TRAIN_RESULTS_FILE = 'train_results'
			
 
				-DEPLOY_SAVED_MODEL_DIR = 'saved_model'
			
 
				-MODEL_EVALUATIONS_FILE = 'model_evaluations'
			
 
				-BATCH_PREDICTION_RESULTS_FILE = 'batch_prediction_results'
			
--- a/TensorFlow/Recommendation/WideAndDeep/preproc/preproc1.py
+++ b/TensorFlow/Recommendation/WideAndDeep/preproc/preproc1.py
@@ -19,7 +19,7 @@ OUTPUT_BUCKET_FOLDER = "/outbrain/preprocessed/"
 
				 DATA_BUCKET_FOLDER = "/outbrain/orig/"
			
 
				 SPARK_TEMP_FOLDER = "/outbrain/spark-temp/"
			
 
				 
			
 
				-from pyspark.sql.types import *
			
 
				+from pyspark.sql.types import IntegerType, StringType, StructType, StructField
			
 
				 import pyspark.sql.functions as F
			
 
				 
			
 
				 from pyspark.context import SparkContext, SparkConf
			
--- a/TensorFlow/Recommendation/WideAndDeep/preproc/preproc2.py
+++ b/TensorFlow/Recommendation/WideAndDeep/preproc/preproc2.py
@@ -19,17 +19,10 @@ OUTPUT_BUCKET_FOLDER = "/outbrain/preprocessed/"
 
				 DATA_BUCKET_FOLDER = "/outbrain/orig/"
			
 
				 SPARK_TEMP_FOLDER = "/outbrain/spark-temp/"
			
 
				 
			
 
				-from IPython.display import display
			
 
				-
			
 
				-from pyspark.sql.types import *
			
 
				+from pyspark.sql.types import IntegerType, StringType, StructType, StructField, TimestampType, FloatType, ArrayType, MapType
			
 
				 import pyspark.sql.functions as F
			
 
				 
			
 
				-from pyspark.sql import DataFrameWriter
			
 
				-
			
 
				-import numpy as np
			
 
				-
			
 
				 import math
			
 
				-import datetime
			
 
				 import time
			
 
				 
			
 
				 import random
			
--- a/TensorFlow/Recommendation/WideAndDeep/preproc/preproc3.py
+++ b/TensorFlow/Recommendation/WideAndDeep/preproc/preproc3.py
@@ -21,13 +21,9 @@ OUTPUT_BUCKET_FOLDER = "/outbrain/preprocessed/"
 
				 DATA_BUCKET_FOLDER = "/outbrain/orig/"
			
 
				 SPARK_TEMP_FOLDER = "/outbrain/spark-temp/"
			
 
				 
			
 
				-
			
 
				-from IPython.display import display
			
 
				-
			
 
				-
			
 
				-from pyspark.sql.types import *
			
 
				+from pyspark.sql.types import IntegerType, StringType, StructType, StructField, TimestampType, FloatType, ArrayType, MapType
			
 
				 import pyspark.sql.functions as F
			
 
				-from pyspark.ml.linalg import Vectors, SparseVector, VectorUDT
			
 
				+from pyspark.ml.linalg import SparseVector, VectorUDT
			
 
				 
			
 
				 from pyspark.context import SparkContext, SparkConf
			
 
				 from pyspark.sql.session import SparkSession
			
@@ -38,17 +34,10 @@ sc = SparkContext(conf=conf)
 
				 spark = SparkSession(sc)
			
 
				 
			
 
				 import numpy as np
			
 
				-import scipy.sparse
			
 
				 
			
 
				 import math
			
 
				 import datetime
			
 
				 import time
			
 
				-import itertools
			
 
				-
			
 
				-import pickle
			
 
				-
			
 
				-import random
			
 
				-random.seed(42)
			
 
				 
			
 
				 import pandas as pd
			
 
				 
			
@@ -461,7 +450,7 @@ else:
 
				 
			
 
				 # # Training models
			
 
				 def is_null(value):
			
 
				-    return value == None or len(str(value).strip()) == 0
			
 
				+    return value is None or len(str(value).strip()) == 0
			
 
				 
			
 
				 LESS_SPECIAL_CAT_VALUE = 'less'
			
 
				 def get_category_field_values_counts(field, df, min_threshold=10):
			
@@ -490,7 +479,7 @@ len(doc_entity_id_values_counts)
 
				 
			
 
				 # ## Processing average CTR by categories
			
 
				 def get_percentiles(df, field, quantiles_levels=None, max_error_rate=0.0):
			
 
				-    if quantiles_levels == None:
			
 
				+    if quantiles_levels is None:
			
 
				         quantiles_levels = np.arange(0.0, 1.1, 0.1).tolist() 
			
 
				     quantiles = df.approxQuantile(field, quantiles_levels, max_error_rate)
			
 
				     return dict(zip(quantiles_levels, quantiles))
			
@@ -896,7 +885,7 @@ def get_days_diff(newer_timestamp, older_timestamp):
 
				     return days_diff
			
 
				 
			
 
				 def get_time_decay_factor(timestamp, timestamp_ref=None, alpha=0.001):
			
 
				-    if timestamp_ref == None:
			
 
				+    if timestamp_ref is None:
			
 
				         timestamp_ref = time.time()
			
 
				         
			
 
				     days_diff = get_days_diff(timestamp_ref, timestamp)
			
@@ -1146,7 +1135,7 @@ def cosine_similarity_dicts(dict1, dict2):
 
				     return sum_common_aspects / (dict1_norm * dict2_norm), intersections
			
 
				 
			
 
				 def cosine_similarity_user_docs_aspects(user_aspect_profile, doc_aspect_ids, doc_aspects_confidence, aspect_docs_counts):
			
 
				-    if user_aspect_profile==None or len(user_aspect_profile) == 0 or doc_aspect_ids == None or len(doc_aspect_ids) == 0:
			
 
				+    if user_aspect_profile is None or len(user_aspect_profile) == 0 or doc_aspect_ids is None or len(doc_aspect_ids) == 0:
			
 
				         return None, None
			
 
				         
			
 
				     doc_aspects = dict(zip(doc_aspect_ids, doc_aspects_confidence))
			
@@ -1170,7 +1159,6 @@ def cosine_similarity_user_docs_aspects(user_aspect_profile, doc_aspect_ids, doc
 
				         random_error = math.pow(len(doc_aspects) / float(len(aspect_docs_counts)), 
			
 
				           intersections) * math.pow(len(user_aspect_profile) / float(len(aspect_docs_counts)), 
			
 
				           intersections)
			
 
				-        confidence = 1.0 - random_error
			
 
				     else:
			
 
				         #P(A not intersect B) = 1 - P(A intersect B)
			
 
				         random_error = 1 - ((len(doc_aspects) / float(len(aspect_docs_counts))) * 
			
@@ -1183,8 +1171,8 @@ def cosine_similarity_user_docs_aspects(user_aspect_profile, doc_aspect_ids, doc
 
				 def cosine_similarity_doc_event_doc_ad_aspects(doc_event_aspect_ids, doc_event_aspects_confidence, 
			
 
				         doc_ad_aspect_ids, doc_ad_aspects_confidence, 
			
 
				         aspect_docs_counts):
			
 
				-    if doc_event_aspect_ids == None or len(doc_event_aspect_ids) == 0 \
			
 
				-            or doc_ad_aspect_ids == None or len(doc_ad_aspect_ids) == 0:
			
 
				+    if doc_event_aspect_ids is None or len(doc_event_aspect_ids) == 0 \
			
 
				+            or doc_ad_aspect_ids is None or len(doc_ad_aspect_ids) == 0:
			
 
				         return None, None
			
 
				         
			
 
				     doc_event_aspects = dict(zip(doc_event_aspect_ids, doc_event_aspects_confidence))
			
@@ -1210,7 +1198,6 @@ def cosine_similarity_doc_event_doc_ad_aspects(doc_event_aspect_ids, doc_event_a
 
				         random_error = math.pow(len(doc_event_aspect_ids) / float(len(aspect_docs_counts)), 
			
 
				             intersections) * math.pow(len(doc_ad_aspect_ids) / float(len(aspect_docs_counts)), 
			
 
				             intersections)
			
 
				-        confidence = 1.0 - random_error
			
 
				     else:
			
 
				         #P(A not intersect B) = 1 - P(A intersect B)
			
 
				         random_error = 1 - ((len(doc_event_aspect_ids) / float(len(aspect_docs_counts))) * 
			
--- a/TensorFlow/Recommendation/WideAndDeep/trainer/task.py
+++ b/TensorFlow/Recommendation/WideAndDeep/trainer/task.py
@@ -20,8 +20,6 @@ import numpy as np
 
				 import argparse
			
 
				 import json
			
 
				 import os
			
 
				-import sys
			
 
				-import pickle
			
 
				 import tensorflow as tf
			
 
				 import tensorflow_transform as tft
			
 
				 
			
--- a/TensorFlow/Recommendation/WideAndDeep/utils/hooks/training_hooks.py
+++ b/TensorFlow/Recommendation/WideAndDeep/utils/hooks/training_hooks.py
@@ -15,12 +15,6 @@
 
				 # See the License for the specific language governing permissions and
			
 
				 # limitations under the License.
			
 
				 
			
 
				-import time
			
 
				-import tensorflow as tf
			
 
				-
			
 
				-import dllogger
			
 
				-
			
 
				-
			
 
				 class MeanAccumulator:
			
 
				     def __init__(self):
			
 
				         self.sum = 0