Explorar o código

[Convnets/TF] Added barrier at the end of multiprocess run to allow step synchronization

Lukasz Pierscieniewski hai 4 anos
pai
achega
e1af187301

+ 16 - 12
TensorFlow/Classification/ConvNets/main.py

@@ -15,20 +15,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from utils.cmdline_helper import parse_cmdline
+from model.resnet import model_architectures
+from runtime import Runner
+import dllogger
+from utils import hvd_wrapper as hvd
+import tensorflow as tf
 import os
 
 import warnings
 warnings.simplefilter("ignore")
 
-import tensorflow as tf
-
-from utils import hvd_wrapper as hvd
-import dllogger
-
-from runtime import Runner
-from model.resnet import model_architectures
-
-from utils.cmdline_helper import parse_cmdline
 
 if __name__ == "__main__":
 
@@ -42,7 +39,8 @@ if __name__ == "__main__":
         os.makedirs(FLAGS.results_dir, exist_ok=True)
 
         dllogger.init(backends=[
-            dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE, filename=log_path),
+            dllogger.JSONStreamBackend(
+                verbosity=dllogger.Verbosity.VERBOSE, filename=log_path),
             dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE)
         ])
     else:
@@ -100,7 +98,8 @@ if __name__ == "__main__":
     if FLAGS.mode in ["train_and_evaluate", 'evaluate', 'inference_benchmark']:
 
         if FLAGS.mode == 'inference_benchmark' and hvd.size() > 1:
-            raise NotImplementedError("Only single GPU inference is implemented.")
+            raise NotImplementedError(
+                "Only single GPU inference is implemented.")
 
         elif hvd.rank() == 0:
             runner.evaluate(iter_unit=FLAGS.iter_unit if FLAGS.mode != "train_and_evaluate" else "epoch",
@@ -114,6 +113,10 @@ if __name__ == "__main__":
                             symmetric=FLAGS.symmetric,
                             use_final_conv=FLAGS.use_final_conv,
                             use_qdq=FLAGS.use_qdq)
+        if hvd.size() > 1:
+            # Wait for all processes to finish
+            from mpi4py import MPI
+            MPI.COMM_WORLD.Barrier()
 
     if FLAGS.mode == 'predict':
         if FLAGS.to_predict is None:
@@ -123,7 +126,8 @@ if __name__ == "__main__":
             raise ValueError("Only prediction on single images is supported!")
 
         if hvd.size() > 1:
-            raise NotImplementedError("Only single GPU inference is implemented.")
+            raise NotImplementedError(
+                "Only single GPU inference is implemented.")
 
         else:
             runner.predict(FLAGS.to_predict,

+ 1 - 0
TensorFlow/Classification/ConvNets/requirements.txt

@@ -1 +1,2 @@
 git+https://github.com/NVIDIA/dllogger#egg=dllogger 
+mpi4py

+ 2 - 1
TensorFlow/Classification/ConvNets/resnet50v1.5/README.md

@@ -856,6 +856,7 @@ on NVIDIA T4 with (1x T4 16G) GPU.
 8. August 2020
   * Updated command line argument names
   * Added support for synthetic dataset with different image size
-
+9. January, 2022
+  * Added barrier at the end of multiprocess run
 ### Known issues
 Performance without XLA enabled is low due to BN + ReLU fusion bug.

+ 2 - 1
TensorFlow/Classification/ConvNets/resnext101-32x4d/README.md

@@ -796,6 +796,7 @@ June 2020
 August 2020
    - Updated command line argument names
    - Added support for synthetic dataset with different image size
-
+January 2022
+   - Added barrier at the end of multiprocess run
 ### Known issues
 Performance without XLA enabled is low due to BN + ReLU fusion bug.

+ 2 - 0
TensorFlow/Classification/ConvNets/se-resnext101-32x4d/README.md

@@ -789,6 +789,8 @@ April 2020
 August 2020
    - Updated command line argument names
    - Added support for synthetic dataset with different image size
+January 2022
+   - Added barrier at the end of multiprocess run
 
 ### Known issues
 Performance without XLA enabled is low due to BN + ReLU fusion bug.