# setup.py
  1. # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import numpy as np
  16. import tensorflow as tf
  17. import horovod.tensorflow as hvd
  18. def set_flags(params):
  19. # os.environ['CUDA_CACHE_DISABLE'] = '1'
  20. os.environ['HOROVOD_GPU_ALLREDUCE'] = 'NCCL'
  21. os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
  22. # os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
  23. # os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '0'
  24. os.environ['TF_ADJUST_HUE_FUSED'] = '1'
  25. os.environ['TF_ADJUST_SATURATION_FUSED'] = '1'
  26. os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
  27. # os.environ['TF_SYNC_ON_FINISH'] = '0'
  28. os.environ['TF_AUTOTUNE_THRESHOLD'] = '2'
  29. os.environ['HOROVOD_CACHE_CAPACITY'] = "0"
  30. os.environ['HOROVOD_CYCLE_TIME'] = "1.0"
  31. if params.intraop_threads:
  32. os.environ['TF_NUM_INTRAOP_THREADS'] = str(params.intraop_threads)
  33. if params.interop_threads:
  34. os.environ['TF_NUM_INTEROP_THREADS'] = str(params.interop_threads)
  35. if params.use_xla:
  36. # it turns out tf_xla_enable_lazy_compilation is used before running main.py, so setting this flag
  37. # in the current function would have no effect. Thus, this flag is already set in Dockerfile. The
  38. # remaining XLA flags are set here.
  39. TF_XLA_FLAGS = os.environ['TF_XLA_FLAGS'] # contains tf_xla_enable_lazy_compilation
  40. # we set tf_xla_async_io_level=0 for 2 reasons: 1) It turns out that XLA doesn't like
  41. # hvd.allreduce ops used in the custom train_step. Because of this issue, training never started.
  42. # 2) XLA doesn't like the tf.cond used in conditional mixing (model module).
  43. # remove async flag since it's obsolete
  44. #os.environ['TF_XLA_FLAGS'] = TF_XLA_FLAGS + " --tf_xla_auto_jit=1 --tf_xla_async_io_level=0"
  45. os.environ['TF_XLA_FLAGS'] = TF_XLA_FLAGS + " --tf_xla_auto_jit=1"
  46. os.environ['TF_EXTRA_PTXAS_OPTIONS'] = "-sw200428197=true"
  47. tf.keras.backend.clear_session()
  48. tf.config.optimizer.set_jit(True)
  49. gpus = tf.config.experimental.list_physical_devices('GPU')
  50. tf.config.experimental.set_visible_devices(gpus, 'GPU')
  51. if params.memory_limit:
  52. for gpu in gpus:
  53. tf.config.experimental.set_virtual_device_configuration(gpu, [
  54. tf.config.experimental.VirtualDeviceConfiguration(memory_limit=params.memory_limit)])
  55. else:
  56. for gpu in gpus:
  57. tf.config.experimental.set_memory_growth(gpu, True)
  58. assert tf.config.experimental.get_memory_growth(gpu)
  59. if gpus:
  60. tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU')
  61. np.random.seed(params.seed)
  62. tf.random.set_seed(params.seed)
  63. if params.use_amp:
  64. # Model.compile will automatically wrap an optimizer with a tf.keras.mixed_precision.LossScaleOptimizer
  65. # if you use the 'mixed_float16' policy. If you use a custom training loop instead of calling Model.compile,
  66. # you should explicitly use a tf.keras.mixed_precision.LossScaleOptimizer to avoid numeric underflow with float16.
  67. policy = tf.keras.mixed_precision.experimental.Policy('mixed_float16', loss_scale='dynamic')
  68. tf.keras.mixed_precision.experimental.set_policy(policy)
  69. else:
  70. os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '0'