# utils.py
  1. # Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
  2. # Licensed under the Apache License, Version 2.0 (the "License");
  3. # you may not use this file except in compliance with the License.
  4. # You may obtain a copy of the License at
  5. #
  6. # http://www.apache.org/licenses/LICENSE-2.0
  7. #
  8. # Unless required by applicable law or agreed to in writing, software
  9. # distributed under the License is distributed on an "AS IS" BASIS,
  10. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. # See the License for the specific language governing permissions and
  12. # limitations under the License.
  13. import torch
  14. import torch.distributed as dist
  15. from pathlib import Path
  16. def get_rank():
  17. if not dist.is_available():
  18. return 0
  19. if not dist.is_initialized():
  20. return 0
  21. return dist.get_rank()
  22. def get_world_size():
  23. if not dist.is_available():
  24. return 1
  25. if not dist.is_initialized():
  26. return 1
  27. return dist.get_world_size()
  28. def is_main_process():
  29. return get_rank() == 0
  30. def barrier():
  31. if dist.is_available() and dist.is_initialized():
  32. dist.barrier()
  33. def format_step(step):
  34. if isinstance(step, str):
  35. return step
  36. s = ""
  37. if len(step) > 0:
  38. s += "Training Epoch: {} ".format(step[0])
  39. if len(step) > 1:
  40. s += "Training Iteration: {} ".format(step[1])
  41. if len(step) > 2:
  42. s += "Validation Iteration: {} ".format(step[2])
  43. return s
  44. def mkdir(path):
  45. Path(path).mkdir(parents=True, exist_ok=True)
  46. def mkdir_by_main_process(path):
  47. if is_main_process():
  48. mkdir(path)
  49. barrier()