gpu_affinity.py 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. # Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import math
  15. import os
  16. import pynvml
# Initialize NVML when this module is imported, before any of the
# pynvml-backed wrappers defined below can be called.
pynvml.nvmlInit()
  18. def systemGetDriverVersion():
  19. return pynvml.nvmlSystemGetDriverVersion()
  20. def deviceGetCount():
  21. return pynvml.nvmlDeviceGetCount()
  22. class device:
  23. # assume nvml returns list of 64 bit ints
  24. _nvml_affinity_elements = math.ceil(os.cpu_count() / 64)
  25. def __init__(self, device_idx):
  26. super().__init__()
  27. self.handle = pynvml.nvmlDeviceGetHandleByIndex(device_idx)
  28. def getName(self):
  29. return pynvml.nvmlDeviceGetName(self.handle)
  30. def getCpuAffinity(self):
  31. affinity_string = ''
  32. for j in pynvml.nvmlDeviceGetCpuAffinity(
  33. self.handle, device._nvml_affinity_elements
  34. ):
  35. # assume nvml returns list of 64 bit ints
  36. affinity_string = '{:064b}'.format(j) + affinity_string
  37. affinity_list = [int(x) for x in affinity_string]
  38. affinity_list.reverse() # so core 0 is in 0th element of list
  39. return [i for i, e in enumerate(affinity_list) if e != 0]
  40. def set_affinity(gpu_id=None):
  41. if gpu_id is None:
  42. gpu_id = int(os.getenv('LOCAL_RANK', 0))
  43. dev = device(gpu_id)
  44. os.sched_setaffinity(0, dev.getCpuAffinity())
  45. # list of ints representing the logical cores this process is now affinitied with
  46. return os.sched_getaffinity(0)