affinity.py 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. # Copyright (c) 2022 NVIDIA Corporation. All rights reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import logging
  16. import paddle
  17. def _get_gpu_affinity_table():
  18. """
  19. Generate three dict objects, gpu_cpu_affinity_map, cpu_socket_gpus_list, cpu_core_groups.
  20. gpu_cpu_affinity_map (dict): Key is GPU ID and value is cpu_affinity string.
  21. cpu_socket_gpus_list (dict): Key is cpu_affinity string and value is a list
  22. collected all GPU IDs that affinity to this cpu socket.
  23. cpu_core_groups (dict): Key is cpu_affinity string and value is cpu core groups.
  24. cpu core groups contains #GPUs groups, each group have,
  25. nearly eaual amount of cpu cores.
  26. Example:
  27. $nvidis-smi topo -m
  28. GPU0 GPU1 GPU2 GPU3 CPU Affinity NUMA Affinity
  29. GPU0 X SYS SYS SYS 0-9,20-29 0
  30. GPU1 SYS X SYS SYS 0-9,20-29 0
  31. GPU2 SYS SYS X SYS 10-19,30-39 1
  32. GPU3 SYS SYS SYS X 10-19,30-39 1
  33. gpu_cpu_affinity_map =
  34. { 0: '0-9,20-29', # GPU0's cpu affninity is '0-9,20-29'
  35. 1: '0-9,20-29', # GPU1's cpu affninity is '0-9,20-29'
  36. 2: '10-19,30-39', # GPU2's cpu affninity is '10-19,30-39'
  37. 3: '10-19,30-39' } # GPU3's cpu affninity is '10-19,30-39'
  38. cpu_socket_gpus_list =
  39. { '0-9,20-29': [0, 1], # There are 2 GPUs, 0 and 1, belong to cpu affinity '0-9,20-29'.
  40. '10-19,30-39': [2, 3] # There are 2 GPUs, 2 and 3, belong to cpu affinity '10-19,30-39'.
  41. }
  42. cpu_core_groups =
  43. # There are 2 GPUs belong to cpu affinity '0-9,20-29', then
  44. # cores [0, 1, ..., 8, 9] would be split to two groups every
  45. # 2-th elements
  46. # [0, 2, 4, 6, 8] and [1, 3, 5, 7, 9]
  47. # The same for cores [20, 21, ..., 28, 29].
  48. {'0-9,20-29': [
  49. [[0, 2, 4, 6, 8], [1, 3, 5, 7, 9]],
  50. [[20, 22, 24, 26, 28], [21, 23, 25, 27, 29]]
  51. ],
  52. # The same as '0-9,20-29'
  53. '10-19,30-39': [
  54. [[10, 12, 14, 16, 18], [11, 13, 15, 17, 19]],
  55. [[30, 32, 34, 36, 38], [31, 33, 35, 37, 39]]
  56. ]}
  57. """
  58. lines = os.popen('nvidia-smi topo -m').readlines()
  59. cpu_affinity_idx = -1
  60. titles = lines[0].split('\t')
  61. for idx in range(len(titles)):
  62. if 'CPU Affinity' in titles[idx]:
  63. cpu_affinity_idx = idx
  64. assert cpu_affinity_idx > 0, \
  65. "Can not obtain correct CPU affinity column index via nvidia-smi!"
  66. gpu_cpu_affinity_map = dict()
  67. cpu_socket_gpus_list = dict()
  68. # Skip title
  69. for idx in range(1, len(lines)):
  70. line = lines[idx]
  71. items = line.split('\t')
  72. if 'GPU' in items[0]:
  73. gpu_id = int(items[0][3:])
  74. affinity = items[cpu_affinity_idx]
  75. gpu_cpu_affinity_map[gpu_id] = affinity
  76. if affinity in cpu_socket_gpus_list:
  77. cpu_socket_gpus_list[affinity].append(gpu_id)
  78. else:
  79. cpu_socket_gpus_list[affinity] = [gpu_id]
  80. cpu_core_groups = _group_cpu_cores(cpu_socket_gpus_list)
  81. return gpu_cpu_affinity_map, cpu_socket_gpus_list, cpu_core_groups
  82. def _group_cpu_cores(cpu_socket_gpus_list):
  83. """
  84. Generate a dictionary that key is cpu_affinity string and value is cpu core groups.
  85. cpu core groups contains #GPUs groups, each group have, nearly eaual amount of cpu cores.
  86. The grouping way is collect cpu cores every #GPUs-th elements, due to index of hyperthreading.
  87. For examle, 4 physical cores, 8 cores with hyperthreading. The CPU indices [0, 1, 2, 3] is
  88. physical cores, and [4, 5, 6, 7] is hyperthreading. In this case, distributing physical cores
  89. first, then hyperthreading would reach better performance.
  90. Args:
  91. cpu_socket_gpus_list (dict): a dict that map cpu_affinity_str to all GPUs that belong to it.
  92. Return:
  93. cpu_core_groups (dict): a dict that map cpu_affinity_str to cpu core groups.
  94. Example:
  95. cpu_socket_gpus_list = { '0-9,20-29': [0, 1], '10-19,30-39': [2, 3] },
  96. which means there are 2 GPUs, 0 and 1, belong to '0-9,20-29' and
  97. 2 GPUs, 2 and 3, belong to '10-19,30-39'
  98. therefore, cpu_core_groups =
  99. {'0-9,20-29': [
  100. [[0, 2, 4, 6, 8], [1, 3, 5, 7, 9]],
  101. [[20, 22, 24, 26, 28], [21, 23, 25, 27, 29]]
  102. ],
  103. '10-19,30-39': [
  104. [[10, 12, 14, 16, 18], [11, 13, 15, 17, 19]],
  105. [[30, 32, 34, 36, 38], [31, 33, 35, 37, 39]]
  106. ]}
  107. """
  108. cpu_core_groups = dict()
  109. for cpu_socket in cpu_socket_gpus_list:
  110. cpu_core_groups[cpu_socket] = list()
  111. gpu_count = len(cpu_socket_gpus_list[cpu_socket])
  112. cores = cpu_socket.split(',')
  113. for core in cores:
  114. core_indices = _get_core_indices(core)
  115. core_group = list()
  116. for i in range(gpu_count):
  117. start = i % len(core_indices)
  118. sub_core_set = core_indices[start::gpu_count]
  119. core_group.append(sub_core_set)
  120. cpu_core_groups[cpu_socket].append(core_group)
  121. return cpu_core_groups
  122. def _get_core_indices(cores_str):
  123. """
  124. Generate a dictionary of cpu core indices.
  125. Args:
  126. cores_str (str): a string with format "start_idx-end_idx".
  127. Return:
  128. cpu_core_indices (list): a list collected all indices in [start_idx, end_idx].
  129. Example:
  130. cores_str = '0-20'
  131. cpu_core_indices = [0, 1, 2, ..., 18, 19, 20]
  132. """
  133. start, end = cores_str.split('-')
  134. return [*range(int(start), int(end) + 1)]
  135. def set_cpu_affinity():
  136. """
  137. Setup CPU affinity.
  138. Each GPU would be bound to a specific set of CPU cores for optimal and stable performance.
  139. This function would obtain GPU-CPU affinity via "nvidia-smi topo -m", then equally distribute
  140. CPU cores to each GPU.
  141. """
  142. gpu_cpu_affinity_map, cpu_socket_gpus_list, cpu_core_groups = \
  143. _get_gpu_affinity_table()
  144. node_num = paddle.distributed.fleet.node_num()
  145. gpu_per_node = paddle.distributed.get_world_size() // node_num
  146. local_rank = paddle.distributed.get_rank() % gpu_per_node
  147. # gpu_cpu_affinity_map (dict): Key is GPU ID and value is cpu_affinity string.
  148. # cpu_socket_gpus_list (dict): Key is cpu_affinity string and value is a list
  149. # collected all GPU IDs that affinity to this cpu socket.
  150. # cpu_core_groups (dict): Key is cpu_affinity string and value is cpu core groups.
  151. # cpu core groups contains #GPUs groups, each group have,
  152. # nearly eaual amount of cpu cores.
  153. # Example:
  154. # $nvidis-smi topo -m
  155. # GPU0 GPU1 GPU2 GPU3 CPU Affinity NUMA Affinity
  156. # GPU0 X SYS SYS SYS 0-9,20-29 0
  157. # GPU1 SYS X SYS SYS 0-9,20-29 0
  158. # GPU2 SYS SYS X SYS 10-19,30-39 1
  159. # GPU3 SYS SYS SYS X 10-19,30-39 1
  160. #
  161. # gpu_cpu_affinity_map =
  162. # { 0: '0-9,20-29',
  163. # 1: '0-9,20-29',
  164. # 2: '10-19,30-39',
  165. # 3: '10-19,30-39' }
  166. # cpu_socket_gpus_list =
  167. # { '0-9,20-29': [0, 1],
  168. # '10-19,30-39': [2, 3] }
  169. # cpu_core_groups =
  170. # {'0-9,20-29': [
  171. # [[0, 2, 4, 6, 8], [1, 3, 5, 7, 9]],
  172. # [[20, 22, 24, 26, 28], [21, 23, 25, 27, 29]]
  173. # ],
  174. # '10-19,30-39': [
  175. # [[10, 12, 14, 16, 18], [11, 13, 15, 17, 19]],
  176. # [[30, 32, 34, 36, 38], [31, 33, 35, 37, 39]]
  177. # ]}
  178. #
  179. # for rank-0, it belong to '0-9,20-29' cpu_affinity_key,
  180. # and it locate in index-0 of cpu_socket_gpus_list['0-9,20-29'],
  181. # therefore, affinity_mask would be a collection of all cpu cores
  182. # in index-0 of cpu_core_groups['0-9,20-29'], that is [0, 2, 4, 6, 8]
  183. # and [20, 22, 24, 26, 28].
  184. # affinity_mask = [0, 2, 4, 6, 8, 20, 22, 24, 26, 28]
  185. affinity_mask = list()
  186. cpu_affinity_key = gpu_cpu_affinity_map[local_rank]
  187. cpu_core_idx = cpu_socket_gpus_list[cpu_affinity_key].index(local_rank)
  188. for cpu_core_group in cpu_core_groups[cpu_affinity_key]:
  189. affinity_mask.extend(cpu_core_group[cpu_core_idx])
  190. pid = os.getpid()
  191. os.sched_setaffinity(pid, affinity_mask)
  192. logging.info("Set CPU affinity of rank-%d (Process %d) "
  193. "to %s.", local_rank, pid, str(os.sched_getaffinity(pid)))