SunnyMirror
/
DeepLearningExamples
mirror of https://github.com/NVIDIA/DeepLearningExamples.git


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
							# Copyright (c) 2022 NVIDIA Corporation.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import logging
import paddle


def _get_gpu_affinity_table():
    """
    Parse the output of "nvidia-smi topo -m" into three dict objects:
    gpu_cpu_affinity_map, cpu_socket_gpus_list and cpu_core_groups.

    gpu_cpu_affinity_map (dict): Key is GPU ID and value is the cpu_affinity string.
    cpu_socket_gpus_list (dict): Key is the cpu_affinity string and value is a list
                                 of all GPU IDs that share this cpu affinity.
    cpu_core_groups (dict):      Key is the cpu_affinity string and value is the cpu
                                 core groups built by _group_cpu_cores(). For every
                                 core range there are #GPUs groups, each holding a
                                 nearly equal amount of cpu cores.

    Return:
        A tuple (gpu_cpu_affinity_map, cpu_socket_gpus_list, cpu_core_groups).
    Raises:
        AssertionError: if no 'CPU Affinity' column is found at a column index
                        greater than 0 in the title line.

    Example:
        $nvidia-smi topo -m
            GPU0    GPU1    GPU2    GPU3    CPU Affinity    NUMA Affinity
        GPU0     X     SYS     SYS     SYS      0-9,20-29           0
        GPU1   SYS       X     SYS     SYS      0-9,20-29           0
        GPU2   SYS      SYS      X     SYS      10-19,30-39         1
        GPU3   SYS      SYS    SYS       X      10-19,30-39         1

        gpu_cpu_affinity_map =
            { 0: '0-9,20-29', # GPU0's cpu affinity is '0-9,20-29'
              1: '0-9,20-29', # GPU1's cpu affinity is '0-9,20-29'
              2: '10-19,30-39', # GPU2's cpu affinity is '10-19,30-39'
              3: '10-19,30-39' } # GPU3's cpu affinity is '10-19,30-39'
        cpu_socket_gpus_list =
            { '0-9,20-29': [0, 1], # GPUs 0 and 1 belong to cpu affinity '0-9,20-29'.
              '10-19,30-39': [2, 3] # GPUs 2 and 3 belong to cpu affinity '10-19,30-39'.
            }
        cpu_core_groups =
            # There are 2 GPUs that belong to cpu affinity '0-9,20-29', so
            # cores [0, 1, ..., 8, 9] are split into two groups by taking
            # every 2nd element:
            # [0, 2, 4, 6, 8] and [1, 3, 5, 7, 9]
            # The same for cores [20, 21, ..., 28, 29].
            {'0-9,20-29': [
                               [[0, 2, 4, 6, 8], [1, 3, 5, 7, 9]],
                               [[20, 22, 24, 26, 28], [21, 23, 25, 27, 29]]
                              ],
            # The same as '0-9,20-29'
            '10-19,30-39': [
                            [[10, 12, 14, 16, 18], [11, 13, 15, 17, 19]],
                            [[30, 32, 34, 36, 38], [31, 33, 35, 37, 39]]
                           ]}

    """
    # Read the whole table inside a with-block so the pipe opened by
    # os.popen is closed as soon as the output has been consumed.
    with os.popen('nvidia-smi topo -m') as pipe:
        lines = pipe.readlines()

    # Locate the 'CPU Affinity' column in the tab-separated title line.
    # If several titles match, the last one wins.
    cpu_affinity_idx = -1
    for idx, title in enumerate(lines[0].split('\t')):
        if 'CPU Affinity' in title:
            cpu_affinity_idx = idx
    assert cpu_affinity_idx > 0, \
        "Can not obtain correct CPU affinity column index via nvidia-smi!"

    gpu_cpu_affinity_map = dict()
    cpu_socket_gpus_list = dict()
    # Skip title
    for line in lines[1:]:
        items = line.split('\t')

        # Only rows whose first cell mentions 'GPU' describe a GPU; the
        # characters after the 3-letter prefix are parsed as the GPU ID.
        if 'GPU' in items[0]:
            gpu_id = int(items[0][3:])
            affinity = items[cpu_affinity_idx]
            gpu_cpu_affinity_map[gpu_id] = affinity
            cpu_socket_gpus_list.setdefault(affinity, []).append(gpu_id)

    cpu_core_groups = _group_cpu_cores(cpu_socket_gpus_list)
    return gpu_cpu_affinity_map, cpu_socket_gpus_list, cpu_core_groups


def _group_cpu_cores(cpu_socket_gpus_list):
    """
    Generate a dictionary that key is cpu_affinity string and value is cpu core groups.
    cpu core groups contains #GPUs groups, each group have, nearly eaual amount of cpu cores.
    The grouping way is collect cpu cores every #GPUs-th elements, due to index of hyperthreading.
    For examle, 4 physical cores, 8 cores with hyperthreading. The CPU indices [0, 1, 2, 3] is
    physical cores, and [4, 5, 6, 7] is hyperthreading. In this case, distributing physical cores
    first, then hyperthreading would reach better performance.
    Args:
        cpu_socket_gpus_list (dict): a dict that map cpu_affinity_str to all GPUs that belong to it.
    Return:
        cpu_core_groups (dict): a dict that map cpu_affinity_str to cpu core groups.
    Example:
        cpu_socket_gpus_list = { '0-9,20-29': [0, 1], '10-19,30-39': [2, 3] },
        which means there are 2 GPUs, 0 and 1, belong to '0-9,20-29' and
        2 GPUs, 2 and 3, belong to '10-19,30-39'
        therefore, cpu_core_groups =
                {'0-9,20-29': [
                               [[0, 2, 4, 6, 8], [1, 3, 5, 7, 9]],
                               [[20, 22, 24, 26, 28], [21, 23, 25, 27, 29]]
                              ],
                 '10-19,30-39': [
                                 [[10, 12, 14, 16, 18], [11, 13, 15, 17, 19]],
                                 [[30, 32, 34, 36, 38], [31, 33, 35, 37, 39]]
                                ]}

    """
    cpu_core_groups = dict()
    for cpu_socket in cpu_socket_gpus_list:
        cpu_core_groups[cpu_socket] = list()
        gpu_count = len(cpu_socket_gpus_list[cpu_socket])
        cores = cpu_socket.split(',')
        for core in cores:
            core_indices = _get_core_indices(core)
            core_group = list()
            for i in range(gpu_count):
                start = i % len(core_indices)
                sub_core_set = core_indices[start::gpu_count]
                core_group.append(sub_core_set)
            cpu_core_groups[cpu_socket].append(core_group)
    return cpu_core_groups


def _get_core_indices(cores_str):
    """
    Generate a dictionary of cpu core indices.
    Args:
        cores_str (str): a string with format "start_idx-end_idx".
    Return:
        cpu_core_indices (list): a list collected all indices in [start_idx, end_idx].
    Example:
        cores_str = '0-20'
        cpu_core_indices = [0, 1, 2, ..., 18, 19, 20]
    """
    start, end = cores_str.split('-')
    return [*range(int(start), int(end) + 1)]


def set_cpu_affinity():
    """
    Bind the current process to the CPU cores assigned to its GPU.

    The GPU-CPU topology is obtained from _get_gpu_affinity_table(), which
    parses "nvidia-smi topo -m". The cores of every cpu affinity are shared
    out among the GPUs that belong to it, and this process is pinned to the
    share of its own GPU with os.sched_setaffinity().

    The rank of this process within its node is used as the GPU ID when
    looking up the affinity tables.

    Example:
        $nvidia-smi topo -m
               GPU0    GPU1    GPU2    GPU3    CPU Affinity    NUMA Affinity
        GPU0     X     SYS     SYS     SYS      0-9,20-29           0
        GPU1   SYS       X     SYS     SYS      0-9,20-29           0
        GPU2   SYS      SYS      X     SYS      10-19,30-39         1
        GPU3   SYS      SYS    SYS       X      10-19,30-39         1

        GPU ID -> cpu affinity:
            { 0: '0-9,20-29', 1: '0-9,20-29',
              2: '10-19,30-39', 3: '10-19,30-39' }
        cpu affinity -> GPU IDs:
            { '0-9,20-29': [0, 1], '10-19,30-39': [2, 3] }
        cpu affinity -> core groups:
            {'0-9,20-29': [
                            [[0, 2, 4, 6, 8], [1, 3, 5, 7, 9]],
                            [[20, 22, 24, 26, 28], [21, 23, 25, 27, 29]]
                           ],
             '10-19,30-39': [
                              [[10, 12, 14, 16, 18], [11, 13, 15, 17, 19]],
                              [[30, 32, 34, 36, 38], [31, 33, 35, 37, 39]]
                             ]}

        Rank 0 has cpu affinity '0-9,20-29' and sits at position 0 of the GPU
        list [0, 1], so it receives the position-0 group of every core range:
        [0, 2, 4, 6, 8] and [20, 22, 24, 26, 28], i.e.
        affinity_mask = [0, 2, 4, 6, 8, 20, 22, 24, 26, 28]
    """
    gpu_to_affinity, affinity_to_gpus, affinity_to_core_groups = \
        _get_gpu_affinity_table()

    # Derive the rank of this process within its own node.
    num_nodes = paddle.distributed.fleet.node_num()
    gpus_on_node = paddle.distributed.get_world_size() // num_nodes
    rank_on_node = paddle.distributed.get_rank() % gpus_on_node

    # Position of this GPU among the GPUs sharing the same cpu affinity.
    affinity_key = gpu_to_affinity[rank_on_node]
    slot = affinity_to_gpus[affinity_key].index(rank_on_node)

    # Gather this GPU's share from every core range of the affinity.
    affinity_mask = [
        core
        for range_groups in affinity_to_core_groups[affinity_key]
        for core in range_groups[slot]
    ]

    pid = os.getpid()
    os.sched_setaffinity(pid, affinity_mask)
    logging.info("Set CPU affinity of rank-%d (Process %d) "
                 "to %s.", rank_on_node, pid, str(os.sched_getaffinity(pid)))