IMB
IMB (Intel MPI Benchmarks) is used to evaluate the efficiency of point-to-point and collective communication between the nodes of an HPC cluster at different message sizes.
I. Job submission parameters
IMB jobs can be submitted through the public template. The IMB-related job parameters are as follows:
| Parameter | Description |
| --- | --- |
| order | Command used to run the benchmark (appended to the mpirun command line by the execution script) |
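For example, order can be set to any benchmark accepted by IMB-MPI1; the values below are illustrative only:
order="-genv I_MPI_DEBUG 5 IMB-MPI1 pingpong"    # point-to-point latency/bandwidth between two ranks
order="IMB-MPI1 sendrecv exchange"               # bidirectional point-to-point benchmarks
order="IMB-MPI1 allreduce"                       # collective benchmark across all ranks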
II. IMB job run reference
1. slurm file
#!/bin/bash
#SBATCH --job-name=mpi-benchmark
#SBATCH --partition=dell_intel
#SBATCH --output=%j.out
#SBATCH --error=%j.err
#SBATCH -N 2
#SBATCH --ntasks-per-node=20
ulimit -s unlimited
ulimit -l unlimited
source /opt/ohpc/pub/apps/intel/setvars.sh
module load intel/mpi-2021.1.1
module load intel/mpi-benchmark/2021.3
export I_MPI_OFI_PROVIDER=Verbs
export FI_VERBS_IFACE=team1.282
mpirun -genv I_MPI_FABRICS ofi -genv I_MPI_DEBUG 5 -np 40 -ppn 20 -host c1,c2 IMB-MPI1 pingpong > result.txt # -genv I_MPI_DEBUG 5 prints MPI debug information
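Assuming the file above is saved as mpi-benchmark.slurm (the file name is an assumption), the job can be submitted and checked with the standard Slurm commands:
sbatch mpi-benchmark.slurm    # submit the job; sbatch prints the job ID
squeue -u $USER               # watch the job while it is pending or running
cat result.txt                # PingPong output written by the mpirun line above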
2. Run results
# List of Benchmarks to run:
# PingPong
#---------------------------------------------------
# Benchmarking PingPong
# #processes = 2
# ( 38 additional processes waiting in MPI_Barrier)
#---------------------------------------------------
#bytes #repetitions t[usec] Mbytes/sec
0 1000 1.07 0.00
1 1000 1.08 0.92
2 1000 1.08 1.86
4 1000 1.08 3.70
8 1000 1.08 7.40
16 1000 1.08 14.81
32 1000 1.09 29.36
64 1000 1.13 56.86
128 1000 1.17 109.49
256 1000 1.60 160.24
512 1000 1.73 296.62
1024 1000 1.97 518.91
2048 1000 2.47 828.47
4096 1000 3.56 1149.07
8192 1000 5.23 1566.88
16384 1000 8.51 1924.82
32768 1000 11.50 2849.71
65536 640 17.36 3774.78
131072 320 29.06 4510.81
262144 160 48.87 5363.88
524288 80 89.53 5856.23
1048576 40 170.34 6155.92
2097152 20 332.13 6314.19
4194304 10 655.83 6395.43
# All processes entering MPI_Finalize
# t[usec]: MPI latency result
# Mbytes/sec: MPI bandwidth result
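To extract only the message-size and bandwidth columns from result.txt, a minimal sketch assuming the PingPong output format shown above:
awk '$1 ~ /^[0-9]+$/ {printf "%10s bytes  %10s MB/s\n", $1, $4}' result.txt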
3. input file
#job_name=lmp
#run_time=24:00:00
work_dir=/home/liupeng/MPI-Benchmark
partition=dell_intel
node_num=2
task_per_node=10
order="-genv I_MPI_DEBUG 5 IMB-MPI1 pingpong"
4. Execution script
#!/bin/bash
set -x
source /home/wushiming/mpi-benchmark/input
time=`date +%m%d_%H%M%S`
if [ "x$job_name" == "x" ];then
sbatch_job_name="YHPC_$time "
else
sbatch_job_name=$job_name
fi
if [ "x$partition" == "x" ];then
sbatch_partition=""
else
sbatch_partition=$partition
fi
if [ "x$work_dir" == "x" ];then
mkdir -p ~/yhpc/YHPC_$time
sbatch_work_dir=~/yhpc/YHPC_$time
else
sbatch_work_dir=$work_dir
fi
if [ "x$run_time" == "x" ];then
sbatch_run_time=03:00:00
else
sbatch_run_time=$run_time
fi
if [ "x$order" == "x" ];then
echo "The order cannot be empty."
exit 1
else
necessary_order=$order
fi
sbatch_node_num=$node_num
sbatch_task_per_node=$task_per_node
sbatch_err_log=$sbatch_work_dir/%j.err
sbatch_out_log=$sbatch_work_dir/%j.out
# Generate the slurm file
cat > $sbatch_work_dir/mpi_benchmark.slurm <<EOF
#!/bin/bash
#SBATCH --chdir=$sbatch_work_dir
#SBATCH --ntasks-per-node=$sbatch_task_per_node
#SBATCH --job-name $sbatch_job_name
#SBATCH --nodes=$sbatch_node_num
#SBATCH --mail-type=ALL
#SBATCH --partition $sbatch_partition
#SBATCH --time=$sbatch_run_time
#SBATCH -e $sbatch_err_log
#SBATCH -o $sbatch_out_log
ulimit -s unlimited
ulimit -l unlimited
# Load the runtime environment
module use /opt/ohpc/pub/modulefiles/
source /opt/ohpc/pub/apps/intel/setvars.sh
module load intel/mpi-2021.1.1
module load intel/mpi-benchmark/2021.3
export I_MPI_OFI_PROVIDER=Verbs
export FI_VERBS_IFACE=team1.282
echo -e "The start time is: `date +"%Y-%m-%d %H:%M:%S"`"
echo -e "My job ID is: SLURM_JOB_ID"
echo -e "The total cores is: SLURM_NPROCS"
echo -e "The SLURM_JOB_ID Job info:"
scontrol show job SLURM_JOB_ID
# Run the benchmark
cd $sbatch_work_dir
mpirun -genv I_MPI_FABRICS ofi -genv I_MPI_DEBUG 5 $necessary_order > result.txt
echo -e "The end time is: `date +"%Y-%m-%d %H:%M:%S"` \n"
EOF
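# Restore the '$' in front of the SLURM_* variables above; they are written without it
# so that the unquoted heredoc does not expand them while the slurm file is generated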
sed -i 's/SLURM_/\$SLURM_/g' $sbatch_work_dir/mpi_benchmark.slurm
/usr/bin/sbatch $sbatch_work_dir/mpi_benchmark.slurm
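Assuming the execution script above is saved as job.sh (the name is illustrative) and the input path sourced at its top points to the input file from step 3, a job is submitted by editing the input file and running the script:
bash job.sh       # generates mpi_benchmark.slurm in work_dir and submits it with sbatch
squeue -u $USER   # confirm the job is queued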