Немного заметок про работу с InfiniBand
Список устройств IB на хосте
# ibv_devices
device node GUID
------ ----------------
mlx5_0 b88303ffff8ec424
Подробная информация об устройствах
# ibv_devinfo
hca_id: mlx5_0
transport: InfiniBand (0)
fw_ver: 16.33.1048
node_guid: b883:03ff:ff8e:c424
sys_image_guid: b883:03ff:ff8e:c424
vendor_id: 0x02c9
vendor_part_id: 4119
hw_ver: 0x0
board_id: HPE0000000008
phys_port_cnt: 1
Device ports:
port: 1
state: PORT_ACTIVE (4)
max_mtu: 4096 (5)
active_mtu: 4096 (5)
sm_lid: 1
port_lid: 4
port_lmc: 0x00
link_layer: InfiniBand
Адрес IB
# ibaddr
GID fe80::b883:3ff:ff8e:c424 LID start 0x4 end 0x4
Статус IB
# ibstatus
Infiniband device 'mlx5_0' port 1 status:
default gid: fe80:0000:0000:0000:b883:03ff:ff8e:c424
base lid: 0x4
sm lid: 0x1
state: 4: ACTIVE
phys state: 5: LinkUp
rate: 100 Gb/sec (4X EDR)
link_layer: InfiniBand
Скорость (в файле)
# cat /sys/class/infiniband/mlx5_0/ports/1/rate
100 Gb/sec (4X EDR)
И вообще в директории /sys/class/infiniband/mlx5_0/ports/1/ много инфы по карте.
Список доступных хостов по IB
# ibhosts
Ca : 0xb88303ffff8ec464 ports 1 "mix9 HCA-1"
Ca : 0x043f72030010f298 ports 1 "Mellanox Technologies Aggregation Node"
Ca : 0xb88303ffff8e649c ports 1 "mix18 HCA-1"
Ca : 0xb88303ffff8ec430 ports 1 "mix2 HCA-1"
Ca : 0xb88303ffff8e74bc ports 1 "mix1 HCA-1"
Ca : 0xb88303ffff8ec488 ports 1 "mix22 HCA-1"
Ca : 0xb88303ffff8ec424 ports 1 "mix5 HCA-1"
Ca : 0xb88303ffff8ec450 ports 1 "mix13 HCA-1"
Ca : 0xb88303ffff8ec444 ports 1 "mix17 HCA-1"
Ca : 0xb88303ffff8ec478 ports 1 "mix14 HCA-1"
Ca : 0xb88303ffff8ec458 ports 1 "mix21 HCA-1"
Ca : 0xb88303ffff8ec41c ports 1 "mix7 HCA-1"
Ca : 0xb88303ffff8ec43c ports 1 "mix10 HCA-1"
Ca : 0xb88303ffff8ea4f0 ports 1 "manager1 HCA-1"
Ca : 0xb88303ffff8ec46c ports 1 "mix15 HCA-1"
Ca : 0xb88303ffff8ec42c ports 1 "mix8 HCA-1"
Ca : 0xb88303ffff8ec44c ports 1 "mix23 HCA-1"
Ca : 0xb88303ffff8ec474 ports 1 "mix20 HCA-1"
Ca : 0xb88303ffff8ec420 ports 1 "mix4 HCA-1"
Ca : 0xb88303ffff8ec448 ports 1 "mix12 HCA-1"
Ca : 0xb88303ffff8ec468 ports 1 "mix11 HCA-1"
Ca : 0xb88303ffff8e74c8 ports 1 "MT4119 ConnectX5 Mellanox Technologies"
Ca : 0xb88303ffff8ec440 ports 1 "mix19 HCA-1"
Ca : 0xb88303ffff8ec438 ports 1 "mix24 HCA-1"
Ca : 0xb88303ffff8e6488 ports 1 "mix16 HCA-1"
Ca : 0xb88303ffff8e74c0 ports 1 "mix6 HCA-1"
Подробная сетевая информация
# iblinkinfo
CA: mix22 HCA-1:
0xb88303ffff8ec488 20 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 31[ ] "SwitchIB Mellanox Technologies" ( )
CA: Mellanox Technologies Aggregation Node:
0x043f72030010f298 26 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 37[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix18 HCA-1:
0xb88303ffff8e649c 22 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 34[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix2 HCA-1:
0xb88303ffff8ec430 6 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 33[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix1 HCA-1:
0xb88303ffff8e74bc 23 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 32[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix9 HCA-1:
0xb88303ffff8ec464 14 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 30[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix21 HCA-1:
0xb88303ffff8ec458 13 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 26[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix17 HCA-1:
0xb88303ffff8ec444 10 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 28[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix13 HCA-1:
0xb88303ffff8ec450 12 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 27[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix14 HCA-1:
0xb88303ffff8ec478 18 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 25[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix6 HCA-1:
0xb88303ffff8e74c0 24 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 24[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix10 HCA-1:
0xb88303ffff8ec43c 8 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 23[ ] "SwitchIB Mellanox Technologies" ( )
CA: manager1 HCA-1:
0xb88303ffff8ea4f0 1 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 19[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix15 HCA-1:
0xb88303ffff8ec46c 16 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 14[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix7 HCA-1:
0xb88303ffff8ec41c 2 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 11[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix8 HCA-1:
0xb88303ffff8ec42c 5 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 13[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix23 HCA-1:
0xb88303ffff8ec44c 27 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 12[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix20 HCA-1:
0xb88303ffff8ec474 17 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 10[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix4 HCA-1:
0xb88303ffff8ec420 3 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 9[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix12 HCA-1:
0xb88303ffff8ec448 11 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 8[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix3 HCA-1:
0xb88303ffff8e74c8 25 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 7[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix11 HCA-1:
0xb88303ffff8ec468 15 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 5[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix19 HCA-1:
0xb88303ffff8ec440 9 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 6[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix24 HCA-1:
0xb88303ffff8ec438 7 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 4[ ] "SwitchIB Mellanox Technologies" ( )
CA: mix16 HCA-1:
0xb88303ffff8e6488 19 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 3[ ] "SwitchIB Mellanox Technologies" ( )
Switch: 0x043f72030010f290 SwitchIB Mellanox Technologies:
21 1[ ] ==( Down/ Polling)==> [ ] "" ( )
21 2[ ] ==( Down/ Polling)==> [ ] "" ( )
21 3[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 19 1[ ] "mix16 HCA-1" ( )
21 4[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 7 1[ ] "mix24 HCA-1" ( )
21 5[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 15 1[ ] "mix11 HCA-1" ( )
21 6[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 9 1[ ] "mix19 HCA-1" ( )
21 7[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 25 1[ ] "mix3 HCA-1" ( )
21 8[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 11 1[ ] "mix12 HCA-1" ( )
21 9[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 3 1[ ] "mix4 HCA-1" ( )
21 10[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 17 1[ ] "mix20 HCA-1" ( )
21 11[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 2 1[ ] "mix7 HCA-1" ( )
21 12[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 27 1[ ] "mix23 HCA-1" ( )
21 13[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 5 1[ ] "mix8 HCA-1" ( )
21 14[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 16 1[ ] "mix15 HCA-1" ( )
21 15[ ] ==( Down/ Polling)==> [ ] "" ( )
21 16[ ] ==( Down/ Polling)==> [ ] "" ( )
21 17[ ] ==( Down/ Polling)==> [ ] "" ( )
21 18[ ] ==( Down/ Polling)==> [ ] "" ( )
21 19[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 1 1[ ] "manager1 HCA-1" ( )
21 20[ ] ==( Down/ Polling)==> [ ] "" ( )
21 21[ ] ==( Down/ Polling)==> [ ] "" ( )
21 22[ ] ==( Down/ Polling)==> [ ] "" ( )
21 23[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 8 1[ ] "mix10 HCA-1" ( )
21 24[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 24 1[ ] "mix6 HCA-1" ( )
21 25[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 18 1[ ] "mix14 HCA-1" ( )
21 26[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 13 1[ ] "mix21 HCA-1" ( )
21 27[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 12 1[ ] "mix13 HCA-1" ( )
21 28[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 10 1[ ] "mix17 HCA-1" ( )
21 29[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 4 1[ ] "mix5 HCA-1" ( )
21 30[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 14 1[ ] "mix9 HCA-1" ( )
21 31[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 20 1[ ] "mix22 HCA-1" ( )
21 32[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 23 1[ ] "mix1 HCA-1" ( )
21 33[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 6 1[ ] "mix2 HCA-1" ( )
21 34[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 22 1[ ] "mix18 HCA-1" ( )
21 35[ ] ==( Down/ Polling)==> [ ] "" ( )
21 36[ ] ==( Down/ Polling)==> [ ] "" ( )
21 37[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 26 1[ ] "Mellanox Technologies Aggregation Node" ( )
CA: mix5 HCA-1:
0xb88303ffff8ec424 4 1[ ] ==( 4X 25.78125 Gbps Active/ LinkUp)==> 21 29[ ] "SwitchIB Mellanox Technologies" ( )
Пинг по IB (параметром по умолчанию или через -L передаём LID — берём из ibstatus или ibaddr, его можно указывать в hex или в dec; через -G передаём GUID — берём из ibhosts)
на сервере # ibping -S
на клиенте # ibping -L 0x18
Pong from mix6.(none) (Lid 24): time 0.212 ms
Pong from mix6.(none) (Lid 24): time 0.215 ms
# ibping 24
Pong from mix6.(none) (Lid 24): time 0.227 ms
Pong from mix6.(none) (Lid 24): time 0.194 ms
Pong from mix6.(none) (Lid 24): time 0.194 ms
# ibping -G 0xb88303ffff8e74c0
Pong from mix6.(none) (Lid 24): time 0.190 ms
Pong from mix6.(none) (Lid 24): time 0.217 ms
Pong from mix6.(none) (Lid 24): time 0.194 ms
Пинг через IPoIB
На сервере # ibv_rc_pingpong -g 0 -d mlx5_0 -i 1
local address: LID 0x0004, QPN 0x00002e, PSN 0x507e79, GID fe80::b883:3ff:ff8e:c424
remote address: LID 0x0018, QPN 0x00002e, PSN 0x0551e4, GID fe80::b883:3ff:ff8e:74c0
8192000 bytes in 0.00 seconds = 14206.81 Mbit/sec
1000 iters in 0.00 seconds = 4.61 usec/iter
На клиенте # ibv_rc_pingpong -g 0 -d mlx5_0 -i 1 192.168.1.14
local address: LID 0x0018, QPN 0x00002e, PSN 0x0551e4, GID fe80::b883:3ff:ff8e:74c0
remote address: LID 0x0004, QPN 0x00002e, PSN 0x507e79, GID fe80::b883:3ff:ff8e:c424
8192000 bytes in 0.00 seconds = 15240.93 Mbit/sec
1000 iters in 0.00 seconds = 4.30 usec/iter
Пропускная способность
На сервере # ib_send_bw
************************************
* Waiting for client to connect... *
************************************
---------------------------------------------------------------------------------------
Send BW Test
Dual-port : OFF Device : mlx5_0
Number of qps : 1 Transport type : IB
Connection type : RC Using SRQ : OFF
RX depth : 512
CQ Moderation : 1
Mtu : 4096[B]
Link type : IB
Max inline data : 0[B]
rdma_cm QPs : OFF
Data ex. method : Ethernet
---------------------------------------------------------------------------------------
local address: LID 0x04 QPN 0x002f PSN 0x94e8b4
remote address: LID 0x18 QPN 0x002f PSN 0xdbf534
---------------------------------------------------------------------------------------
bytes iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps]
65536 1000 0.00 11552.95 0.184847
---------------------------------------------------------------------------------------
На клиенте # ib_send_bw 192.168.1.14
---------------------------------------------------------------------------------------
Send BW Test
Dual-port : OFF Device : mlx5_0
Number of qps : 1 Transport type : IB
Connection type : RC Using SRQ : OFF
TX depth : 128
CQ Moderation : 1
Mtu : 4096[B]
Link type : IB
Max inline data : 0[B]
rdma_cm QPs : OFF
Data ex. method : Ethernet
---------------------------------------------------------------------------------------
local address: LID 0x18 QPN 0x002f PSN 0xdbf534
remote address: LID 0x04 QPN 0x002f PSN 0x94e8b4
---------------------------------------------------------------------------------------
bytes iterations BW peak[MB/sec] BW average[MB/sec] MsgRate[Mpps]
65536 1000 11507.94 11496.83 0.183949
---------------------------------------------------------------------------------------
Проверка RDMA
На сервере # rping -s -a 192.168.1.14 -v
На клиенте # rping -c -a 192.168.1.14 -v
Счётчики устройства IB
[root@mix5 ~]# perfquery | grep Data
PortXmitData:....................0
PortRcvData:.....................159
[root@mix5 ~]# ibping 24
Pong from mix6.(none) (Lid 24): time 0.237 ms
Pong from mix6.(none) (Lid 24): time 0.183 ms
Pong from mix6.(none) (Lid 24): time 0.197 ms
[root@mix5 ~]# perfquery | grep Data
PortXmitData:....................216
PortRcvData:.....................375
Ещё тесты
на сервере # qperf
на клиенте # qperf 192.168.1.15 rc_bi_bw
rc_bi_bw:
bw = 23.9 GB/sec
Тестов много # qperf --help tests
Miscellaneous
conf Show configuration
quit Cause the server to quit
Socket Based
rds_bw RDS streaming one way bandwidth
rds_lat RDS one way latency
sctp_bw SCTP streaming one way bandwidth
sctp_lat SCTP one way latency
sdp_bw SDP streaming one way bandwidth
sdp_lat SDP one way latency
tcp_bw TCP streaming one way bandwidth
tcp_lat TCP one way latency
udp_bw UDP streaming one way bandwidth
udp_lat UDP one way latency
RDMA Send/Receive
rc_bi_bw RC streaming two way bandwidth
rc_bw RC streaming one way bandwidth
rc_lat RC one way latency
uc_bi_bw UC streaming two way bandwidth
uc_bw UC streaming one way bandwidth
uc_lat UC one way latency
ud_bi_bw UD streaming two way bandwidth
ud_bw UD streaming one way bandwidth
ud_lat UD one way latency
xrc_bi_bw XRC streaming two way bandwidth
xrc_bw XRC streaming one way bandwidth
xrc_lat XRC one way latency
RDMA
rc_rdma_read_bw RC RDMA read streaming one way bandwidth
rc_rdma_read_lat RC RDMA read one way latency
rc_rdma_write_bw RC RDMA write streaming one way bandwidth
rc_rdma_write_lat RC RDMA write one way latency
rc_rdma_write_poll_lat RC RDMA write one way polling latency
uc_rdma_write_bw UC RDMA write streaming one way bandwidth
uc_rdma_write_lat UC RDMA write one way latency
uc_rdma_write_poll_lat UC RDMA write one way polling latency
InfiniBand Atomics
rc_compare_swap_mr RC compare and swap messaging rate
rc_fetch_add_mr RC fetch and add messaging rate
Verification
ver_rc_compare_swap Verify RC compare and swap
ver_rc_fetch_add Verify RC fetch and add