Deploy IP over InfiniBand with RDMA Shared Device
Step 1: Create NicClusterPolicy with IPoIB support and 3 RDMA shared device pools
apiVersion: mellanox.com/v1alpha1
kind: NicClusterPolicy
metadata:
  name: nic-cluster-policy
spec:
  ofedDriver:
    image: doca-driver
    repository: nvcr.io/nvstaging/mellanox
    version: doca3.1.0-25.07-0.8.0.0-0
  rdmaSharedDevicePlugin:
    image: k8s-rdma-shared-dev-plugin
    repository: nvcr.io/nvstaging/mellanox
    version: network-operator-v25.7.0-rc.1
    config: |
      {
        "configList": [
          {
            "resourceName": "rdma_shared_device_a",
            "rdmaHcaMax": 63,
            "selectors": {
              "ifNames": ["ibs1f0"]
            }
          },
          {
            "resourceName": "rdma_shared_device_b",
            "rdmaHcaMax": 63,
            "selectors": {
              "ifNames": ["ibs1f1"]
            }
          },
          {
            "resourceName": "rdma_shared_device_c",
            "rdmaHcaMax": 63,
            "selectors": {
              "ifNames": ["ibs2f0"]
            }
          }
        ]
      }
  nvIpam:
    image: nvidia-k8s-ipam
    repository: nvcr.io/nvstaging/mellanox
    version: network-operator-v25.7.0-rc.1
    imagePullSecrets: []
    enableWebhook: false
  secondaryNetwork:
    cniPlugins:
      image: plugins
      repository: nvcr.io/nvstaging/mellanox
      version: network-operator-v25.7.0-rc.1
    multus:
      image: multus-cni
      repository: nvcr.io/nvstaging/mellanox
      version: network-operator-v25.7.0-rc.1
    ipoib:
      image: ipoib-cni
      repository: nvcr.io/nvstaging/mellanox
      version: network-operator-v25.7.0-rc.1
kubectl apply -f nicclusterpolicy.yaml
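Before continuing, it is worth confirming that the operator has finished rolling out the driver and plugin pods. A minimal check, assuming the operator runs in the nvidia-network-operator namespace:

# The policy should eventually report the "ready" state
kubectl get nicclusterpolicy nic-cluster-policy -o jsonpath='{.status.state}'

# OFED driver, device plugin, Multus, and CNI pods should all be Running
kubectl -n nvidia-network-operator get pods

Once the device plugin is up, each node with the matching interfaces should advertise the shared RDMA resources (63 allocations per pool, per the rdmaHcaMax setting above); substitute a real node name below:

kubectl describe node <node-name> | grep rdma/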
Step 2: Create IPPool CRs for nv-ipam with 3 IP pools
apiVersion: nv-ipam.nvidia.com/v1alpha1
kind: IPPool
metadata:
  name: ipoib-pool-a
  namespace: nvidia-network-operator
spec:
  subnet: 192.168.5.0/24
  perNodeBlockSize: 50
  gateway: 192.168.5.1
---
apiVersion: nv-ipam.nvidia.com/v1alpha1
kind: IPPool
metadata:
  name: ipoib-pool-b
  namespace: nvidia-network-operator
spec:
  subnet: 192.168.6.0/24
  perNodeBlockSize: 50
  gateway: 192.168.6.1
---
apiVersion: nv-ipam.nvidia.com/v1alpha1
kind: IPPool
metadata:
  name: ipoib-pool-c
  namespace: nvidia-network-operator
spec:
  subnet: 192.168.7.0/24
  perNodeBlockSize: 50
  gateway: 192.168.7.1
kubectl apply -f ippool.yaml
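To confirm that nv-ipam has accepted the pools, list them in the operator namespace (a quick sanity check; ippools.nv-ipam.nvidia.com is the plural CRD name):

kubectl -n nvidia-network-operator get ippools.nv-ipam.nvidia.com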
Step 3: Create IPoIBNetwork CRs for 3 IPoIB networks
apiVersion: mellanox.com/v1alpha1
kind: IPoIBNetwork
metadata:
  name: ipoib-network-a
spec:
  networkNamespace: "default"
  master: "ibs1f0"
  ipam: |
    {
      "type": "nv-ipam",
      "poolName": "ipoib-pool-a"
    }
---
apiVersion: mellanox.com/v1alpha1
kind: IPoIBNetwork
metadata:
  name: ipoib-network-b
spec:
  networkNamespace: "default"
  master: "ibs1f1"
  ipam: |
    {
      "type": "nv-ipam",
      "poolName": "ipoib-pool-b"
    }
---
apiVersion: mellanox.com/v1alpha1
kind: IPoIBNetwork
metadata:
  name: ipoib-network-c
spec:
  networkNamespace: "default"
  master: "ibs2f0"
  ipam: |
    {
      "type": "nv-ipam",
      "poolName": "ipoib-pool-c"
    }
kubectl apply -f ipoibnetwork.yaml
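Each IPoIBNetwork should reach the ready state, and a matching NetworkAttachmentDefinition is created in the target namespace. A quick check:

kubectl get ipoibnetworks.mellanox.com
kubectl -n default get network-attachment-definitions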
Step 4: Deploy test workloads for 3 IPoIB networks
apiVersion: v1
kind: Pod
metadata:
  name: ipoib-test-pod-a
  annotations:
    k8s.v1.cni.cncf.io/networks: ipoib-network-a
spec:
  containers:
    - name: test-container
      image: mellanox/rping-test
      command: ["/bin/bash", "-c", "sleep infinity"]
      securityContext:
        capabilities:
          add: ["IPC_LOCK"]
      resources:
        requests:
          rdma/rdma_shared_device_a: 1
        limits:
          rdma/rdma_shared_device_a: 1
---
apiVersion: v1
kind: Pod
metadata:
  name: ipoib-test-pod-b
  annotations:
    k8s.v1.cni.cncf.io/networks: ipoib-network-b
spec:
  containers:
    - name: test-container
      image: mellanox/rping-test
      command: ["/bin/bash", "-c", "sleep infinity"]
      securityContext:
        capabilities:
          add: ["IPC_LOCK"]
      resources:
        requests:
          rdma/rdma_shared_device_b: 1
        limits:
          rdma/rdma_shared_device_b: 1
---
apiVersion: v1
kind: Pod
metadata:
  name: ipoib-test-pod-c
  annotations:
    k8s.v1.cni.cncf.io/networks: ipoib-network-c
spec:
  containers:
    - name: test-container
      image: mellanox/rping-test
      command: ["/bin/bash", "-c", "sleep infinity"]
      securityContext:
        capabilities:
          add: ["IPC_LOCK"]
      resources:
        requests:
          rdma/rdma_shared_device_c: 1
        limits:
          rdma/rdma_shared_device_c: 1
kubectl apply -f pod.yaml
Verify the deployment:
kubectl exec -it ipoib-test-pod-a -- ibstat
kubectl exec -it ipoib-test-pod-a -- ip addr show
kubectl exec -it ipoib-test-pod-b -- ibstat
kubectl exec -it ipoib-test-pod-b -- ip addr show
kubectl exec -it ipoib-test-pod-c -- ibstat
kubectl exec -it ipoib-test-pod-c -- ip addr show
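An end-to-end RDMA test needs two pods on the same IPoIB network, while the manifests above place one pod on each. The sketch below assumes a second pod, here called ipoib-test-pod-a2 (a hypothetical copy of ipoib-test-pod-a), attached to ipoib-network-a, and that Multus named the secondary interface net1 (its usual default):

# Start an rping server in the first pod
kubectl exec -it ipoib-test-pod-a -- rping -s -v

# Look up the server's IPoIB address on the secondary interface
kubectl exec -it ipoib-test-pod-a -- ip -o addr show dev net1

# From the second pod, run the client against that address
# (192.168.5.2 is an example address from ipoib-pool-a)
kubectl exec -it ipoib-test-pod-a2 -- rping -c -a 192.168.5.2 -C 10 -v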
Complete Configuration
apiVersion: mellanox.com/v1alpha1
kind: NicClusterPolicy
metadata:
  name: nic-cluster-policy
spec:
  ofedDriver:
    image: doca-driver
    repository: nvcr.io/nvstaging/mellanox
    version: doca3.1.0-25.07-0.8.0.0-0
  rdmaSharedDevicePlugin:
    image: k8s-rdma-shared-dev-plugin
    repository: nvcr.io/nvstaging/mellanox
    version: network-operator-v25.7.0-rc.1
    config: |
      {
        "configList": [
          {
            "resourceName": "rdma_shared_device_a",
            "rdmaHcaMax": 63,
            "selectors": {
              "ifNames": ["ibs1f0"]
            }
          },
          {
            "resourceName": "rdma_shared_device_b",
            "rdmaHcaMax": 63,
            "selectors": {
              "ifNames": ["ibs1f1"]
            }
          },
          {
            "resourceName": "rdma_shared_device_c",
            "rdmaHcaMax": 63,
            "selectors": {
              "ifNames": ["ibs2f0"]
            }
          }
        ]
      }
  nvIpam:
    image: nvidia-k8s-ipam
    repository: nvcr.io/nvstaging/mellanox
    version: network-operator-v25.7.0-rc.1
    imagePullSecrets: []
    enableWebhook: false
  secondaryNetwork:
    cniPlugins:
      image: plugins
      repository: nvcr.io/nvstaging/mellanox
      version: network-operator-v25.7.0-rc.1
    multus:
      image: multus-cni
      repository: nvcr.io/nvstaging/mellanox
      version: network-operator-v25.7.0-rc.1
    ipoib:
      image: ipoib-cni
      repository: nvcr.io/nvstaging/mellanox
      version: network-operator-v25.7.0-rc.1
---
apiVersion: nv-ipam.nvidia.com/v1alpha1
kind: IPPool
metadata:
  name: ipoib-pool-a
  namespace: nvidia-network-operator
spec:
  subnet: 192.168.5.0/24
  perNodeBlockSize: 50
  gateway: 192.168.5.1
---
apiVersion: nv-ipam.nvidia.com/v1alpha1
kind: IPPool
metadata:
  name: ipoib-pool-b
  namespace: nvidia-network-operator
spec:
  subnet: 192.168.6.0/24
  perNodeBlockSize: 50
  gateway: 192.168.6.1
---
apiVersion: nv-ipam.nvidia.com/v1alpha1
kind: IPPool
metadata:
  name: ipoib-pool-c
  namespace: nvidia-network-operator
spec:
  subnet: 192.168.7.0/24
  perNodeBlockSize: 50
  gateway: 192.168.7.1
---
apiVersion: mellanox.com/v1alpha1
kind: IPoIBNetwork
metadata:
  name: ipoib-network-a
spec:
  networkNamespace: "default"
  master: "ibs1f0"
  ipam: |
    {
      "type": "nv-ipam",
      "poolName": "ipoib-pool-a"
    }
---
apiVersion: mellanox.com/v1alpha1
kind: IPoIBNetwork
metadata:
  name: ipoib-network-b
spec:
  networkNamespace: "default"
  master: "ibs1f1"
  ipam: |
    {
      "type": "nv-ipam",
      "poolName": "ipoib-pool-b"
    }
---
apiVersion: mellanox.com/v1alpha1
kind: IPoIBNetwork
metadata:
  name: ipoib-network-c
spec:
  networkNamespace: "default"
  master: "ibs2f0"
  ipam: |
    {
      "type": "nv-ipam",
      "poolName": "ipoib-pool-c"
    }
---
apiVersion: v1
kind: Pod
metadata:
  name: ipoib-test-pod-a
  annotations:
    k8s.v1.cni.cncf.io/networks: ipoib-network-a
spec:
  containers:
    - name: test-container
      image: mellanox/rping-test
      command: ["/bin/bash", "-c", "sleep infinity"]
      securityContext:
        capabilities:
          add: ["IPC_LOCK"]
      resources:
        requests:
          rdma/rdma_shared_device_a: 1
        limits:
          rdma/rdma_shared_device_a: 1
---
apiVersion: v1
kind: Pod
metadata:
  name: ipoib-test-pod-b
  annotations:
    k8s.v1.cni.cncf.io/networks: ipoib-network-b
spec:
  containers:
    - name: test-container
      image: mellanox/rping-test
      command: ["/bin/bash", "-c", "sleep infinity"]
      securityContext:
        capabilities:
          add: ["IPC_LOCK"]
      resources:
        requests:
          rdma/rdma_shared_device_b: 1
        limits:
          rdma/rdma_shared_device_b: 1
---
apiVersion: v1
kind: Pod
metadata:
  name: ipoib-test-pod-c
  annotations:
    k8s.v1.cni.cncf.io/networks: ipoib-network-c
spec:
  containers:
    - name: test-container
      image: mellanox/rping-test
      command: ["/bin/bash", "-c", "sleep infinity"]
      securityContext:
        capabilities:
          add: ["IPC_LOCK"]
      resources:
        requests:
          rdma/rdma_shared_device_c: 1
        limits:
          rdma/rdma_shared_device_c: 1
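The consolidated manifest above can also be saved to a single file and applied in one pass; for example, assuming it is stored as network-config.yaml:

kubectl apply -f network-config.yaml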