Deploy SR-IOV Network with RDMA
Step 1: Create the NicClusterPolicy
The NicClusterPolicy deploys the NVIDIA IPAM plugin (nv-ipam) together with the secondary-network components (the CNI plugins and Multus):
apiVersion: mellanox.com/v1alpha1
kind: NicClusterPolicy
metadata:
  name: nic-cluster-policy
spec:
  nvIpam:
    image: nvidia-k8s-ipam
    repository: nvcr.io/nvidia/mellanox
    version: network-operator-v25.10.0
    enableWebhook: false
  secondaryNetwork:
    cniPlugins:
      image: plugins
      repository: nvcr.io/nvidia/mellanox
      version: network-operator-v25.10.0
    multus:
      image: multus-cni
      repository: nvcr.io/nvidia/mellanox
      version: network-operator-v25.10.0
kubectl apply -f nicclusterpolicy.yaml
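As a quick check (a suggested verification, not part of the original manifest), confirm that the policy was accepted and that the nv-ipam, CNI plugin, and Multus pods start in the operator namespace:
kubectl get nicclusterpolicy nic-cluster-policy
kubectl -n nvidia-network-operator get pods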
Step 2: Create an IPPool for nv-ipam
The IPPool defines the subnet, gateway, and per-node address block size that nv-ipam uses when assigning IPs on the secondary network:
apiVersion: nv-ipam.nvidia.com/v1alpha1
kind: IPPool
metadata:
  name: sriov-pool
  namespace: nvidia-network-operator
spec:
  subnet: 192.168.2.0/24
  perNodeBlockSize: 50
  gateway: 192.168.2.1
kubectl apply -f ippool.yaml
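To confirm the pool was accepted (a suggested check; the status layout may differ between nv-ipam releases), inspect it and its per-node block allocations:
kubectl -n nvidia-network-operator get ippools.nv-ipam.nvidia.com sriov-pool -o yaml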
Step 3: Configure SR-IOV
The SriovNetworkNodePolicy selects RDMA-capable NVIDIA NICs (vendor ID 15b3) on matching nodes, creates 8 virtual functions per selected NIC, and exposes them as the sriov_resource resource:
apiVersion: sriovnetwork.openshift.io/v1
kind: SriovNetworkNodePolicy
metadata:
  name: ethernet-sriov
  namespace: nvidia-network-operator
spec:
  deviceType: netdevice
  mtu: 1500
  nodeSelector:
    feature.node.kubernetes.io/pci-15b3.present: "true"
  nicSelector:
    vendor: "15b3"
  isRdma: true
  numVfs: 8
  priority: 90
  resourceName: sriov_resource
kubectl apply -f sriovnetworknodepolicy.yaml
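Applying the policy triggers VF creation and can take several minutes. One way to confirm progress (a suggested check, assuming the nvidia.com resource prefix used by the pod specs later in this guide) is to inspect the per-node SR-IOV sync status and the advertised allocatable resources:
kubectl -n nvidia-network-operator get sriovnetworknodestates.sriovnetwork.openshift.io -o jsonpath='{range .items[*]}{.metadata.name}{": "}{.status.syncStatus}{"\n"}{end}'
kubectl get nodes -o json | jq '.items[] | {node: .metadata.name, sriov: .status.allocatable["nvidia.com/sriov_resource"]}'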
Step 4: Create the SR-IOV Network
The SriovNetwork defines a secondary network in the default namespace that is backed by the sriov_resource pool and uses nv-ipam for address assignment:
apiVersion: sriovnetwork.openshift.io/v1
kind: SriovNetwork
metadata:
  name: sriov-rdma-network
  namespace: nvidia-network-operator
spec:
  ipam: |
    {
      "type": "nv-ipam",
      "poolName": "sriov-pool"
    }
  networkNamespace: default
  resourceName: sriov_resource
kubectl apply -f sriovnetwork.yaml
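The SriovNetwork is rendered into a NetworkAttachmentDefinition in the target namespace (default here), which is what the pod annotation in the next step refers to. You can verify that it exists:
kubectl -n default get network-attachment-definitions.k8s.cni.cncf.io sriov-rdma-network -o yaml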
Step 5: Deploy a test workload
The manifest below creates a server pod and a client pod, each attached to sriov-rdma-network and requesting one SR-IOV VF; pod anti-affinity on the client keeps the two pods on different nodes:
---
apiVersion: v1
kind: Pod
metadata:
  name: sriov-rdma-server
  namespace: default
  labels:
    app: sriov-rdma
    role: server
  annotations:
    k8s.v1.cni.cncf.io/networks: sriov-rdma-network
spec:
  tolerations:
    - key: "node-role.kubernetes.io/control-plane"
      operator: "Exists"
      effect: "NoSchedule"
    - key: "node-role.kubernetes.io/master"
      operator: "Exists"
      effect: "NoSchedule"
  restartPolicy: Never
  containers:
    - name: rdma-test
      image: nvcr.io/nvidia/doca/doca:3.1.0-full-rt-host
      command: ["/bin/bash", "-c", "sleep infinity"]
      securityContext:
        capabilities:
          add: ["IPC_LOCK"]
        privileged: true
      resources:
        requests:
          nvidia.com/sriov_resource: "1"
        limits:
          nvidia.com/sriov_resource: "1"
---
apiVersion: v1
kind: Pod
metadata:
  name: sriov-rdma-client
  namespace: default
  labels:
    app: sriov-rdma
    role: client
  annotations:
    k8s.v1.cni.cncf.io/networks: sriov-rdma-network
spec:
  affinity:
    podAntiAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        - labelSelector:
            matchExpressions:
              - key: role
                operator: In
                values:
                  - server
          topologyKey: kubernetes.io/hostname
  restartPolicy: Never
  containers:
    - name: rdma-test
      image: nvcr.io/nvidia/doca/doca:3.1.0-full-rt-host
      command: ["/bin/bash", "-c", "sleep infinity"]
      securityContext:
        capabilities:
          add: ["IPC_LOCK"]
        privileged: true
      resources:
        requests:
          nvidia.com/sriov_resource: "1"
        limits:
          nvidia.com/sriov_resource: "1"
kubectl apply -f pod.yaml
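Both pods need a VF and an IP from the pool before they become Ready, so it can help to wait for them before verifying (a suggested convenience, not part of the original steps):
kubectl -n default wait --for=condition=Ready pod/sriov-rdma-server pod/sriov-rdma-client --timeout=300s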
Step 6: Verify the deployment
Check that the pods are running on different nodes:
kubectl get pods -n default -o wide
Verify RDMA devices are available in the pods:
kubectl -n default exec sriov-rdma-server -- ibv_devices
kubectl -n default exec sriov-rdma-client -- ibv_devices
Capture the server IP and RDMA device names in environment variables:
export SERVER_IP=$(kubectl get pod sriov-rdma-server -n default -o jsonpath='{.metadata.annotations.k8s\.v1\.cni\.cncf\.io/network-status}' | jq -r '.[] | select(.name=="default/sriov-rdma-network") | .ips[0]')
export SERVER_RDMA_DEV=$(kubectl -n default exec sriov-rdma-server -- ibv_devices | awk 'NR==3 {print $1}')
export CLIENT_RDMA_DEV=$(kubectl -n default exec sriov-rdma-client -- ibv_devices | awk 'NR==3 {print $1}')
echo "Server IP: $SERVER_IP"
echo "Server RDMA Device: $SERVER_RDMA_DEV"
echo "Client RDMA Device: $CLIENT_RDMA_DEV"
Step 7: Test RDMA connectivity
Start the RDMA bandwidth test server:
kubectl -n default exec -it sriov-rdma-server -- bash -lc "ib_write_bw -d $SERVER_RDMA_DEV -R -a --report_gbits"
In a separate terminal, run the RDMA bandwidth test client:
kubectl -n default exec -it sriov-rdma-client -- bash -lc "ib_write_bw -d $CLIENT_RDMA_DEV -R -a --report_gbits $SERVER_IP"
Note
The commands above use the first RDMA device reported by ibv_devices in each pod. To use a different device, set the environment variables manually or substitute the device names directly in the commands.
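For example, to pin both tests to a specific device (mlx5_4 is only a placeholder for a name reported by ibv_devices in your environment):
export SERVER_RDMA_DEV=mlx5_4
export CLIENT_RDMA_DEV=mlx5_4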
Complete Configuration
The manifests from the previous steps, combined into a single multi-document file:
apiVersion: mellanox.com/v1alpha1
kind: NicClusterPolicy
metadata:
  name: nic-cluster-policy
spec:
  nvIpam:
    image: nvidia-k8s-ipam
    repository: nvcr.io/nvidia/mellanox
    version: network-operator-v25.10.0
    enableWebhook: false
  secondaryNetwork:
    cniPlugins:
      image: plugins
      repository: nvcr.io/nvidia/mellanox
      version: network-operator-v25.10.0
    multus:
      image: multus-cni
      repository: nvcr.io/nvidia/mellanox
      version: network-operator-v25.10.0
---
apiVersion: nv-ipam.nvidia.com/v1alpha1
kind: IPPool
metadata:
  name: sriov-pool
  namespace: nvidia-network-operator
spec:
  subnet: 192.168.2.0/24
  perNodeBlockSize: 50
  gateway: 192.168.2.1
---
apiVersion: sriovnetwork.openshift.io/v1
kind: SriovNetworkNodePolicy
metadata:
  name: ethernet-sriov
  namespace: nvidia-network-operator
spec:
  deviceType: netdevice
  mtu: 1500
  nodeSelector:
    feature.node.kubernetes.io/pci-15b3.present: "true"
  nicSelector:
    vendor: "15b3"
  isRdma: true
  numVfs: 8
  priority: 90
  resourceName: sriov_resource
---
apiVersion: sriovnetwork.openshift.io/v1
kind: SriovNetwork
metadata:
  name: sriov-rdma-network
  namespace: nvidia-network-operator
spec:
  ipam: |
    {
      "type": "nv-ipam",
      "poolName": "sriov-pool"
    }
  networkNamespace: default
  resourceName: sriov_resource
---
apiVersion: v1
kind: Pod
metadata:
  name: sriov-rdma-server
  namespace: default
  labels:
    app: sriov-rdma
    role: server
  annotations:
    k8s.v1.cni.cncf.io/networks: sriov-rdma-network
spec:
  tolerations:
    - key: "node-role.kubernetes.io/control-plane"
      operator: "Exists"
      effect: "NoSchedule"
    - key: "node-role.kubernetes.io/master"
      operator: "Exists"
      effect: "NoSchedule"
  restartPolicy: Never
  containers:
    - name: rdma-test
      image: nvcr.io/nvidia/doca/doca:3.1.0-full-rt-host
      command: ["/bin/bash", "-c", "sleep infinity"]
      securityContext:
        capabilities:
          add: ["IPC_LOCK"]
        privileged: true
      resources:
        requests:
          nvidia.com/sriov_resource: "1"
        limits:
          nvidia.com/sriov_resource: "1"
---
apiVersion: v1
kind: Pod
metadata:
  name: sriov-rdma-client
  namespace: default
  labels:
    app: sriov-rdma
    role: client
  annotations:
    k8s.v1.cni.cncf.io/networks: sriov-rdma-network
spec:
  affinity:
    podAntiAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        - labelSelector:
            matchExpressions:
              - key: role
                operator: In
                values:
                  - server
          topologyKey: kubernetes.io/hostname
  restartPolicy: Never
  containers:
    - name: rdma-test
      image: nvcr.io/nvidia/doca/doca:3.1.0-full-rt-host
      command: ["/bin/bash", "-c", "sleep infinity"]
      securityContext:
        capabilities:
          add: ["IPC_LOCK"]
        privileged: true
      resources:
        requests:
          nvidia.com/sriov_resource: "1"
        limits:
          nvidia.com/sriov_resource: "1"