Deploy SR-IOV InfiniBand Network with RDMA

Note

You can automate the configuration of this use case with NVIDIA Kubernetes Launch Kit. For more details, see Configuration Assistance with Kubernetes Launch Kit.

Step 1: Create NicClusterPolicy for InfiniBand

Save the following manifest as nicclusterpolicy.yaml. It deploys the NVIDIA DOCA driver together with the nv-ipam IPAM plugin and the secondary-network components (CNI plugins and Multus):

apiVersion: mellanox.com/v1alpha1
kind: NicClusterPolicy
metadata:
  name: nic-cluster-policy
spec:
  ofedDriver:
    image: doca-driver
    repository: nvcr.io/nvstaging/mellanox
    version: doca3.4.0-26.04-0.6.1.0-0
  nvIpam:
    image: nvidia-k8s-ipam
    repository: nvcr.io/nvstaging/mellanox
    version: network-operator-v26.4.0-beta.7
    imagePullSecrets: []
    enableWebhook: false
  secondaryNetwork:
    cniPlugins:
      image: plugins
      repository: nvcr.io/nvstaging/mellanox
      version: network-operator-v26.4.0-beta.7
    multus:
      image: multus-cni
      repository: nvcr.io/nvstaging/mellanox
      version: network-operator-v26.4.0-beta.7

Apply the manifest:

kubectl apply -f nicclusterpolicy.yaml
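
To confirm the policy was reconciled, check its state and the operator-managed pods (a quick sanity check, assuming the operator components run in the nvidia-network-operator namespace; the NicClusterPolicy reports ready once the driver and secondary-network pods are up):

kubectl get nicclusterpolicy nic-cluster-policy -o jsonpath='{.status.state}'
kubectl -n nvidia-network-operator get pods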

Step 2: Create IPPool for nv-ipam

Save the following IPPool as ippool.yaml. It defines the subnet that nv-ipam assigns addresses from; perNodeBlockSize: 50 reserves a block of 50 addresses for each node:

apiVersion: nv-ipam.nvidia.com/v1alpha1
kind: IPPool
metadata:
  name: sriov-ib-pool
  namespace: nvidia-network-operator
spec:
  subnet: 192.168.6.0/24
  perNodeBlockSize: 50
  gateway: 192.168.6.1

Apply the manifest:

kubectl apply -f ippool.yaml
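
You can verify that nv-ipam accepted the pool by inspecting the object; once the IPAM components are running, its status lists the IP block assigned to each node (a sketch, assuming default nv-ipam behavior):

kubectl -n nvidia-network-operator get ippools.nv-ipam.nvidia.com sriov-ib-pool -o yaml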

Step 3: Configure SR-IOV for InfiniBand

Save the following SriovNetworkNodePolicy as sriovnetworknodepolicy.yaml. It targets nodes that NFD has labeled as having NVIDIA (vendor 15b3) PCI devices, creates eight virtual functions per matching NIC with the link type set to InfiniBand and RDMA enabled, and exposes the VFs through the mlnxnics resource:

apiVersion: sriovnetwork.openshift.io/v1
kind: SriovNetworkNodePolicy
metadata:
  name: infiniband-sriov
  namespace: nvidia-network-operator
spec:
  deviceType: netdevice
  mtu: 1500
  nodeSelector:
    feature.node.kubernetes.io/pci-15b3.present: "true"
  nicSelector:
    vendor: "15b3"
  linkType: IB
  isRdma: true
  numVfs: 8
  priority: 90
  resourceName: mlnxnics

Apply the manifest:

kubectl apply -f sriovnetworknodepolicy.yaml
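
Applying a node policy triggers VF configuration on the selected nodes, which can take several minutes and may drain or reboot them. You can follow progress through the per-node SriovNetworkNodeState objects and then confirm the VFs are advertised as allocatable (<node-name> is a placeholder for one of your worker nodes):

kubectl -n nvidia-network-operator get sriovnetworknodestates.sriovnetwork.openshift.io
kubectl get node <node-name> -o jsonpath="{.status.allocatable['nvidia\.com/mlnxnics']}"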

Step 4: Create SriovIBNetwork

Save the following SriovIBNetwork as sriovibnetwork.yaml. It ties the mlnxnics resource to a secondary network in the default namespace and delegates IP assignment to the sriov-ib-pool created in Step 2:

apiVersion: sriovnetwork.openshift.io/v1
kind: SriovIBNetwork
metadata:
  name: sriov-ib-network
  namespace: nvidia-network-operator
spec:
  ipam: |
    {
      "type": "nv-ipam",
      "poolName": "sriov-ib-pool"
    }
  resourceName: mlnxnics
  linkState: enable
  networkNamespace: default

Apply the manifest:

kubectl apply -f sriovibnetwork.yaml
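
The SR-IOV operator renders a matching NetworkAttachmentDefinition in the namespace given by networkNamespace; this is the object that the pod annotation in the next step refers to. To confirm it exists:

kubectl -n default get network-attachment-definitions.k8s.cni.cncf.io sriov-ib-network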

Step 5: Deploy test workload

Save the following pod manifest as pod.yaml. The pod requests one VF through the nvidia.com/mlnxnics resource, attaches to sriov-ib-network through the Multus annotation, and adds the IPC_LOCK capability that RDMA applications need in order to pin memory:

apiVersion: v1
kind: Pod
metadata:
  name: sriov-ib-test-pod
  annotations:
    k8s.v1.cni.cncf.io/networks: sriov-ib-network
spec:
  containers:
  - name: test-container
    image: mellanox/rping-test
    command: ["/bin/bash", "-c", "sleep infinity"]
    securityContext:
      capabilities:
        add: ["IPC_LOCK"]
    resources:
      requests:
        nvidia.com/mlnxnics: '1'
      limits:
        nvidia.com/mlnxnics: '1'

Apply the manifest:

kubectl apply -f pod.yaml
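
Once the pod is Running, the Multus network-status annotation should show the additional InfiniBand interface with an address from sriov-ib-pool (the jsonpath below is one way to read it; inspecting kubectl get pod -o yaml works just as well):

kubectl get pod sriov-ib-test-pod -o jsonpath="{.metadata.annotations['k8s\.v1\.cni\.cncf\.io/network-status']}"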

Verify the deployment by listing the RDMA devices visible inside the pod and checking the port state:

kubectl exec -it sriov-ib-test-pod -- ibv_devices
kubectl exec -it sriov-ib-test-pod -- ibstat
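
ibv_devices should list the VF's IB device, and ibstat should report the port as Active. To exercise RDMA end to end, you can run rping between two copies of the test pod (a sketch: sriov-ib-test-pod-2 stands for a hypothetical second pod created from the same manifest under a different name, and <server-ib-ip> is the IB address shown by ip addr inside the server pod):

# In the first pod, start the rping server:
kubectl exec -it sriov-ib-test-pod -- rping -s -v

# In a second terminal, run the client against the server's IB address:
kubectl exec -it sriov-ib-test-pod-2 -- rping -c -a <server-ib-ip> -v -C 10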

Complete Configuration

For reference, the following combined manifest contains all of the resources from the previous steps and can be applied with a single kubectl apply -f command:

apiVersion: mellanox.com/v1alpha1
kind: NicClusterPolicy
metadata:
  name: nic-cluster-policy
spec:
  ofedDriver:
    image: doca-driver
    repository: nvcr.io/nvstaging/mellanox
    version: doca3.4.0-26.04-0.6.1.0-0
  nvIpam:
    image: nvidia-k8s-ipam
    repository: nvcr.io/nvstaging/mellanox
    version: network-operator-v26.4.0-beta.7
    imagePullSecrets: []
    enableWebhook: false
  secondaryNetwork:
    cniPlugins:
      image: plugins
      repository: nvcr.io/nvstaging/mellanox
      version: network-operator-v26.4.0-beta.7
    multus:
      image: multus-cni
      repository: nvcr.io/nvstaging/mellanox
      version: network-operator-v26.4.0-beta.7
---
apiVersion: nv-ipam.nvidia.com/v1alpha1
kind: IPPool
metadata:
  name: sriov-ib-pool
  namespace: nvidia-network-operator
spec:
  subnet: 192.168.6.0/24
  perNodeBlockSize: 50
  gateway: 192.168.6.1
---
apiVersion: sriovnetwork.openshift.io/v1
kind: SriovNetworkNodePolicy
metadata:
  name: infiniband-sriov
  namespace: nvidia-network-operator
spec:
  deviceType: netdevice
  mtu: 1500
  nodeSelector:
    feature.node.kubernetes.io/pci-15b3.present: "true"
  nicSelector:
    vendor: "15b3"
  linkType: IB
  isRdma: true
  numVfs: 8
  priority: 90
  resourceName: mlnxnics
---
apiVersion: sriovnetwork.openshift.io/v1
kind: SriovIBNetwork
metadata:
  name: sriov-ib-network
  namespace: nvidia-network-operator
spec:
  ipam: |
    {
      "type": "nv-ipam",
      "poolName": "sriov-ib-pool"
    }
  resourceName: mlnxnics
  linkState: enable
  networkNamespace: default
---
apiVersion: v1
kind: Pod
metadata:
  name: sriov-ib-test-pod
  annotations:
    k8s.v1.cni.cncf.io/networks: sriov-ib-network
spec:
  containers:
  - name: test-container
    image: mellanox/rping-test
    command: ["/bin/bash", "-c", "sleep infinity"]
    securityContext:
      capabilities:
        add: ["IPC_LOCK"]
    resources:
      requests:
        nvidia.com/mlnxnics: '1'
      limits:
        nvidia.com/mlnxnics: '1'