Deploy IP over InfiniBand with RDMA Shared Device

Note

You can automate the configuration of this use case with NVIDIA Kubernetes Launch Kit. For more details, see Configuration Assistance with Kubernetes Launch Kit.

Step 1: Create NicClusterPolicy with IPoIB support and 3 RDMA shared device pools

# NicClusterPolicy enabling the DOCA driver, three RDMA shared-device pools,
# nv-ipam, and the secondary-network components (CNI plugins, Multus, IPoIB).
# NOTE(review): every image is pulled from the nvcr.io/nvstaging registry and
# most carry a beta tag; confirm these are the values readers should use.
apiVersion: mellanox.com/v1alpha1
kind: NicClusterPolicy
metadata:
  name: nic-cluster-policy
spec:
  ofedDriver:
    repository: nvcr.io/nvstaging/mellanox
    image: doca-driver
    version: 'doca3.4.0-26.04-0.6.1.0-0'
  rdmaSharedDevicePlugin:
    repository: nvcr.io/nvstaging/mellanox
    image: k8s-rdma-shared-dev-plugin
    version: 'network-operator-v26.4.0-beta.7'
    # The plugin configuration is a JSON document carried as a string.
    # It declares one pool per InfiniBand interface (ibs1f0, ibs1f1, ibs2f0),
    # named rdma_shared_device_a, rdma_shared_device_b, rdma_shared_device_c.
    config: |
      {
        "configList": [
          {
            "resourceName": "rdma_shared_device_a",
            "rdmaHcaMax": 63,
            "selectors": {
              "ifNames": ["ibs1f0"]
            }
          },
          {
            "resourceName": "rdma_shared_device_b",
            "rdmaHcaMax": 63,
            "selectors": {
              "ifNames": ["ibs1f1"]
            }
          },
          {
            "resourceName": "rdma_shared_device_c",
            "rdmaHcaMax": 63,
            "selectors": {
              "ifNames": ["ibs2f0"]
            }
          }
        ]
      }
  nvIpam:
    repository: nvcr.io/nvstaging/mellanox
    image: nvidia-k8s-ipam
    version: 'network-operator-v26.4.0-beta.7'
    enableWebhook: false
    imagePullSecrets: []
  secondaryNetwork:
    cniPlugins:
      repository: nvcr.io/nvstaging/mellanox
      image: plugins
      version: 'network-operator-v26.4.0-beta.7'
    multus:
      repository: nvcr.io/nvstaging/mellanox
      image: multus-cni
      version: 'network-operator-v26.4.0-beta.7'
    ipoib:
      repository: nvcr.io/nvstaging/mellanox
      image: ipoib-cni
      version: 'network-operator-v26.4.0-beta.7'

Save the manifest above as nicclusterpolicy.yaml, then apply it:

kubectl apply -f nicclusterpolicy.yaml

Step 2: Create IPPool CRs for nv-ipam with 3 IP pools

# Address pool referenced by ipoib-network-a.
apiVersion: nv-ipam.nvidia.com/v1alpha1
kind: IPPool
metadata:
  namespace: nvidia-network-operator
  name: ipoib-pool-a
spec:
  subnet: '192.168.5.0/24'
  gateway: '192.168.5.1'
  perNodeBlockSize: 50
---
# Address pool referenced by ipoib-network-b.
apiVersion: nv-ipam.nvidia.com/v1alpha1
kind: IPPool
metadata:
  namespace: nvidia-network-operator
  name: ipoib-pool-b
spec:
  subnet: '192.168.6.0/24'
  gateway: '192.168.6.1'
  perNodeBlockSize: 50
---
# Address pool referenced by ipoib-network-c.
apiVersion: nv-ipam.nvidia.com/v1alpha1
kind: IPPool
metadata:
  namespace: nvidia-network-operator
  name: ipoib-pool-c
spec:
  subnet: '192.168.7.0/24'
  gateway: '192.168.7.1'
  perNodeBlockSize: 50

Save the manifests above as ippool.yaml, then apply them:

kubectl apply -f ippool.yaml

Step 3: Create IPoIBNetwork CRs for 3 IPoIB networks

# IPoIB network on interface ibs1f0, created in the default namespace;
# its IPAM configuration points at ipoib-pool-a.
apiVersion: mellanox.com/v1alpha1
kind: IPoIBNetwork
metadata:
  name: ipoib-network-a
spec:
  master: ibs1f0
  networkNamespace: default
  # IPAM settings are a JSON document carried as a string.
  ipam: |
    {
      "type": "nv-ipam",
      "poolName": "ipoib-pool-a"
    }
---
# IPoIB network on interface ibs1f1, created in the default namespace;
# its IPAM configuration points at ipoib-pool-b.
apiVersion: mellanox.com/v1alpha1
kind: IPoIBNetwork
metadata:
  name: ipoib-network-b
spec:
  master: ibs1f1
  networkNamespace: default
  # IPAM settings are a JSON document carried as a string.
  ipam: |
    {
      "type": "nv-ipam",
      "poolName": "ipoib-pool-b"
    }
---
# IPoIB network on interface ibs2f0, created in the default namespace;
# its IPAM configuration points at ipoib-pool-c.
apiVersion: mellanox.com/v1alpha1
kind: IPoIBNetwork
metadata:
  name: ipoib-network-c
spec:
  master: ibs2f0
  networkNamespace: default
  # IPAM settings are a JSON document carried as a string.
  ipam: |
    {
      "type": "nv-ipam",
      "poolName": "ipoib-pool-c"
    }

Save the manifests above as ipoibnetwork.yaml, then apply them:

kubectl apply -f ipoibnetwork.yaml

Step 4: Deploy test workloads for 3 IPoIB networks

# Test pod attached to ipoib-network-a through the networks annotation;
# it requests one rdma/rdma_shared_device_a resource.
apiVersion: v1
kind: Pod
metadata:
  name: ipoib-test-pod-a
  annotations:
    k8s.v1.cni.cncf.io/networks: ipoib-network-a
spec:
  containers:
    - name: test-container
      image: mellanox/rping-test
      # Keeps the container running so commands can be exec'd into it.
      command:
        - /bin/bash
        - '-c'
        - sleep infinity
      securityContext:
        capabilities:
          # Presumably required for RDMA memory registration; confirm.
          add:
            - IPC_LOCK
      resources:
        limits:
          rdma/rdma_shared_device_a: 1
        requests:
          rdma/rdma_shared_device_a: 1
---
# Test pod attached to ipoib-network-b through the networks annotation;
# it requests one rdma/rdma_shared_device_b resource.
apiVersion: v1
kind: Pod
metadata:
  name: ipoib-test-pod-b
  annotations:
    k8s.v1.cni.cncf.io/networks: ipoib-network-b
spec:
  containers:
    - name: test-container
      image: mellanox/rping-test
      # Keeps the container running so commands can be exec'd into it.
      command:
        - /bin/bash
        - '-c'
        - sleep infinity
      securityContext:
        capabilities:
          # Presumably required for RDMA memory registration; confirm.
          add:
            - IPC_LOCK
      resources:
        limits:
          rdma/rdma_shared_device_b: 1
        requests:
          rdma/rdma_shared_device_b: 1
---
# Test pod attached to ipoib-network-c through the networks annotation;
# it requests one rdma/rdma_shared_device_c resource.
apiVersion: v1
kind: Pod
metadata:
  name: ipoib-test-pod-c
  annotations:
    k8s.v1.cni.cncf.io/networks: ipoib-network-c
spec:
  containers:
    - name: test-container
      image: mellanox/rping-test
      # Keeps the container running so commands can be exec'd into it.
      command:
        - /bin/bash
        - '-c'
        - sleep infinity
      securityContext:
        capabilities:
          # Presumably required for RDMA memory registration; confirm.
          add:
            - IPC_LOCK
      resources:
        limits:
          rdma/rdma_shared_device_c: 1
        requests:
          rdma/rdma_shared_device_c: 1

Save the manifests above as pod.yaml, then apply them:

kubectl apply -f pod.yaml

Verify the deployment:

kubectl exec -it ipoib-test-pod-a -- ibstat
kubectl exec -it ipoib-test-pod-a -- ip addr show

kubectl exec -it ipoib-test-pod-b -- ibstat
kubectl exec -it ipoib-test-pod-b -- ip addr show

kubectl exec -it ipoib-test-pod-c -- ibstat
kubectl exec -it ipoib-test-pod-c -- ip addr show

Complete Configuration

# NicClusterPolicy enabling the DOCA driver, three RDMA shared-device pools,
# nv-ipam, and the secondary-network components (CNI plugins, Multus, IPoIB).
# NOTE(review): every image is pulled from the nvcr.io/nvstaging registry and
# most carry a beta tag; confirm these are the values readers should use.
apiVersion: mellanox.com/v1alpha1
kind: NicClusterPolicy
metadata:
  name: nic-cluster-policy
spec:
  ofedDriver:
    repository: nvcr.io/nvstaging/mellanox
    image: doca-driver
    version: 'doca3.4.0-26.04-0.6.1.0-0'
  rdmaSharedDevicePlugin:
    repository: nvcr.io/nvstaging/mellanox
    image: k8s-rdma-shared-dev-plugin
    version: 'network-operator-v26.4.0-beta.7'
    # The plugin configuration is a JSON document carried as a string.
    # It declares one pool per InfiniBand interface (ibs1f0, ibs1f1, ibs2f0),
    # named rdma_shared_device_a, rdma_shared_device_b, rdma_shared_device_c.
    config: |
      {
        "configList": [
          {
            "resourceName": "rdma_shared_device_a",
            "rdmaHcaMax": 63,
            "selectors": {
              "ifNames": ["ibs1f0"]
            }
          },
          {
            "resourceName": "rdma_shared_device_b",
            "rdmaHcaMax": 63,
            "selectors": {
              "ifNames": ["ibs1f1"]
            }
          },
          {
            "resourceName": "rdma_shared_device_c",
            "rdmaHcaMax": 63,
            "selectors": {
              "ifNames": ["ibs2f0"]
            }
          }
        ]
      }
  nvIpam:
    repository: nvcr.io/nvstaging/mellanox
    image: nvidia-k8s-ipam
    version: 'network-operator-v26.4.0-beta.7'
    enableWebhook: false
    imagePullSecrets: []
  secondaryNetwork:
    cniPlugins:
      repository: nvcr.io/nvstaging/mellanox
      image: plugins
      version: 'network-operator-v26.4.0-beta.7'
    multus:
      repository: nvcr.io/nvstaging/mellanox
      image: multus-cni
      version: 'network-operator-v26.4.0-beta.7'
    ipoib:
      repository: nvcr.io/nvstaging/mellanox
      image: ipoib-cni
      version: 'network-operator-v26.4.0-beta.7'
---
# Address pool referenced by ipoib-network-a.
apiVersion: nv-ipam.nvidia.com/v1alpha1
kind: IPPool
metadata:
  namespace: nvidia-network-operator
  name: ipoib-pool-a
spec:
  subnet: '192.168.5.0/24'
  gateway: '192.168.5.1'
  perNodeBlockSize: 50
---
# Address pool referenced by ipoib-network-b.
apiVersion: nv-ipam.nvidia.com/v1alpha1
kind: IPPool
metadata:
  namespace: nvidia-network-operator
  name: ipoib-pool-b
spec:
  subnet: '192.168.6.0/24'
  gateway: '192.168.6.1'
  perNodeBlockSize: 50
---
# Address pool referenced by ipoib-network-c.
apiVersion: nv-ipam.nvidia.com/v1alpha1
kind: IPPool
metadata:
  namespace: nvidia-network-operator
  name: ipoib-pool-c
spec:
  subnet: '192.168.7.0/24'
  gateway: '192.168.7.1'
  perNodeBlockSize: 50
---
# IPoIB network on interface ibs1f0, created in the default namespace;
# its IPAM configuration points at ipoib-pool-a.
apiVersion: mellanox.com/v1alpha1
kind: IPoIBNetwork
metadata:
  name: ipoib-network-a
spec:
  master: ibs1f0
  networkNamespace: default
  # IPAM settings are a JSON document carried as a string.
  ipam: |
    {
      "type": "nv-ipam",
      "poolName": "ipoib-pool-a"
    }
---
# IPoIB network on interface ibs1f1, created in the default namespace;
# its IPAM configuration points at ipoib-pool-b.
apiVersion: mellanox.com/v1alpha1
kind: IPoIBNetwork
metadata:
  name: ipoib-network-b
spec:
  master: ibs1f1
  networkNamespace: default
  # IPAM settings are a JSON document carried as a string.
  ipam: |
    {
      "type": "nv-ipam",
      "poolName": "ipoib-pool-b"
    }
---
# IPoIB network on interface ibs2f0, created in the default namespace;
# its IPAM configuration points at ipoib-pool-c.
apiVersion: mellanox.com/v1alpha1
kind: IPoIBNetwork
metadata:
  name: ipoib-network-c
spec:
  master: ibs2f0
  networkNamespace: default
  # IPAM settings are a JSON document carried as a string.
  ipam: |
    {
      "type": "nv-ipam",
      "poolName": "ipoib-pool-c"
    }
---
# Test pod attached to ipoib-network-a through the networks annotation;
# it requests one rdma/rdma_shared_device_a resource.
apiVersion: v1
kind: Pod
metadata:
  name: ipoib-test-pod-a
  annotations:
    k8s.v1.cni.cncf.io/networks: ipoib-network-a
spec:
  containers:
    - name: test-container
      image: mellanox/rping-test
      # Keeps the container running so commands can be exec'd into it.
      command:
        - /bin/bash
        - '-c'
        - sleep infinity
      securityContext:
        capabilities:
          # Presumably required for RDMA memory registration; confirm.
          add:
            - IPC_LOCK
      resources:
        limits:
          rdma/rdma_shared_device_a: 1
        requests:
          rdma/rdma_shared_device_a: 1
---
# Test pod attached to ipoib-network-b through the networks annotation;
# it requests one rdma/rdma_shared_device_b resource.
apiVersion: v1
kind: Pod
metadata:
  name: ipoib-test-pod-b
  annotations:
    k8s.v1.cni.cncf.io/networks: ipoib-network-b
spec:
  containers:
    - name: test-container
      image: mellanox/rping-test
      # Keeps the container running so commands can be exec'd into it.
      command:
        - /bin/bash
        - '-c'
        - sleep infinity
      securityContext:
        capabilities:
          # Presumably required for RDMA memory registration; confirm.
          add:
            - IPC_LOCK
      resources:
        limits:
          rdma/rdma_shared_device_b: 1
        requests:
          rdma/rdma_shared_device_b: 1
---
# Test pod attached to ipoib-network-c through the networks annotation;
# it requests one rdma/rdma_shared_device_c resource.
apiVersion: v1
kind: Pod
metadata:
  name: ipoib-test-pod-c
  annotations:
    k8s.v1.cni.cncf.io/networks: ipoib-network-c
spec:
  containers:
    - name: test-container
      image: mellanox/rping-test
      # Keeps the container running so commands can be exec'd into it.
      command:
        - /bin/bash
        - '-c'
        - sleep infinity
      securityContext:
        capabilities:
          # Presumably required for RDMA memory registration; confirm.
          add:
            - IPC_LOCK
      resources:
        limits:
          rdma/rdma_shared_device_c: 1
        requests:
          rdma/rdma_shared_device_c: 1