Kubernetes Container Device Interface (CDI)
Source: cnblogs · Author: haiyux · 2023-11-20

What is CDI?

The Container Device Interface (CDI) is a proposed standard that defines how devices are exposed to containers in a container runtime. Its goal is to let device vendors integrate their hardware into Kubernetes clusters without having to modify Kubernetes core code.

A CDI plugin is typically responsible for:

  1. Configuring a device for use by containers (for example, allocating device files or setting the necessary environment variables).
  2. Injecting the device resources into the container when it starts.

Official repository: https://github.com/cncf-tags/container-device-interface

Why do we need CDI?

Before CDI, if we wanted to use an NVIDIA GPU inside a container, we had to swap containerd's low-level container runtime (runc) for the NVIDIA runtime. The reason is that using a GPU is not just a matter of binding the GPU device files into the container: driver files and executables (such as nvidia-smi) must be bound in as well, and a few hooks must be run. The NVIDIA runtime's job is precisely to bind those files and run those hooks, and then call runc.

Now CDI can do all of this for us. Besides not having to replace the runtime, we also gain abstraction and pluggability.

Versions and prerequisites

  • kubelet version >= 1.28.0
  • containerd version >= 1.7.0

This is an alpha feature in Kubernetes 1.28 (it reached beta in 1.29, where it is enabled by default), so we need to turn on the feature gate in kubelet's startup arguments: --feature-gates=DevicePluginCDIDevices=true

  sudo vim /etc/systemd/system/kubelet.service.d/10-kubeadm.conf

  ExecStart=/usr/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS --feature-gates=DevicePluginCDIDevices=true
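If your kubelet is driven by a KubeletConfiguration file instead (commonly /var/lib/kubelet/config.yaml), the same gate can be switched on there; a minimal sketch:

  apiVersion: kubelet.config.k8s.io/v1beta1
  kind: KubeletConfiguration
  featureGates:
    DevicePluginCDIDevices: true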

containerd also needs CDI enabled: cdi_spec_dirs lists the directories containerd scans for CDI spec files, and enable_cdi switches the feature on. Both options live in the CRI plugin section of the config, as sketched after the snippet below.

  sudo vim /etc/containerd/config.toml

  cdi_spec_dirs = ["/etc/cdi", "/var/run/cdi"]
  enable_cdi = true
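For context, in containerd 1.7 these two options belong under the CRI plugin section, so the relevant fragment of /etc/containerd/config.toml looks roughly like this:

  version = 2

  [plugins."io.containerd.grpc.v1.cri"]
    enable_cdi = true
    cdi_spec_dirs = ["/etc/cdi", "/var/run/cdi"]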

Restart containerd and kubelet (reload systemd first, since we edited a unit drop-in):

  sudo systemctl daemon-reload
  sudo systemctl restart kubelet.service containerd.service

Mocking devices

Since my cluster has no GPUs, I'll simply mock up a few files to act as devices.

  sudo mkdir /dev/mock
  cd /dev/mock
  sudo mknod /dev/mock/device_0 c 89 1
  sudo mknod /dev/mock/device_1 c 89 1
  sudo mknod /dev/mock/device_2 c 89 1
  sudo mknod /dev/mock/device_3 c 89 1
  sudo mknod /dev/mock/device_4 c 89 1
  sudo mknod /dev/mock/device_5 c 89 1
  sudo mknod /dev/mock/device_6 c 89 1
  sudo mknod /dev/mock/device_7 c 89 1
  sudo mkdir -p /mock/bin
  sudo vim /mock/bin/list_device.sh

  #!/bin/bash
  # Directories to inspect
  directories=(/dev /dev/mock)
  # Loop over the directories
  for dir in "${directories[@]}"; do
      # If the directory exists, list its contents
      if [ -d "$dir" ]; then
          ls "$dir"
      fi
  done

  sudo chmod a+x /mock/bin/list_device.sh
  sudo mkdir /mock/so
  cd /mock/so
  sudo touch device_0.so device_1.so device_2.so device_3.so device_4.so device_5.so device_6.so device_7.so

Enabling the kubelet device plugin

Below is a simple device plugin, its Dockerfile, and the YAML to deploy it to Kubernetes.

  package main

  import (
      "context"
      "fmt"
      "time"

      "github.com/kubevirt/device-plugin-manager/pkg/dpm"
      pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
  )

  type PluginLister struct {
      ResUpdateChan chan dpm.PluginNameList
  }

  var ResourceNamespace = "mock.com"
  var PluginName = "gpu"

  func (p *PluginLister) GetResourceNamespace() string {
      return ResourceNamespace
  }

  func (p *PluginLister) Discover(pluginListCh chan dpm.PluginNameList) {
      pluginListCh <- dpm.PluginNameList{PluginName}
  }

  func (p *PluginLister) NewPlugin(name string) dpm.PluginInterface {
      return &Plugin{}
  }

  type Plugin struct{}

  func (p *Plugin) GetDevicePluginOptions(ctx context.Context, e *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error) {
      options := &pluginapi.DevicePluginOptions{
          PreStartRequired: true,
      }
      return options, nil
  }

  func (p *Plugin) PreStartContainer(ctx context.Context, r *pluginapi.PreStartContainerRequest) (*pluginapi.PreStartContainerResponse, error) {
      return &pluginapi.PreStartContainerResponse{}, nil
  }

  func (p *Plugin) GetPreferredAllocation(ctx context.Context, r *pluginapi.PreferredAllocationRequest) (*pluginapi.PreferredAllocationResponse, error) {
      return &pluginapi.PreferredAllocationResponse{}, nil
  }

  func (p *Plugin) ListAndWatch(e *pluginapi.Empty, r pluginapi.DevicePlugin_ListAndWatchServer) error {
      devices := []*pluginapi.Device{}
      for i := 0; i < 8; i++ {
          devices = append(devices, &pluginapi.Device{
              // Must stay in sync with the device names in the CDI spec.
              ID:     fmt.Sprintf("device_%d", i),
              Health: pluginapi.Healthy,
          })
      }
      for {
          fmt.Println("register devices")
          // Re-advertise the device list once a minute.
          r.Send(&pluginapi.ListAndWatchResponse{
              Devices: devices,
          })
          time.Sleep(time.Second * 60)
      }
  }

  func (p *Plugin) Allocate(ctx context.Context, r *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
      // Answer with CDI device names; the runtime resolves them via the CDI spec.
      responses := &pluginapi.AllocateResponse{}
      for _, req := range r.ContainerRequests {
          cdidevices := []*pluginapi.CDIDevice{}
          for _, id := range req.DevicesIDs {
              cdidevices = append(cdidevices, &pluginapi.CDIDevice{
                  Name: fmt.Sprintf("%s/%s=%s", ResourceNamespace, PluginName, id),
              })
          }
          responses.ContainerResponses = append(responses.ContainerResponses, &pluginapi.ContainerAllocateResponse{
              CDIDevices: cdidevices,
          })
      }
      return responses, nil
  }

  func main() {
      m := dpm.NewManager(&PluginLister{})
      m.Run()
  }
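For reference, the plugin builds with plain Go tooling. The module is presumably named mock, since the Dockerfile's go build must produce /src/mock for the DaemonSet below to run it as /app/mock:

  go mod init mock
  go mod tidy
  go build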
  # Build stage
  FROM golang:1.21.3 AS builder
  COPY . /src
  WORKDIR /src
  RUN go env -w GO111MODULE=on && go env -w GOPROXY=https://goproxy.io,direct
  RUN go build

  # Runtime stage
  FROM debian:bookworm-slim
  RUN sed -i 's/deb.debian.org/mirrors.ustc.edu.cn/g' /etc/apt/sources.list.d/debian.sources
  RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates netbase pciutils curl && rm -rf /var/lib/apt/lists/ && apt-get autoremove -y && apt-get autoclean -y
  RUN update-pciids
  COPY --from=builder /src /app
  WORKDIR /app
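Building and pushing the image under the tag the DaemonSet references would look something like this (the registry account is the author's own):

  docker build -t zhaohaiyu/mock:v1 .
  docker push zhaohaiyu/mock:v1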
  apiVersion: v1
  kind: Namespace
  metadata:
    name: mock-plugin
  ---
  apiVersion: apps/v1
  kind: DaemonSet
  metadata:
    name: mock-plugin-daemonset
    namespace: mock-plugin
  spec:
    selector:
      matchLabels:
        name: mock-plugin
    template:
      metadata:
        labels:
          name: mock-plugin
          app.kubernetes.io/component: mock-plugin
          app.kubernetes.io/name: mock-plugin
      spec:
        containers:
        - image: zhaohaiyu/mock:v1
          name: mock-plugin
          command: ['/app/mock']
          imagePullPolicy: Always
          securityContext:
            privileged: true
          tty: true
          volumeMounts:
          # The kubelet dir is mounted so the plugin can register via the device-plugin socket.
          - name: kubelet
            mountPath: /var/lib/kubelet
        volumes:
        - name: kubelet
          hostPath:
            path: /var/lib/kubelet
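Apply the manifest (filename assumed here):

  kubectl apply -f mock-plugin.yaml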

Once it's applied, check with kubectl:

  $ kubectl -n mock-plugin get pod
  NAME                          READY   STATUS    RESTARTS   AGE
  mock-plugin-daemonset-8w2r8   1/1     Running   0          3m27s

Check whether the node has registered the device plugin:

  $ kubectl describe node node1
  Capacity:
    cpu:                8
    ephemeral-storage:  102626232Ki
    hugepages-1Gi:      0
    hugepages-2Mi:      0
    memory:             24570324Ki
    mock.com/gpu:       8
    pods:               110
  Allocatable:
    cpu:                8
    ephemeral-storage:  94580335255
    hugepages-1Gi:      0
    hugepages-2Mi:      0
    memory:             24467924Ki
    mock.com/gpu:       8
    pods:               110

The node now reports 8 GPU devices under the name mock.com/gpu, exactly as defined in our code.

The CDI configuration file

CDI Spec: https://github.com/cncf-tags/container-device-interface/blob/main/SPEC.md

We also write a CDI spec file. containerd reads this file and, for each CDI device name the kubelet passes along, applies the container edits it describes (device nodes, mounts, hooks) when creating the container.

  # vim /etc/cdi/mock.yaml
  cdiVersion: 0.5.0
  kind: mock.com/gpu
  devices:
  - name: device_0
    containerEdits:
      deviceNodes:
      - hostPath: "/dev/mock/device_0"
        path: "/dev/mock/device_0"
        type: c
        permissions: rw
      mounts:
      - hostPath: "/mock/so/device_0.so"
        containerPath: "/mock/so/device_0.so"
        options:
        - ro
        - nosuid
        - nodev
        - bind
  - name: device_1
    containerEdits:
      deviceNodes:
      - hostPath: "/dev/mock/device_1"
        path: "/dev/mock/device_1"
        type: c
        permissions: rw
      mounts:
      - hostPath: "/mock/so/device_1.so"
        containerPath: "/mock/so/device_1.so"
        options:
        - ro
        - nosuid
        - nodev
        - bind
  - name: device_2
    containerEdits:
      deviceNodes:
      - hostPath: "/dev/mock/device_2"
        path: "/dev/mock/device_2"
        type: c
        permissions: rw
      mounts:
      - hostPath: "/mock/so/device_2.so"
        containerPath: "/mock/so/device_2.so"
        options:
        - ro
        - nosuid
        - nodev
        - bind
  - name: device_3
    containerEdits:
      deviceNodes:
      - hostPath: "/dev/mock/device_3"
        path: "/dev/mock/device_3"
        type: c
        permissions: rw
      mounts:
      - hostPath: "/mock/so/device_3.so"
        containerPath: "/mock/so/device_3.so"
        options:
        - ro
        - nosuid
        - nodev
        - bind
  - name: device_4
    containerEdits:
      deviceNodes:
      - hostPath: "/dev/mock/device_4"
        path: "/dev/mock/device_4"
        type: c
        permissions: rw
      mounts:
      - hostPath: "/mock/so/device_4.so"
        containerPath: "/mock/so/device_4.so"
        options:
        - ro
        - nosuid
        - nodev
        - bind
  - name: device_5
    containerEdits:
      deviceNodes:
      - hostPath: "/dev/mock/device_5"
        path: "/dev/mock/device_5"
        type: c
        permissions: rw
      mounts:
      - hostPath: "/mock/so/device_5.so"
        containerPath: "/mock/so/device_5.so"
        options:
        - ro
        - nosuid
        - nodev
        - bind
  - name: device_6
    containerEdits:
      deviceNodes:
      - hostPath: "/dev/mock/device_6"
        path: "/dev/mock/device_6"
        type: c
        permissions: rw
      mounts:
      - hostPath: "/mock/so/device_6.so"
        containerPath: "/mock/so/device_6.so"
        options:
        - ro
        - nosuid
        - nodev
        - bind
  - name: device_7
    containerEdits:
      deviceNodes:
      - hostPath: "/dev/mock/device_7"
        path: "/dev/mock/device_7"
        type: c
        permissions: rw
      mounts:
      - hostPath: "/mock/so/device_7.so"
        containerPath: "/mock/so/device_7.so"
        options:
        - ro
        - nosuid
        - nodev
        - bind
  containerEdits:
    mounts:
    - hostPath: "/mock/bin/list_device.sh"
      containerPath: "/usr/local/bin/list_device.sh"
      options:
      - ro
      - nosuid
      - nodev
      - bind
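As an extra sanity check, the CNCF CDI Go package can load the same spec directories and report the qualified device names it resolves. A sketch, assuming a recent release of the package (older releases used the import path github.com/container-orchestrated-devices/container-device-interface, so adjust to your version):

  package main

  import (
      "fmt"

      "tags.cncf.io/container-device-interface/pkg/cdi"
  )

  func main() {
      // Scan the same directories containerd was configured with.
      registry := cdi.GetRegistry(cdi.WithSpecDirs("/etc/cdi", "/var/run/cdi"))
      // Print every fully-qualified device name, e.g. mock.com/gpu=device_0.
      for _, device := range registry.DeviceDB().ListDevices() {
          fmt.Println(device)
      }
  }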

This is only a simple example; see the official documentation for hooks, env, and the other fields.
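For a flavor of those, here is a hypothetical device entry that also injects an environment variable and runs a createContainer hook (the hook binary and variable name are invented for illustration):

  devices:
  - name: device_0
    containerEdits:
      env:
      - MOCK_VISIBLE_DEVICES=device_0
      hooks:
      - hookName: createContainer
        path: /usr/bin/mock-hook
        args:
        - mock-hook
        - configure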

Deploying a pod

Let's deploy a pod that requests 4 units of the mock GPU resource.

  apiVersion: v1
  kind: Pod
  metadata:
    name: ubuntu1
  spec:
    containers:
    - name: ubuntu-container
      image: ubuntu:latest
      command: ["sleep"]
      args: ["3600"]
      resources:
        requests:
          memory: "64Mi"
          cpu: "250m"
          mock.com/gpu: "4"
        limits:
          memory: "128Mi"
          cpu: "500m"
          mock.com/gpu: "4"
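Apply it (filename assumed again):

  kubectl apply -f ubuntu1.yaml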
  $ kubectl get pod ubuntu1
  NAME      READY   STATUS    RESTARTS   AGE
  ubuntu1   1/1     Running   0          49s

Now let's enter the container with kubectl exec -it ubuntu1 -- bash and take a look.

  $ ls /dev/mock/
  device_0  device_1  device_6  device_7
  $ ls /mock/so/
  device_0.so  device_1.so  device_6.so  device_7.so
  $ list_device.sh
  so
  device_0  device_1  device_6  device_7

The devices and .so files declared in our CDI spec, along with list_device.sh, have all been mounted into the container.

Now let's start another pod requesting 3 mock GPUs, and check its /dev/mock:

  apiVersion: v1
  kind: Pod
  metadata:
    name: ubuntu2
  spec:
    containers:
    - name: ubuntu-container
      image: ubuntu:latest
      command: ["sleep"]
      args: ["3600"]
      resources:
        requests:
          memory: "64Mi"
          cpu: "250m"
          mock.com/gpu: "3"
        limits:
          memory: "128Mi"
          cpu: "500m"
          mock.com/gpu: "3"
  $ ls /dev/mock/
  device_2  device_3  device_5

Check how much of the resource the node has allocated (from kubectl describe node node1):

  Allocated resources:
    (Total limits may be over 100 percent, i.e., overcommitted.)
    Resource           Requests      Limits
    --------           --------      ------
    cpu                1550m (19%)   1 (12%)
    memory             668Mi (2%)    596Mi (2%)
    ephemeral-storage  0 (0%)        0 (0%)
    hugepages-1Gi      0 (0%)        0 (0%)
    hugepages-2Mi      0 (0%)        0 (0%)
    mock.com/gpu       7             7

We're now using 7 of the 8 mock.com/gpu resources, with 1 left.
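The node's total allocatable count for the resource can also be read directly, for example with jsonpath (note that the dot in the resource name must be escaped):

  kubectl get node node1 -o jsonpath='{.status.allocatable.mock\.com/gpu}'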

NVIDIA GPU

I found a machine with an NVIDIA GPU and installed nvidia-container-toolkit-base on it.

Running nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml generates the CDI spec file:

  ---
  cdiVersion: 0.5.0
  containerEdits:
    deviceNodes:
    - path: /dev/nvidia-modeset
    - path: /dev/nvidia-uvm
    - path: /dev/nvidia-uvm-tools
    - path: /dev/nvidiactl
    hooks:
    - args:
      - nvidia-ctk
      - hook
      - create-symlinks
      - --link
      - libglxserver_nvidia.so.525.147.05::/usr/lib/x86_64-linux-gnu/nvidia/xorg/libglxserver_nvidia.so
      hookName: createContainer
      path: /usr/bin/nvidia-ctk
    - args:
      - nvidia-ctk
      - hook
      - update-ldcache
      - --folder
      - /usr/lib/x86_64-linux-gnu
      hookName: createContainer
      path: /usr/bin/nvidia-ctk
    mounts:
    - containerPath: /run/nvidia-persistenced/socket
      hostPath: /run/nvidia-persistenced/socket
      options:
      - ro
      - nosuid
      - nodev
      - bind
      - noexec
    - containerPath: /usr/bin/nvidia-cuda-mps-control
      hostPath: /usr/bin/nvidia-cuda-mps-control
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/bin/nvidia-cuda-mps-server
      hostPath: /usr/bin/nvidia-cuda-mps-server
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/bin/nvidia-debugdump
      hostPath: /usr/bin/nvidia-debugdump
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/bin/nvidia-persistenced
      hostPath: /usr/bin/nvidia-persistenced
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/bin/nvidia-smi
      hostPath: /usr/bin/nvidia-smi
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libEGL_nvidia.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libEGL_nvidia.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libGLESv1_CM_nvidia.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libGLESv1_CM_nvidia.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libGLESv2_nvidia.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libGLESv2_nvidia.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libGLX_nvidia.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libGLX_nvidia.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libcuda.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libcuda.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libcudadebugger.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libcudadebugger.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvcuvid.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvcuvid.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvidia-allocator.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-allocator.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvidia-cfg.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-cfg.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvidia-compiler.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-compiler.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvidia-egl-gbm.so.1.1.0
      hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-egl-gbm.so.1.1.0
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvidia-eglcore.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-eglcore.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvidia-encode.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-encode.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvidia-fbc.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-fbc.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvidia-glcore.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-glcore.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvidia-glsi.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-glsi.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvidia-glvkspirv.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-glvkspirv.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvidia-ngx.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-ngx.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvidia-nvvm.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-nvvm.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-opencl.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvidia-opticalflow.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-opticalflow.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-ptxjitcompiler.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvidia-rtcore.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-rtcore.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvidia-tls.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-tls.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/libnvoptix.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/libnvoptix.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /lib/firmware/nvidia/525.147.05/gsp_ad10x.bin
      hostPath: /lib/firmware/nvidia/525.147.05/gsp_ad10x.bin
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /lib/firmware/nvidia/525.147.05/gsp_tu10x.bin
      hostPath: /lib/firmware/nvidia/525.147.05/gsp_tu10x.bin
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/share/X11/xorg.conf.d/10-nvidia.conf
      hostPath: /usr/share/X11/xorg.conf.d/10-nvidia.conf
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/share/egl/egl_external_platform.d/15_nvidia_gbm.json
      hostPath: /usr/share/egl/egl_external_platform.d/15_nvidia_gbm.json
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/share/glvnd/egl_vendor.d/10_nvidia.json
      hostPath: /usr/share/glvnd/egl_vendor.d/10_nvidia.json
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/share/vulkan/icd.d/nvidia_icd.json
      hostPath: /usr/share/vulkan/icd.d/nvidia_icd.json
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/nvidia/xorg/libglxserver_nvidia.so.525.147.05
      hostPath: /usr/lib/x86_64-linux-gnu/nvidia/xorg/libglxserver_nvidia.so.525.147.05
      options:
      - ro
      - nosuid
      - nodev
      - bind
    - containerPath: /usr/lib/x86_64-linux-gnu/nvidia/xorg/nvidia_drv.so
      hostPath: /usr/lib/x86_64-linux-gnu/nvidia/xorg/nvidia_drv.so
      options:
      - ro
      - nosuid
      - nodev
      - bind
  devices:
  - containerEdits:
      deviceNodes:
      - path: /dev/nvidia0
      - path: /dev/dri/card0
      - path: /dev/dri/renderD128
      hooks:
      - args:
        - nvidia-ctk
        - hook
        - create-symlinks
        - --link
        - ../card0::/dev/dri/by-path/pci-0000:01:00.0-card
        - --link
        - ../renderD128::/dev/dri/by-path/pci-0000:01:00.0-render
        hookName: createContainer
        path: /usr/bin/nvidia-ctk
      - args:
        - nvidia-ctk
        - hook
        - chmod
        - --mode
        - "755"
        - --path
        - /dev/dri
        hookName: createContainer
        path: /usr/bin/nvidia-ctk
    name: "0"
  - containerEdits:
      deviceNodes:
      - path: /dev/nvidia0
      - path: /dev/dri/card0
      - path: /dev/dri/renderD128
      hooks:
      - args:
        - nvidia-ctk
        - hook
        - create-symlinks
        - --link
        - ../card0::/dev/dri/by-path/pci-0000:01:00.0-card
        - --link
        - ../renderD128::/dev/dri/by-path/pci-0000:01:00.0-render
        hookName: createContainer
        path: /usr/bin/nvidia-ctk
      - args:
        - nvidia-ctk
        - hook
        - chmod
        - --mode
        - "755"
        - --path
        - /dev/dri
        hookName: createContainer
        path: /usr/bin/nvidia-ctk
    name: all
  kind: nvidia.com/gpu
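The generated spec can be sanity-checked before use; assuming your nvidia-ctk build ships the subcommand, this lists the qualified device names it finds in the spec directories:

  nvidia-ctk cdi list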

The NVIDIA CDI spec is far more complex than the mock one: an NVIDIA GPU needs many files bound into the container, plus hooks. All of this used to be done inside the runtime; now it can all be handled through CDI.

Original article: https://www.cnblogs.com/haiyux/p/17842489.html
