NVIDIA GPU not showing up for apps

truenas% nvidia-smi
Thu Jul  4 13:58:48 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.23.08              Driver Version: 545.23.08    CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|=========================================+======================+======================|
|   0  Quadro P2000                   Off | 00000000:04:00.0 Off |                  N/A |
| 49%   39C    P8               6W /  75W |      0MiB /  5120MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                                         
+---------------------------------------------------------------------------------------+
| Processes:                                                                            |
|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |
|        ID   ID                                                             Usage      |
|=======================================================================================|
|  No running processes found                                                           |
+---------------------------------------------------------------------------------------+
truenas% 

And the pod list:

truenas% sudo k3s kubectl get pods -A
[sudo] password for jgreen: 
NAMESPACE      NAME                                   READY   STATUS             RESTARTS        AGE
ix-tailscale   tailscale-78fcb6bc64-hfk98             0/1     Completed          0               23h
ix-sonarr      sonarr-6995b49df7-flsct                0/1     Completed          0               23h
kube-system    snapshot-controller-546868dfb4-zhnc7   0/1     Error              0               29h
kube-system    csi-smb-controller-7fbbb8fb6f-57fp2    3/3     Running            3 (4h2m ago)    29h
kube-system    csi-smb-node-shws2                     3/3     Running            3 (4h2m ago)    29h
kube-system    csi-nfs-node-bfsbs                     3/3     Running            3 (4h2m ago)    29h
kube-system    csi-nfs-controller-7b74694749-6hqgx    4/4     Running            4 (4h2m ago)    29h
kube-system    snapshot-controller-546868dfb4-88glz   1/1     Running            1 (4h2m ago)    29h
kube-system    coredns-59b4f5bbd5-x9bs7               1/1     Running            1 (4h2m ago)    29h
kube-system    snapshot-controller-546868dfb4-jdmw9   1/1     Running            0               4h2m
ix-metube      metube-5fd77d68f4-d76vj                1/1     Running            1 (4h2m ago)    22h
ix-sonarr      sonarr-6995b49df7-vkhzg                1/1     Running            0               4h2m
ix-tailscale   tailscale-78fcb6bc64-plnj2             1/1     Running            0               4h2m
ix-plex        plex-94cf4454c-5dpjt                   1/1     Running            0               3h58m
kube-system    nvidia-device-plugin-daemonset-k58cb   0/1     CrashLoopBackOff   41 (112s ago)   4h1m
truenas% 

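Clearly the `nvidia-device-plugin-daemonset` pod (the container that registers the `nvidia.com/gpu` resource with the kubelet, rather than the driver itself) is repeatedly crashing (`CrashLoopBackOff`, 41 restarts), as others are reporting.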

Log file from the NVIDIA device plugin container:

truenas% sudo cat 43.log 
2024-07-04T14:10:24.134548872+01:00 stderr F 2024/07/04 13:10:24 Starting FS watcher.
2024-07-04T14:10:24.134588677+01:00 stderr F 2024/07/04 13:10:24 Starting OS watcher.
2024-07-04T14:10:24.134790434+01:00 stderr F 2024/07/04 13:10:24 Starting Plugins.
2024-07-04T14:10:24.134800283+01:00 stderr F 2024/07/04 13:10:24 Loading configuration.
2024-07-04T14:10:24.135017028+01:00 stderr F 2024/07/04 13:10:24 Updating config with default resource matching patterns.
2024-07-04T14:10:24.135037987+01:00 stderr F 2024/07/04 13:10:24 
2024-07-04T14:10:24.135056232+01:00 stderr F Running with config:
2024-07-04T14:10:24.135066391+01:00 stderr F {
2024-07-04T14:10:24.135076219+01:00 stderr F   "version": "v1",
2024-07-04T14:10:24.135087029+01:00 stderr F   "flags": {
2024-07-04T14:10:24.135096196+01:00 stderr F     "migStrategy": "none",
2024-07-04T14:10:24.135105043+01:00 stderr F     "failOnInitError": true,
2024-07-04T14:10:24.135113879+01:00 stderr F     "nvidiaDriverRoot": "/",
2024-07-04T14:10:24.135123808+01:00 stderr F     "gdsEnabled": false,
2024-07-04T14:10:24.135138876+01:00 stderr F     "mofedEnabled": false,
2024-07-04T14:10:24.135149125+01:00 stderr F     "plugin": {
2024-07-04T14:10:24.135158112+01:00 stderr F       "passDeviceSpecs": false,
2024-07-04T14:10:24.135167009+01:00 stderr F       "deviceListStrategy": "envvar",
2024-07-04T14:10:24.135175845+01:00 stderr F       "deviceIDStrategy": "uuid"
2024-07-04T14:10:24.135184622+01:00 stderr F     }
2024-07-04T14:10:24.135193508+01:00 stderr F   },
2024-07-04T14:10:24.135202285+01:00 stderr F   "resources": {
2024-07-04T14:10:24.135217884+01:00 stderr F     "gpus": [
2024-07-04T14:10:24.135227141+01:00 stderr F       {
2024-07-04T14:10:24.135235968+01:00 stderr F         "pattern": "*",
2024-07-04T14:10:24.135244774+01:00 stderr F         "name": "nvidia.com/gpu"
2024-07-04T14:10:24.135253881+01:00 stderr F       }
2024-07-04T14:10:24.135262708+01:00 stderr F     ]
2024-07-04T14:10:24.135271574+01:00 stderr F   },
2024-07-04T14:10:24.135280361+01:00 stderr F   "sharing": {
2024-07-04T14:10:24.13529585+01:00 stderr F     "timeSlicing": {
2024-07-04T14:10:24.135304877+01:00 stderr F       "resources": [
2024-07-04T14:10:24.135313713+01:00 stderr F         {
2024-07-04T14:10:24.13532255+01:00 stderr F           "name": "nvidia.com/gpu",
2024-07-04T14:10:24.135331366+01:00 stderr F           "devices": "all",
2024-07-04T14:10:24.135340163+01:00 stderr F           "replicas": 5
2024-07-04T14:10:24.135348959+01:00 stderr F         }
2024-07-04T14:10:24.135357746+01:00 stderr F       ]
2024-07-04T14:10:24.135374357+01:00 stderr F     }
2024-07-04T14:10:24.135383364+01:00 stderr F   }
2024-07-04T14:10:24.1353922+01:00 stderr F }
2024-07-04T14:10:24.135401107+01:00 stderr F 2024/07/04 13:10:24 Retreiving plugins.
2024-07-04T14:10:24.135415985+01:00 stderr F 2024/07/04 13:10:24 Detected NVML platform: found NVML library
2024-07-04T14:10:24.135431534+01:00 stderr F 2024/07/04 13:10:24 Detected non-Tegra platform: /sys/devices/soc0/family file not found
2024-07-04T14:10:24.672101541+01:00 stderr F 2024/07/04 13:10:24 Starting GRPC server for 'nvidia.com/gpu'
2024-07-04T14:10:24.672216446+01:00 stderr F 2024/07/04 13:10:24 Starting to serve 'nvidia.com/gpu' on /var/lib/kubelet/device-plugins/nvidia-gpu.sock
2024-07-04T14:10:24.673629289+01:00 stderr F 2024/07/04 13:10:24 Registered device plugin for 'nvidia.com/gpu' with Kubelet
2024-07-04T14:11:40.828161212+01:00 stderr F 2024/07/04 13:11:40 Received signal "terminated", shutting down.
2024-07-04T14:11:40.82820824+01:00 stderr F 2024/07/04 13:11:40 Stopping plugins.
2024-07-04T14:11:40.828235941+01:00 stderr F 2024/07/04 13:11:40 Stopping to serve 'nvidia.com/gpu' on /var/lib/kubelet/device-plugins/nvidia-gpu.sock