Just curious: why does ipmitool report a CPU temperature of 60 °C while the TrueNAS dashboard shows the CPU at 50 °C?
BTW, is there any fan control built into Goldeye? I see freeipmi is installed. I created a Python script and added it to systemd, but I’m wondering what is considered the most suitable way to accomplish fan control. How much can I get done from the GUI? I had to remove the included GPU fans and use off-card cooling to fit the card in the case.
Maybe starting with this script:
#!/usr/bin/env python3
import os
import subprocess
import time
import syslog
import re
# Temperature band in °C for each sensor: the control loop raises the fan
# speed above the MAX value and lowers it below the MIN value.
MIN_CPU_TEMP, MAX_CPU_TEMP = 45, 65
MIN_GPU_TEMP, MAX_GPU_TEMP = 45, 65

# Floor for the fan speed in percent; also used as the initial speed.
MIN_FAN_SPEED = 40

# Last fan speed requested for each zone, updated by the set_*_fan_speed
# functions.
current_cpu_fan_speed = current_gpu_fan_speed = MIN_FAN_SPEED
def get_gpu_temperature():
    """Return the hottest GPU temperature reported by ``nvidia-smi``.

    The query prints one line per GPU, so every numeric line is parsed and
    the maximum is returned; the fans should follow the warmest card.
    Non-numeric lines (for example a GPU that reports no temperature) are
    skipped instead of invalidating the whole reading.

    Returns:
        The highest temperature as an int, or None when ``nvidia-smi``
        cannot be run, exits with an error, or yields no numeric reading.
    """
    try:
        output = subprocess.check_output(
            ["nvidia-smi", "--query-gpu=temperature.gpu", "--format=csv,noheader"]
        )
        lines = output.decode("utf-8").splitlines()
    except (OSError, subprocess.SubprocessError, ValueError) as e:
        # OSError: binary missing; SubprocessError: non-zero exit;
        # ValueError: undecodable output.
        print(f"Error: {e}")
        return None

    readings = [int(line) for line in (raw.strip() for raw in lines) if line.isdigit()]
    if not readings:
        print("Error: nvidia-smi returned no numeric GPU temperature")
        return None
    return max(readings)
def get_cpu_temperature():
    """Return the average CPU temperature reported by ``ipmitool``.

    Runs ``ipmitool sdr type temperature`` and averages (integer division)
    every reading found on a line that mentions "CPU" and contains a number
    followed by " degrees".

    Returns:
        The averaged temperature as an int, or None when ``ipmitool`` cannot
        be run, exits with an error, or reports no CPU reading.
    """
    try:
        # Argument list instead of a shell string: no shell is needed here.
        temp_output = subprocess.check_output(
            ["ipmitool", "sdr", "type", "temperature"]
        ).decode()
    except (OSError, subprocess.SubprocessError, ValueError) as e:
        # Report the failure like a missing reading so the caller's loop
        # keeps running instead of dying on an unhandled exception.
        print(f"Failed to retrieve CPU temperature: {e}")
        return None

    cpu_temps = []
    for line in temp_output.splitlines():
        if "CPU" not in line:
            continue
        match = re.search(r'\d+(?= degrees)', line)
        if match:
            cpu_temps.append(int(match.group()))

    # Guard the division: matching lines may exist without a numeric value.
    if not cpu_temps:
        print("Failed to retrieve CPU temperature.")
        return None
    return sum(cpu_temps) // len(cpu_temps)
# IPMI tool command to set the fan control mode to manual (Full).
# os.system returns the command's exit status; a non-zero value means the
# mode switch did not happen, which used to go unnoticed.
if os.system("ipmitool raw 0x30 0x45 0x01 0x01") != 0:
    syslog.syslog(syslog.LOG_ERR, "Failed to set the fan control mode via ipmitool")
    print("Failed to set the fan control mode via ipmitool")
else:
    # Pause after a successful mode change before sending further commands
    # (presumably to let the BMC settle -- TODO confirm).
    time.sleep(2)
def set_cpu_fan_speed(cpu_fan_speed):
    """Request a new CPU fan speed through ``ipmitool`` and remember it.

    Args:
        cpu_fan_speed: Desired fan speed in percent. The value is converted
            to an int and clamped to 0-100 before it is sent.

    Returns:
        The percentage that was actually requested after clamping.

    The module-level ``current_cpu_fan_speed`` is updated even when the
    command fails, as before; every request carries an absolute value, so the
    next successful call brings the fan to the tracked speed.
    """
    global current_cpu_fan_speed
    # hex() rejects floats and renders negatives as "-0x..", neither of which
    # is a valid byte argument for the raw command.
    cpu_fan_speed = max(0, min(100, int(cpu_fan_speed)))
    current_cpu_fan_speed = cpu_fan_speed
    cpu_hex_speed = hex(cpu_fan_speed)

    # Set the fan speed for CPU
    command = ["ipmitool", "raw", "0x30", "0x70", "0x66", "0x01", "0x00", cpu_hex_speed]
    try:
        succeeded = subprocess.run(command, check=False).returncode == 0
    except OSError:
        # ipmitool is missing or not executable.
        succeeded = False

    if not succeeded:
        # Do not claim an adjustment that never reached the BMC.
        syslog.syslog(syslog.LOG_ERR, f"Failed to set CPU fan speed to {cpu_fan_speed}%")
        print(f"Failed to set CPU fan speed to {cpu_fan_speed}% - {cpu_hex_speed}")
        return cpu_fan_speed

    time.sleep(2)
    # Log the fan speed change to syslog and to the console
    syslog.syslog(syslog.LOG_INFO, f"CPU fan speed adjusted to {cpu_fan_speed}%")
    print(f"CPU fan speed adjusted to {cpu_fan_speed}% - {cpu_hex_speed}")
    return cpu_fan_speed
def set_gpu_fan_speed(gpu_fan_speed):
    """Request a new GPU fan speed through ``ipmitool`` and remember it.

    Args:
        gpu_fan_speed: Desired fan speed in percent. The value is converted
            to an int and clamped to 0-100 before it is sent.

    Returns:
        The percentage that was actually requested after clamping.

    The module-level ``current_gpu_fan_speed`` is updated even when the
    command fails, as before; every request carries an absolute value, so the
    next successful call brings the fan to the tracked speed.
    """
    global current_gpu_fan_speed
    # hex() rejects floats and renders negatives as "-0x..", neither of which
    # is a valid byte argument for the raw command.
    gpu_fan_speed = max(0, min(100, int(gpu_fan_speed)))
    current_gpu_fan_speed = gpu_fan_speed
    gpu_hex_speed = hex(gpu_fan_speed)

    # Set the fan speed for GPU
    command = ["ipmitool", "raw", "0x30", "0x70", "0x66", "0x01", "0x01", gpu_hex_speed]
    try:
        succeeded = subprocess.run(command, check=False).returncode == 0
    except OSError:
        # ipmitool is missing or not executable.
        succeeded = False

    if not succeeded:
        # Do not claim an adjustment that never reached the BMC.
        syslog.syslog(syslog.LOG_ERR, f"Failed to set GPU fan speed to {gpu_fan_speed}%")
        print(f"Failed to set GPU fan speed to {gpu_fan_speed}% - {gpu_hex_speed}")
        return gpu_fan_speed

    time.sleep(2)
    # Log the fan speed change to syslog and to the console
    syslog.syslog(syslog.LOG_INFO, f"GPU fan speed adjusted to {gpu_fan_speed}%")
    print(f"GPU fan speed adjusted to {gpu_fan_speed}% - {gpu_hex_speed}")
    return gpu_fan_speed
def _next_fan_speed(temp, current_speed, min_temp, max_temp, floor):
    """Return the fan speed to apply next, or None to leave it unchanged.

    Above ``max_temp`` the speed rises by 10 points (capped at 100); below
    ``min_temp`` it falls by 1 point (never under ``floor``). A missing
    reading (``temp is None``) never triggers a change.
    """
    if temp is None:
        return None
    if temp > max_temp and current_speed < 100:
        return min(current_speed + 10, 100)
    if temp < min_temp and current_speed > floor:
        return max(current_speed - 1, floor)
    return None


# Set initial minimum fan speeds
set_cpu_fan_speed(MIN_FAN_SPEED)
set_gpu_fan_speed(MIN_FAN_SPEED)

while True:
    cpu_temp = get_cpu_temperature()
    gpu_temp = get_gpu_temperature()

    # Print the current temperatures to console
    print(f"Current CPU temperature: {cpu_temp}°C")
    print(f"Current GPU temperature: {gpu_temp}°C")

    # Both getters return None on failure; comparing None with an int raises
    # TypeError and would end the loop, so a missing reading only logs a
    # warning and leaves that zone's fan speed untouched.
    if cpu_temp is None:
        syslog.syslog(syslog.LOG_WARNING, "CPU temperature unavailable; fan speed left unchanged")
    if gpu_temp is None:
        syslog.syslog(syslog.LOG_WARNING, "GPU temperature unavailable; fan speed left unchanged")

    new_cpu_fan_speed = _next_fan_speed(
        cpu_temp, current_cpu_fan_speed, MIN_CPU_TEMP, MAX_CPU_TEMP, MIN_FAN_SPEED
    )
    if new_cpu_fan_speed is not None:
        set_cpu_fan_speed(new_cpu_fan_speed)

    new_gpu_fan_speed = _next_fan_speed(
        gpu_temp, current_gpu_fan_speed, MIN_GPU_TEMP, MAX_GPU_TEMP, MIN_FAN_SPEED
    )
    if new_gpu_fan_speed is not None:
        set_gpu_fan_speed(new_gpu_fan_speed)

    # Wait for 60 seconds before checking the temperature again
    time.sleep(60)