diff --git a/ccminer.cpp b/ccminer.cpp index b7f182d0..dc69ea3d 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -71,6 +71,7 @@ void cuda_devicereset(); int cuda_finddevice(char *name); void cuda_print_devices(); void cuda_get_device_sm(); +void cuda_reset_device(int thr_id, bool *init); #include "nvml.h" #ifdef USE_WRAPNVML @@ -90,6 +91,11 @@ struct workio_cmd { } u; }; +bool opt_debug_diff = false; +bool opt_debug_threads = false; +bool opt_showdiff = true; +bool opt_hwmonitor = true; + static const char *algo_names[] = { "bitcoin", "blake", @@ -149,6 +155,7 @@ static json_t *opt_config = nullptr; static const bool opt_time = true; enum sha_algos opt_algo; int opt_n_threads = 0; +int gpu_threads = 1; int opt_affinity = -1; int opt_priority = 0; static double opt_difficulty = 1; // CH @@ -156,14 +163,21 @@ static bool opt_extranonce = true; bool opt_trust_pool = false; int num_cpus; int active_gpus; +bool need_nvsettings = false; +bool need_memclockrst = false; char * device_name[MAX_GPUS] = { nullptr }; int device_map[MAX_GPUS] = { 0 }; long device_sm[MAX_GPUS] = { 0 }; uint32_t gpus_intensity[MAX_GPUS] = {0}; +int32_t device_mem_offsets[MAX_GPUS] = {0}; uint32_t device_gpu_clocks[MAX_GPUS] = {0}; uint32_t device_mem_clocks[MAX_GPUS] = {0}; uint32_t device_plimit[MAX_GPUS] = {0}; int8_t device_pstate[MAX_GPUS]; +int32_t device_led[MAX_GPUS] = {-1, -1}; +int opt_led_mode = 0; +int opt_cudaschedule = -1; +uint8_t device_tlimit[MAX_GPUS] = {0}; char *rpc_user = NULL; static char *rpc_url = nullptr; static char *rpc_userpass = nullptr; @@ -179,16 +193,17 @@ int longpoll_thr_id = -1; int stratum_thr_id = -1; int api_thr_id = -1; bool stratum_need_reset = false; +volatile bool abort_flag = false; struct work_restart *work_restart = NULL; struct stratum_ctx stratum = { 0 }; bool stop_mining = false; volatile bool mining_has_stopped[MAX_GPUS]; pthread_mutex_t applog_lock = PTHREAD_MUTEX_INITIALIZER; -static pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER; +pthread_mutex_t stats_lock = PTHREAD_MUTEX_INITIALIZER; uint32_t accepted_count = 0L; uint32_t rejected_count = 0L; -static double thr_hashrates[MAX_GPUS]; +double thr_hashrates[MAX_GPUS]; uint64_t global_hashrate = 0; double global_diff = 0.0; uint64_t net_hashrate = 0; @@ -291,8 +306,8 @@ Options:\n\ "\ --mem-clock=N Set the gpu memory max clock (346.72+ driver)\n\ --gpu-clock=N Set the gpu engine max clock (346.72+ driver)\n\ - --pstate=N Set the gpu power state (352.21+ driver)\n\ - --plimit=N Set the gpu power limit(352.21 + driver)\n" + --pstate=N (not for 10xx cards) Set the gpu power state (352.21+ driver)\n\ + --plimit=N Set the gpu power limit (352.21+ driver)\n" #endif ""; @@ -463,6 +478,26 @@ void proper_exit(int reason) #ifdef WIN32 timeEndPeriod(1); #endif +#ifdef USE_WRAPNVML + if(hnvml) + { + for(int n = 0; n < opt_n_threads; n++) + { + nvml_reset_clocks(hnvml, device_map[n]); + } + nvml_destroy(hnvml); + } + if(need_memclockrst) + { +# ifdef WIN32 + for(int n = 0; n < opt_n_threads; n++) + { + nvapi_toggle_clocks(n, false); + } +# endif + } +#endif + sleep(1); exit(reason); } @@ -2538,7 +2573,11 @@ static void parse_arg(int key, char *arg) while(pch != NULL && n < MAX_GPUS) { int dev_id = device_map[n++]; - device_mem_clocks[dev_id] = atoi(pch); + if(*pch == '+' || *pch == '-') + device_mem_offsets[dev_id] = atoi(pch); + else + device_mem_clocks[dev_id] = atoi(pch); + need_nvsettings = true; pch = strtok(NULL, ","); } } @@ -2804,28 +2843,6 @@ int main(int argc, char *argv[]) cuda_devicenames(); -#ifdef USE_WRAPNVML -#if 
defined(__linux__) || defined(_WIN64)
-	/* nvml is currently not the best choice on Windows (only in x64) */
-	hnvml = nvml_create();
-	if(hnvml)
-	{
-		bool gpu_reinit = false;// (opt_cudaschedule >= 0);
-		cuda_devicenames(); // refresh gpu vendor name
-		applog(LOG_INFO, "NVML GPU monitoring enabled.");
-	}
-#endif
-#ifdef WIN32
-	if(!hnvml && nvapi_init() == 0)
-	{
-		applog(LOG_INFO, "NVAPI GPU monitoring enabled.");
-		cuda_devicenames(); // refresh gpu vendor name
-	}
-#endif
-	else if(!hnvml)
-		applog(LOG_INFO, "GPU monitoring is not available.");
-#endif
-
 	if(opt_protocol)
 	{
 		curl_version_info_data *info;
@@ -3025,26 +3042,63 @@ int main(int argc, char *argv[])
 		tq_push(thr_info[stratum_thr_id].q, strdup(rpc_url));
 	}
+
+#ifdef __linux__
+	if(need_nvsettings)
+	{
+		if(nvs_init() < 0)
+			need_nvsettings = false;
+	}
+#endif
+
 #ifdef USE_WRAPNVML
 #if defined(__linux__) || defined(_WIN64)
 	/* nvml is currently not the best choice on Windows (only in x64) */
-	if (hnvml) {
-		bool gpu_reinit = false;// (opt_cudaschedule >= 0);
+	hnvml = nvml_create();
+	if(hnvml)
+	{
+		bool gpu_reinit = (opt_cudaschedule >= 0); //false
+		cuda_devicenames(); // refresh gpu vendor name
+		if(!opt_quiet)
+			applog(LOG_INFO, "NVML GPU monitoring enabled.");
 		for(int n = 0; n < active_gpus; n++)
 		{
 			if(nvml_set_pstate(hnvml, device_map[n]) == 1)
 				gpu_reinit = true;
 			if(nvml_set_plimit(hnvml, device_map[n]) == 1)
 				gpu_reinit = true;
-			if(nvml_set_clocks(hnvml, device_map[n]) == 1)
+			if(!is_windows() && nvml_set_clocks(hnvml, device_map[n]) == 1)
 				gpu_reinit = true;
 			if(gpu_reinit)
 			{
-//				cuda_reset_device(n, NULL);
+				cuda_reset_device(n, NULL);
 			}
 		}
 	}
 #endif
+#ifdef WIN32
+	if(nvapi_init() == 0)
+	{
+		if(!opt_quiet)
+			applog(LOG_INFO, "NVAPI GPU monitoring enabled.");
+		if(!hnvml)
+		{
+			cuda_devicenames(); // refresh gpu vendor name
+		}
+		nvapi_init_settings();
+	}
+#endif
+	else if(!hnvml && !opt_quiet)
+		applog(LOG_INFO, "GPU monitoring is not available.");
+
+	// force reinit to set default device flags
+	if(opt_cudaschedule >= 0 && !hnvml)
+	{
+		for(int n = 0; n < active_gpus; n++)
+		{
+			cuda_reset_device(n, NULL);
+		}
+	}
 #endif
 	if(opt_api_listen)
diff --git a/ccminer.vcxproj b/ccminer.vcxproj
index fe8582ee..8a62ede9 100644
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@@ -285,6 +285,7 @@
+    <ClCompile Include="nvsettings.cpp" />
diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters
index 55ee3a64..caa2c361 100644
--- a/ccminer.vcxproj.filters
+++ b/ccminer.vcxproj.filters
@@ -207,6 +207,9 @@
       <Filter>Source Files</Filter>
+    <ClCompile Include="nvsettings.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
diff --git a/cuda.cpp b/cuda.cpp
index 4d26f83d..b65ca872 100644
--- a/cuda.cpp
+++ b/cuda.cpp
@@ -281,3 +281,11 @@ double throughput2intensity(uint32_t throughput)
 	}
 	return intensity;
 }
+
+void cuda_reset_device(int thr_id, bool *init)
+{
+	int dev_id = device_map[thr_id];
+	cudaSetDevice(dev_id);
+	cudaDeviceReset();
+	cudaDeviceSynchronize();
+}
diff --git a/miner.h b/miner.h
index 66575b31..2e5dbce6 100644
--- a/miner.h
+++ b/miner.h
@@ -75,9 +75,11 @@ void *alloca (size_t);
 #ifdef HAVE_SYSLOG_H
 #include <syslog.h>
-#define LOG_BLUE 0x10 /* unique value */
+#define LOG_BLUE 0x10
+#define LOG_RAW 0x99
 #else
-enum {
+enum
+{
 	LOG_ERR,
 	LOG_WARNING,
 	LOG_NOTICE,
@@ -85,6 +87,7 @@ enum {
 	LOG_DEBUG,
 	/* custom notices */
 	LOG_BLUE = 0x10,
+	LOG_RAW = 0x99
 };
 #endif
@@ -481,6 +484,7 @@ struct thr_info {
 extern int cuda_num_devices();
 extern int cuda_version();
 extern int cuda_gpu_clocks(struct cgpu_info *gpu);
+int cuda_gpu_info(struct cgpu_info *gpu);
 extern bool opt_verify;
 extern bool opt_benchmark;
 extern bool opt_debug;
@@ -507,7 +511,7 @@ extern 
int longpoll_thr_id; extern int stratum_thr_id; extern int api_thr_id; extern bool opt_trust_pool; - +extern volatile bool abort_flag; extern uint64_t global_hashrate; extern double global_diff; @@ -515,8 +519,10 @@ extern double global_diff; extern char* device_name[MAX_GPUS]; extern int device_map[MAX_GPUS]; extern long device_sm[MAX_GPUS]; +extern uint32_t device_plimit[MAX_GPUS]; extern uint32_t gpus_intensity[MAX_GPUS]; double throughput2intensity(uint32_t throughput); +extern void gpulog(int prio, int thr_id, const char *fmt, ...); #define CL_N "\x1B[0m" #define CL_RED "\x1B[31m" diff --git a/nvml.cpp b/nvml.cpp index bce34e99..7b7d3227 100644 --- a/nvml.cpp +++ b/nvml.cpp @@ -1,4 +1,4 @@ -/* +/* * A trivial little dlopen()-based wrapper library for the * NVIDIA NVML library, to allow runtime discovery of NVML on an * arbitrary system. This is all very hackish and simple-minded, but @@ -34,19 +34,19 @@ static uint32_t device_bus_ids[MAX_GPUS] = { 0 }; extern uint32_t device_gpu_clocks[MAX_GPUS]; extern uint32_t device_mem_clocks[MAX_GPUS]; -extern uint32_t device_plimit[MAX_GPUS]; +extern int32_t device_mem_offsets[MAX_GPUS]; +extern uint8_t device_tlimit[MAX_GPUS]; extern int8_t device_pstate[MAX_GPUS]; +extern int32_t device_led[MAX_GPUS]; +int32_t device_led_state[MAX_GPUS] = { 0 }; +static THREAD bool has_rgb_ok = false; uint32_t clock_prev[MAX_GPUS] = { 0 }; uint32_t clock_prev_mem[MAX_GPUS] = { 0 }; uint32_t limit_prev[MAX_GPUS] = { 0 }; -static bool nvml_plimit_set = false; -#ifdef WIN32 -#include "nvapi/nvapi_ccminer.h" -static int nvapi_dev_map[MAX_GPUS] = {0}; -static NvPhysicalGpuHandle phys[NVAPI_MAX_PHYSICAL_GPUS] = {0}; -#endif +static bool nvml_plimit_set = false; +extern bool need_memclockrst; /* * Wrappers to emulate dlopen() on other systems like Windows @@ -94,7 +94,7 @@ nvml_handle * nvml_create() int i=0; nvml_handle *nvmlh = NULL; -#if defined(WIN32) +#ifdef WIN32 /* Windows (do not use slashes, else ExpandEnvironmentStrings will mix them) */ #define libnvidia_ml "%PROGRAMFILES%\\NVIDIA Corporation\\NVSMI\\nvml.dll" #else @@ -195,14 +195,20 @@ nvml_handle * nvml_create() nvmlh->nvmlDeviceGetEnforcedPowerLimit = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int *limit)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetEnforcedPowerLimit"); // v340 - /* NVML_ERROR_NOT_SUPPORTED - nvmlh->nvmlDeviceGetAutoBoostedClocksEnabled = (nvmlReturn_t (*)(nvmlDevice_t, nvmlEnableState_t *isEnabled, nvmlEnableState_t *defaultIsEnabled)) - wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetAutoBoostedClocksEnabled"); - nvmlh->nvmlDeviceSetAutoBoostedClocksEnabled = (nvmlReturn_t (*)(nvmlDevice_t, nvmlEnableState_t enabled)) - wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetAutoBoostedClocksEnabled"); */ +#ifdef __linux__ + nvmlh->nvmlDeviceClearCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceClearCpuAffinity"); + nvmlh->nvmlDeviceGetCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t, unsigned int sz, unsigned long *cpuSet)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCpuAffinity"); + nvmlh->nvmlDeviceSetCpuAffinity = (nvmlReturn_t (*)(nvmlDevice_t)) + wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetCpuAffinity"); +#endif // v346 nvmlh->nvmlDeviceGetPcieThroughput = (nvmlReturn_t (*)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value)) wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPcieThroughput"); + // v36x (API 8 / Pascal) + nvmlh->nvmlDeviceGetClock = (nvmlReturn_t (*)(nvmlDevice_t, nvmlClockType_t clockType, nvmlClockId_t clockId, unsigned int *clockMHz)) + 
wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetClock"); if (nvmlh->nvmlInit == NULL || nvmlh->nvmlShutdown == NULL || @@ -218,20 +224,11 @@ nvml_handle * nvml_create() free(nvmlh); return NULL; } - nvmlReturn_t rc; - rc = nvmlh->nvmlInit(); - if(rc != NVML_SUCCESS) - { - applog(LOG_WARNING, "nvmlInit() failed: %s", nvmlh->nvmlErrorString(rc)); - return NULL; - } - rc = nvmlh->nvmlSystemGetDriverVersion(driver_version, sizeof(driver_version)); - if(rc != NVML_SUCCESS) - applog(LOG_WARNING, "nvmlSystemGetDriverVersion() failed: %s", nvmlh->nvmlErrorString(rc)); - rc = nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount); - if(rc != NVML_SUCCESS) - applog(LOG_WARNING, "nvmlDeviceGetCount() failed: %s", nvmlh->nvmlErrorString(rc)); + nvmlh->nvmlInit(); + if (nvmlh->nvmlSystemGetDriverVersion) + nvmlh->nvmlSystemGetDriverVersion(driver_version, sizeof(driver_version)); + nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount); /* Query CUDA device count, in case it doesn't agree with NVML, since */ /* CUDA will only report GPUs with compute capability greater than 1.0 */ @@ -247,17 +244,15 @@ nvml_handle * nvml_create() nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); + nvmlh->nvml_pci_vendor_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); nvmlh->nvml_pci_subsys_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int)); nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int)); nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int)); nvmlh->app_clocks = (nvmlEnableState_t*) calloc(nvmlh->nvml_gpucount, sizeof(nvmlEnableState_t)); /* Obtain GPU device handles we're going to need repeatedly... 
 */
-	for (i=0; i<nvmlh->nvml_gpucount; i++)
-	{
-		rc = nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]);
-		if(rc != NVML_SUCCESS)
-			applog(LOG_WARNING, "GPU %d: nvmlDeviceGetHandleByIndex() failed: %s", i, nvmlh->nvmlErrorString(rc));
+	for (i=0; i<nvmlh->nvml_gpucount; i++) {
+		nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]);
 	}

 	/* Query PCI info for each NVML device, and build table for mapping of */
@@ -269,23 +264,18 @@ nvml_handle * nvml_create()
 		nvmlh->nvml_pci_domain_id[i] = pciinfo.domain;
 		nvmlh->nvml_pci_bus_id[i] = pciinfo.bus;
 		nvmlh->nvml_pci_device_id[i] = pciinfo.device;
+		nvmlh->nvml_pci_vendor_id[i] = pciinfo.pci_device_id;
 		nvmlh->nvml_pci_subsys_id[i] = pciinfo.pci_subsystem_id;
 		nvmlh->app_clocks[i] = NVML_FEATURE_UNKNOWN;
-		if (nvmlh->nvmlDeviceSetAPIRestriction)
-		{
-			rc = nvmlh->nvmlDeviceSetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS,
+		if (nvmlh->nvmlDeviceSetAPIRestriction) {
+			nvmlh->nvmlDeviceSetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS,
 				NVML_FEATURE_ENABLED);
-			if(rc != NVML_SUCCESS && opt_debug)
-				applog(LOG_WARNING, "Device %d: nvmlDeviceSetAPIRestriction() failed: %s", nvmlh->devs[i], nvmlh->nvmlErrorString(rc));
 			/* there is only this API_SET_APPLICATION_CLOCKS on the 750 Ti (340.58) */
 		}
-		if (nvmlh->nvmlDeviceGetAPIRestriction)
-		{
-			rc = nvmlh->nvmlDeviceGetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS,
+		if (nvmlh->nvmlDeviceGetAPIRestriction) {
+			nvmlh->nvmlDeviceGetAPIRestriction(nvmlh->devs[i], NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS,
 				&nvmlh->app_clocks[i]);
-			if(rc != NVML_SUCCESS)
-				applog(LOG_WARNING, "Device %d: nvmlDeviceGetAPIRestriction() failed: %s", nvmlh->devs[i], nvmlh->nvmlErrorString(rc));
 		}
 	}

@@ -316,30 +306,14 @@ nvml_handle * nvml_create()
 	return nvmlh;
 }

-#ifdef WIN32
-// Replacement for WIN32 CUDA 6.5 on pascal
-int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total)
-{
-	NvAPI_Status ret = NVAPI_OK;
-	NV_DISPLAY_DRIVER_MEMORY_INFO mem = {0};
-	mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER;
-	unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS];
-	if((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK)
-	{
-		*total = (uint64_t)mem.dedicatedVideoMemory;// mem.availableDedicatedVideoMemory;
-		*free = (uint64_t)mem.curAvailableDedicatedVideoMemory;
-	}
-	return (int)ret;
-}
-#endif
-
-#define MAXCLOCKS 255
 /* apply config clocks to an used device */
 int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
 {
 	nvmlReturn_t rc;
 	uint32_t gpu_clk = 0, mem_clk = 0;
 	int n = nvmlh->cuda_nvml_device_id[dev_id];
+	//if (need_nvsettings) /* prefer later than init time */
+	//	nvs_set_clocks(dev_id);
 	if (n < 0 || n >= nvmlh->nvml_gpucount)
 		return -ENODEV;

@@ -352,36 +326,17 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id)
 	}

 	uint32_t mem_prev = clock_prev_mem[dev_id];
-	if(!mem_prev)
-	{
-		rc = nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_prev);
-		if(rc != NVML_SUCCESS)
-		{
-			applog(LOG_WARNING, "GPU #%d: unable to query memory clock", dev_id);
-			return -1;
-		}
-	}
+	if (!mem_prev)
+		nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_prev);
 	uint32_t gpu_prev = clock_prev[dev_id];
-	if(!gpu_prev)
-	{
-		rc = nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_prev);
-		if(rc != NVML_SUCCESS)
-		{
-			applog(LOG_WARNING, "GPU #%d: unable to query graphics clock", dev_id);
-			return -1;
-		}
-	}
+	if (!gpu_prev)
+		nvmlh->nvmlDeviceGetApplicationsClock(nvmlh->devs[n], 
NVML_CLOCK_GRAPHICS, &gpu_prev); - rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk); - if(rc != NVML_SUCCESS) - { - applog(LOG_WARNING, "GPU #%d: unable to query default memory clock", dev_id); - return -1; - } + nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk); rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk); if (rc != NVML_SUCCESS) { - applog(LOG_WARNING, "GPU #%d: unable to query default graphics clock", dev_id); - return -1; + applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id); + return -EINVAL; } if (opt_debug) @@ -391,45 +346,43 @@ int nvml_set_clocks(nvml_handle *nvmlh, int dev_id) if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id]; if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id]; - // these functions works for the 960 and the 970 (346.72+), not for the 750 Ti - uint32_t nclocks = MAXCLOCKS; - uint32_t clocks[MAXCLOCKS] = {0}; - - rc = nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, clocks); - if(rc != NVML_SUCCESS) - { - applog(LOG_WARNING, "GPU #%d: unable to query supported memory clocks", dev_id); - return -1; - } + // these functions works for the 960 and the 970 (346.72+), and for the 750 Ti with driver ~361+ + uint32_t nclocks = 0, mem_clocks[32] = { 0 }; + nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL); + nclocks = min(nclocks, 32); + if (nclocks) + nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, mem_clocks); for (uint8_t u=0; u < nclocks; u++) { // ordered by pstate (so highest is first memory clock - P0) - if(clocks[u] <= mem_clk) - { - mem_clk = clocks[u]; + if (mem_clocks[u] <= mem_clk) { + mem_clk = mem_clocks[u]; break; } } - nclocks = MAXCLOCKS; - rc = nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, clocks); - if(rc != NVML_SUCCESS) - { - applog(LOG_WARNING, "GPU #%d: unable to query supported graphics clocks", dev_id); - return -1; - } - for (uint8_t u=0; u < nclocks; u++) { - // ordered desc, so get first - if (clocks[u] <= gpu_clk) { - gpu_clk = clocks[u]; - break; + uint32_t* gpu_clocks = NULL; + nclocks = 0; + nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL); + if (nclocks) { + if (opt_debug) + applog(LOG_DEBUG, "GPU #%d: %u clocks found for mem %u", dev_id, nclocks, mem_clk); + gpu_clocks = (uint32_t*) calloc(1, sizeof(uint32_t) * nclocks + 4); + nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, gpu_clocks); + for (uint8_t u=0; u < nclocks; u++) { + // ordered desc, so get first + if (gpu_clocks[u] <= gpu_clk) { + gpu_clk = gpu_clocks[u]; + break; + } } + free(gpu_clocks); } rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk); if (rc == NVML_SUCCESS) applog(LOG_INFO, "GPU #%d: application clocks set to %u/%u", dev_id, mem_clk, gpu_clk); else { - applog(LOG_WARNING, "GPU #%d: %u/%u - %s", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc)); + applog(LOG_WARNING, "GPU #%d: %u/%u - %s (NVML)", dev_id, mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc)); return -1; } @@ -446,6 +399,8 @@ int nvml_reset_clocks(nvml_handle *nvmlh, int dev_id) nvmlReturn_t rc; uint32_t gpu_clk = 0, mem_clk = 0; int n = nvmlh->cuda_nvml_device_id[dev_id]; + if (need_nvsettings) + nvs_reset_clocks(dev_id); if (n < 0 || n >= nvmlh->nvml_gpucount) return -ENODEV; @@ -472,7 +427,6 @@ int nvml_reset_clocks(nvml_handle *nvmlh, 
int dev_id) return ret; } - /** * Set power state of a device (9xx) * Code is similar as clocks one, which allow the change of the pstate @@ -493,12 +447,7 @@ int nvml_set_pstate(nvml_handle *nvmlh, int dev_id) return -EPERM; } - rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk); - if(rc != NVML_SUCCESS) - { - applog(LOG_WARNING, "GPU #%d: nvmlDeviceGetDefaultApplicationsClock: %s", dev_id, nvmlh->nvmlErrorString(rc)); - return -1; - } + nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_MEM, &mem_clk); rc = nvmlh->nvmlDeviceGetDefaultApplicationsClock(nvmlh->devs[n], NVML_CLOCK_GRAPHICS, &gpu_clk); if (rc != NVML_SUCCESS) { applog(LOG_WARNING, "GPU #%d: unable to query application clocks", dev_id); @@ -509,60 +458,155 @@ int nvml_set_pstate(nvml_handle *nvmlh, int dev_id) if (device_mem_clocks[dev_id]) mem_clk = device_mem_clocks[dev_id]; if (device_gpu_clocks[dev_id]) gpu_clk = device_gpu_clocks[dev_id]; - // these functions works for the 960 and the 970 (346.72+), not for the 750 Ti - uint32_t clocks[MAXCLOCKS] = {0}; - uint32_t nclocks = MAXCLOCKS; + // these functions works for the 960 and the 970 (346.72+), and for the 750 Ti with driver ~361+ + uint32_t nclocks = 0, mem_clocks[32] = { 0 }; int8_t wanted_pstate = device_pstate[dev_id]; - rc = nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, clocks); - if(rc != NVML_SUCCESS) - { - applog(LOG_WARNING, "GPU #%d: nvmlDeviceGetSupportedMemoryClocks: %s", dev_id, nvmlh->nvmlErrorString(rc)); - return -1; - } - if(wanted_pstate < 0) - return -1; - if(wanted_pstate < nclocks) - { - mem_clk = clocks[wanted_pstate]; + nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, NULL); + nclocks = min(nclocks, 32); + if (nclocks) + nvmlh->nvmlDeviceGetSupportedMemoryClocks(nvmlh->devs[n], &nclocks, mem_clocks); + if ((uint32_t) wanted_pstate+1 > nclocks) { + applog(LOG_WARNING, "GPU #%d: only %u mem clocks available (p-states)", dev_id, nclocks); } - else - { - applog(LOG_WARNING, "GPU #%d: pstate %d is unsupported"); - return -1; + for (uint8_t u=0; u < nclocks; u++) { + // ordered by pstate (so highest P0 first) + if (u == wanted_pstate) { + mem_clk = mem_clocks[u]; + break; + } } - nclocks = MAXCLOCKS; - rc = nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, clocks); - if(rc != NVML_SUCCESS) - { - applog(LOG_WARNING, "GPU #%d: nvmlDeviceGetSupportedGraphicsClocks: %s", dev_id, nvmlh->nvmlErrorString(rc)); - return -1; - } - if(device_gpu_clocks[dev_id] == 0) - gpu_clk = 9999; - for(uint8_t u = 0; u < nclocks; u++) - { - // ordered desc, so get first - if(clocks[u] <= gpu_clk) - { - gpu_clk = clocks[u]; - break; + uint32_t* gpu_clocks = NULL; + nclocks = 0; + nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, NULL); + if (nclocks) { + gpu_clocks = (uint32_t*) calloc(1, sizeof(uint32_t) * nclocks + 4); + rc = nvmlh->nvmlDeviceGetSupportedGraphicsClocks(nvmlh->devs[n], mem_clk, &nclocks, gpu_clocks); + if (rc == NVML_SUCCESS) { + // ordered desc, get the max app clock (do not limit) + gpu_clk = gpu_clocks[0]; } + free(gpu_clocks); } rc = nvmlh->nvmlDeviceSetApplicationsClocks(nvmlh->devs[n], mem_clk, gpu_clk); if (rc != NVML_SUCCESS) { - applog(LOG_WARNING, "GPU #%d: pstate %s", dev_id, nvmlh->nvmlErrorString(rc)); + applog(LOG_WARNING, "GPU #%d: pstate P%d (%u/%u) %s", dev_id, (int) wanted_pstate, + mem_clk, gpu_clk, nvmlh->nvmlErrorString(rc)); return -1; } if (!opt_quiet) - applog(LOG_INFO, "GPU 
#%d: app clocks set to P%d (%u/%u)", dev_id, (int)wanted_pstate, mem_clk, gpu_clk); + applog(LOG_INFO, "GPU #%d: app clocks set to P%d (%u/%u)", dev_id, (int) wanted_pstate, mem_clk, gpu_clk); clock_prev[dev_id] = 1; return 1; } +int nvml_set_plimit(nvml_handle *nvmlh, int dev_id) +{ + nvmlReturn_t rc = NVML_ERROR_UNKNOWN; + uint32_t gpu_clk = 0, mem_clk = 0; + int n = nvmlh->cuda_nvml_device_id[dev_id]; + if (n < 0 || n >= nvmlh->nvml_gpucount) + return -ENODEV; + + if (!device_plimit[dev_id]) + return 0; // nothing to do + + if (!nvmlh->nvmlDeviceSetPowerManagementLimit) + return -ENOSYS; + + uint32_t plimit = device_plimit[dev_id] * 1000; + uint32_t pmin = 1000, pmax = 0, prev_limit = 0; + if (nvmlh->nvmlDeviceGetPowerManagementLimitConstraints) + rc = nvmlh->nvmlDeviceGetPowerManagementLimitConstraints(nvmlh->devs[n], &pmin, &pmax); + + if (rc != NVML_SUCCESS) { + if (!nvmlh->nvmlDeviceGetPowerManagementLimit) + return -ENOSYS; + } + nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &prev_limit); + if (!pmax) pmax = prev_limit; + + plimit = min(plimit, pmax); + plimit = max(plimit, pmin); + rc = nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit); + if (rc != NVML_SUCCESS) { +#ifndef WIN32 + applog(LOG_WARNING, "GPU #%d: plimit %s", dev_id, nvmlh->nvmlErrorString(rc)); +#endif + return -1; + } else { + device_plimit[dev_id] = plimit / 1000; + nvml_plimit_set = true; + } + + if (!opt_quiet) { + applog(LOG_INFO, "GPU #%d: power limit set to %uW (allowed range is %u-%u)", + dev_id, plimit/1000U, pmin/1000U, pmax/1000U); + } + + limit_prev[dev_id] = prev_limit; + return 1; +} + +uint32_t nvml_get_plimit(nvml_handle *nvmlh, int dev_id) +{ + uint32_t plimit = 0; + int n = nvmlh ? nvmlh->cuda_nvml_device_id[dev_id] : -1; + if (n < 0 || n >= nvmlh->nvml_gpucount) + return 0; + + if (nvmlh->nvmlDeviceGetPowerManagementLimit) { + nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &plimit); + } + return plimit; +} + +// ccminer -D -n +#define LSTDEV_PFX " " +void nvml_print_device_info(int dev_id) +{ + if (!hnvml) return; + + int n = hnvml->cuda_nvml_device_id[dev_id]; + if (n < 0 || n >= hnvml->nvml_gpucount) + return; + + nvmlReturn_t rc; + + // fprintf(stderr, "------ Hardware ------\n"); + int gvid = hnvml->nvml_pci_vendor_id[n] & 0xFFFF; + int gpid = hnvml->nvml_pci_vendor_id[n] >> 16; + int svid = hnvml->nvml_pci_subsys_id[n] & 0xFFFF; + int spid = hnvml->nvml_pci_subsys_id[n] >> 16; + + fprintf(stderr, LSTDEV_PFX "ID %04x:%04x/%04x:%04x BUS %04x:%02x:%02x.0\n", gvid, gpid, svid, spid, + (int) hnvml->nvml_pci_domain_id[n], (int) hnvml->nvml_pci_bus_id[n], (int) hnvml->nvml_pci_device_id[n]); + + if (hnvml->nvmlDeviceGetClock) { + uint32_t gpu_clk = 0, mem_clk = 0; + + // fprintf(stderr, "------- Clocks -------\n"); + + hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_APP_CLOCK_DEFAULT, &gpu_clk); + rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_APP_CLOCK_DEFAULT, &mem_clk); + if (rc == NVML_SUCCESS) { + fprintf(stderr, LSTDEV_PFX "DEFAULT MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk); + } + hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_APP_CLOCK_TARGET, &gpu_clk); + rc = hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_APP_CLOCK_TARGET, &mem_clk); + if (rc == NVML_SUCCESS) { + fprintf(stderr, LSTDEV_PFX "TARGET MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk); + } + hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_GRAPHICS, NVML_CLOCK_ID_CURRENT, &gpu_clk); + rc 
= hnvml->nvmlDeviceGetClock(hnvml->devs[n], NVML_CLOCK_MEM, NVML_CLOCK_ID_CURRENT, &mem_clk); + if (rc == NVML_SUCCESS) { + fprintf(stderr, LSTDEV_PFX "CURRENT MEM %4u GPU %4u MHz\n", mem_clk, gpu_clk); + } + } +} int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount) { @@ -576,6 +620,7 @@ int cuda_get_gpucount(nvml_handle *nvmlh, int *gpucount) return 0; } + int nvml_get_gpu_name(nvml_handle *nvmlh, int cudaindex, char *namebuf, int bufsize) { int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex]; @@ -629,6 +674,22 @@ int nvml_get_fanpcnt(nvml_handle *nvmlh, int cudaindex, unsigned int *fanpcnt) return 0; } + +int nvml_get_current_clocks(int cudaindex, unsigned int *graphics_clock, unsigned int *mem_clock) +{ + nvmlReturn_t rc; + int gpuindex = hnvml->cuda_nvml_device_id[cudaindex]; + if (gpuindex < 0 || gpuindex >= hnvml->nvml_gpucount) return -ENODEV; + if (!hnvml->nvmlDeviceGetClockInfo) return -ENOSYS; + + rc = hnvml->nvmlDeviceGetClockInfo(hnvml->devs[gpuindex], NVML_CLOCK_SM, graphics_clock); + if (rc != NVML_SUCCESS) return -1; + rc = hnvml->nvmlDeviceGetClockInfo(hnvml->devs[gpuindex], NVML_CLOCK_MEM, mem_clock); + if (rc != NVML_SUCCESS) return -1; + + return 0; +} + /* Not Supported on 750Ti 340.23 */ int nvml_get_power_usage(nvml_handle *nvmlh, int cudaindex, unsigned int *milliwatts) { @@ -739,9 +800,11 @@ int nvml_get_info(nvml_handle *nvmlh, int cudaindex, uint16_t &vid, uint16_t &pi return -ENODEV; subids = nvmlh->nvml_pci_subsys_id[gpuindex]; - if (!subids) subids = nvmlh->nvml_pci_device_id[gpuindex]; + if (!subids) subids = nvmlh->nvml_pci_vendor_id[gpuindex]; pid = subids >> 16; vid = subids & 0xFFFF; + // Colorful and Inno3D + if (pid == 0) pid = nvmlh->nvml_pci_vendor_id[gpuindex] >> 16; return 0; } @@ -754,6 +817,7 @@ int nvml_destroy(nvml_handle *nvmlh) free(nvmlh->nvml_pci_bus_id); free(nvmlh->nvml_pci_device_id); free(nvmlh->nvml_pci_domain_id); + free(nvmlh->nvml_pci_vendor_id); free(nvmlh->nvml_pci_subsys_id); free(nvmlh->nvml_cuda_device_id); free(nvmlh->cuda_nvml_device_id); @@ -764,6 +828,8 @@ int nvml_destroy(nvml_handle *nvmlh) return 0; } +// ---------------------------------------------------------------------------- + /** * nvapi alternative for windows x86 binaries * nvml api doesn't exists as 32bit dll :/// @@ -771,8 +837,11 @@ int nvml_destroy(nvml_handle *nvmlh) #ifdef WIN32 #include "nvapi/nvapi_ccminer.h" +static unsigned int nvapi_dev_map[MAX_GPUS] = { 0 }; static NvDisplayHandle hDisplay_a[NVAPI_MAX_PHYSICAL_GPUS * 2] = { 0 }; +static NvPhysicalGpuHandle phys[NVAPI_MAX_PHYSICAL_GPUS] = { 0 }; static NvU32 nvapi_dev_cnt = 0; +extern bool nvapi_dll_loaded; int nvapi_temperature(unsigned int devNum, unsigned int *temperature) { @@ -819,7 +888,7 @@ int nvapi_fanspeed(unsigned int devNum, unsigned int *speed) return 0; } -int nvapi_getpstate(unsigned int devNum, unsigned int *power) +int nvapi_getpstate(unsigned int devNum, unsigned int *pstate) { NvAPI_Status ret; @@ -837,7 +906,7 @@ int nvapi_getpstate(unsigned int devNum, unsigned int *power) } else { // get pstate for the moment... 
often 0 = P0
-		(*power) = (unsigned int)CurrentPstate;
+		(*pstate) = (unsigned int)CurrentPstate;
 	}

 	return 0;
@@ -891,6 +960,8 @@ int nvapi_getinfo(unsigned int devNum, uint16_t &vid, uint16_t &pid)
 	if (vid == 0x10DE && pSubSystemId) {
 		vid = pSubSystemId & 0xFFFF;
 		pid = pSubSystemId >> 16;
+		// Colorful and Inno3D
+		if (pid == 0) pid = pDeviceId >> 16;
 	}

 	return 0;
@@ -898,25 +969,27 @@ int nvapi_getserial(unsigned int devNum, char *serial, unsigned int maxlen)
 {
-//	NvAPI_Status ret;
+	NvAPI_Status ret;
 	if (devNum >= nvapi_dev_cnt)
 		return -ENODEV;

-	sprintf(serial, "");
+	memset(serial, 0, maxlen);

-	if (maxlen < 64) // Short String
-		return -1;
+	if (maxlen < 11)
+		return -EINVAL;

-#if 0
-	ret = NvAPI_GPU_Get..(phys[devNum], serial);
+	NvAPI_ShortString ser = { 0 };
+	ret = NvAPI_DLL_GetSerialNumber(phys[devNum], ser);
 	if (ret != NVAPI_OK) {
 		NvAPI_ShortString string;
 		NvAPI_GetErrorMessage(ret, string);
 		if (opt_debug)
-			applog(LOG_DEBUG, "NVAPI ...: %s", string);
+			applog(LOG_DEBUG, "NVAPI GetSerialNumber: %s", string);
 		return -1;
 	}
-#endif
+
+	uint8_t *bytes = (uint8_t*) ser;
+	for (int n=0; n<5; n++) sprintf(&serial[n*2], "%02X", bytes[n]);

 	return 0;
 }

@@ -939,20 +1012,565 @@ int nvapi_getbios(unsigned int devNum, char *desc, unsigned int maxlen)
 	}
 	return 0;
 }
+
+static int SetAsusRGBLogo(unsigned int devNum, uint32_t RGB, bool ignorePrevState)
+{
+	NvAPI_Status ret = NVAPI_OK;
+	NV_I2C_INFO_EX* i2cInfo;
+
+	int delay1 = 20000;
+	int delay2 = 0;
+
+	uchar4 rgb = { 0 };
+	memcpy(&rgb, &RGB, 4);
+	uchar4 prgb = { 0 };
+	int32_t prev = device_led_state[nvapi_devid(devNum)];
+	memcpy(&prgb, &prev, 4);
+
+	NV_INIT_STRUCT_ALLOC(NV_I2C_INFO_EX, i2cInfo);
+	if (i2cInfo == NULL) return -ENOMEM;
+
+	NvU32 data[5] = { 0 };
+	NvU32 datv[2] = { 0, 1 };
+	NvU32 datw[2] = { 1, 0 };
+	if (rgb.z != prgb.z || ignorePrevState) {
+		data[2] = 4; // R:4 G:5 B:6, Mode = 7 (1 static, 2 breath, 3 blink, 4 demo)
+		data[3] = 1;
+		datv[0] = rgb.z | 0x13384000;
+
+		i2cInfo->i2cDevAddress = 0x52;
+		i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]);
+		i2cInfo->regAddrSize = 1;
+		i2cInfo->pbData = (NvU8*) datv;
+		i2cInfo->cbRead = 5;
+		i2cInfo->cbSize = 1;
+		i2cInfo->portId = 1;
+		i2cInfo->bIsPortIdSet = 1;
+
+		ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw);
+		usleep(delay1);
+		has_rgb_ok = (ret == NVAPI_OK);
+	}
+
+	if (rgb.y != prgb.y || ignorePrevState) {
+		data[2] = 5;
+		data[3] = 1;
+		datv[0] = rgb.y | 0x4000;
+
+		i2cInfo->i2cDevAddress = 0x52;
+		i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]);
+		i2cInfo->regAddrSize = 1;
+		i2cInfo->pbData = (NvU8*) datv;
+		i2cInfo->cbRead = 5;
+		i2cInfo->cbSize = 1;
+		i2cInfo->portId = 1;
+		i2cInfo->bIsPortIdSet = 1;
+
+		ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw);
+		usleep(delay1);
+		has_rgb_ok = (ret == NVAPI_OK);
+	}
+
+	if (rgb.x != prgb.x || ignorePrevState) {
+		data[2] = 6;
+		data[3] = 1;
+		datv[0] = rgb.x | 0x4000;
+
+		i2cInfo->i2cDevAddress = 0x52;
+		i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]);
+		i2cInfo->regAddrSize = 1;
+		i2cInfo->pbData = 
(NvU8*) datv; + i2cInfo->cbRead = 5; + i2cInfo->cbSize = 1; + i2cInfo->portId = 1; + i2cInfo->bIsPortIdSet = 1; + + ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, datw); + usleep(delay1); + has_rgb_ok = (ret == NVAPI_OK); + } + usleep(delay2); + free(i2cInfo); + return (int) ret; +} + +static int SetGigabyteRGBLogo(unsigned int devNum, uint32_t RGB) +{ + NvAPI_Status ret; + NV_I2C_INFO_EX* i2cInfo; + NV_INIT_STRUCT_ALLOC(NV_I2C_INFO_EX, i2cInfo); + if (i2cInfo == NULL) + return -ENOMEM; + + NvU32 readBuf[25] = { 0 }; + NvU32 data[5] = { 0 }; + data[0] = 1; + data[2] = swab32(RGB & 0xfcfcfcU) | 0x40; + + i2cInfo->i2cDevAddress = 0x48 << 1; + i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); + i2cInfo->regAddrSize = 4; // NVAPI_MAX_SIZEOF_I2C_REG_ADDRESS + i2cInfo->pbData = (NvU8*) readBuf; + i2cInfo->cbRead = 2; + i2cInfo->cbSize = sizeof(readBuf); + i2cInfo->portId = 1; + i2cInfo->bIsPortIdSet = 1; + + //ret = NvAPI_DLL_I2CWriteEx(phys[devNum], i2cInfo, data); + ret = NvAPI_DLL_I2CReadEx(phys[devNum], i2cInfo, data); + usleep(20000); + free(i2cInfo); + return (int) ret; +} + +static int SetZotacRGBLogo(unsigned int devNum, uint32_t RGB) +{ + NvAPI_Status ret; + NV_I2C_INFO* i2cInfo; + NV_INIT_STRUCT_ALLOC(NV_I2C_INFO, i2cInfo); + if (i2cInfo == NULL) + return -ENOMEM; + + NvU32 buf[25] = { 0 }; + NvU32 data[5] = { 0 }; + + uint32_t color = 0, level = 0x40; + + uchar4 rgb = { 0 }; + memcpy(&rgb, &RGB, 4); + level = rgb.x & 0xF0; + level |= rgb.y & 0xF0; + level |= rgb.z & 0xF0; + //applog(LOG_DEBUG, "R %u G %u B %u", rgb.z, rgb.y, rgb.x); + + // Not really RGB custom, only some basic colors, so convert + // 0: Red, 1: Yellow, 2: Green, 3: Cyan, 4: Blue, 5: magenta, 6: white + if ((RGB & 0xFF0000) && (RGB & 0xFF00) && (RGB & 0xFF)) color = 6; + else if ((RGB & 0xFF0000) && (RGB & 0xFF)) color = 5; + else if ((RGB & 0xFF00) && (RGB & 0xFF)) color = 3; + else if ((RGB & 0xFF0000) && (RGB & 0xFF00)) color = 1; + else if (RGB & 0xFF) color = 4; + else if (RGB & 0xFF00) color = 2; + + buf[0] = 0xF0; // F0 set colors + buf[0] |= (color << 8); // logo + buf[0] |= (1 << 16); // top + if (RGB != 0) // level : 0x10 to 0xF0 + buf[0] |= (level << 24); + else + buf[0] |= (0x10U << 24); + + // todo: i2c data crc ? 
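+	// buf[0] word layout, inferred only from the shifts and masks above
+	// (an assumption based on this code, not on any Zotac reference):
+	//   bits  0..7   command     (0xF0 set colors, 0xF7 toggle leds)
+	//   bits  8..15  color index (0 red .. 6 white, per the mapping above)
+	//   bit  16      zone select (1 = top led)
+	//   bits 24..31  brightness  (0x10 min .. 0xF0 max, from the level bits)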
+ + i2cInfo->displayMask = 1; + i2cInfo->bIsDDCPort = 1; + i2cInfo->i2cDevAddress = 0x48 << 1; + i2cInfo->pbI2cRegAddress = (NvU8*) (&data[2]); + i2cInfo->regAddrSize = 1; + i2cInfo->pbData = (NvU8*) buf; + i2cInfo->cbSize = 4; + i2cInfo->i2cSpeed = NVAPI_I2C_SPEED_DEPRECATED; + i2cInfo->i2cSpeedKhz = NVAPI_I2C_SPEED_100KHZ; // 4 + i2cInfo->portId = 1; + i2cInfo->bIsPortIdSet = 1; + + ret = NvAPI_I2CWrite(phys[devNum], i2cInfo); + // required to prevent i2c lock + usleep(20000); + +#if 0 + buf[0] = 0xF7; // F7 toggle leds + if (RGB == 0) + buf[0] |= (1 << 8); // 0 logo on, 1 off + buf[0] |= (1 << 16); // 1 top off + ret = NvAPI_I2CWrite(phys[devNum], i2cInfo); + usleep(20000); +#endif + // other modes: + // 0xF1 breathing green (0x070202F1) + // 0xF2 strobe green (0x070202F2) + // 0xF3 cycle (0x000000F3) + + free(i2cInfo); + return (int) ret; +} + +int nvapi_set_led(unsigned int devNum, int RGB, char *device_name) +{ + uint16_t vid = 0, pid = 0; + NvAPI_Status ret; + if (strstr(device_name, "Gigabyte GTX 10")) { + if (opt_debug) + applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int) phys[devNum], RGB); + return SetGigabyteRGBLogo(devNum, (uint32_t) RGB); + } else if (strstr(device_name, "ASUS GTX 10")) { + if (opt_debug) + applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int) phys[devNum], RGB); + return SetAsusRGBLogo(devNum, (uint32_t) RGB, !has_rgb_ok); + } else if (strstr(device_name, "Zotac GTX 10")) { + if (opt_debug) + applog(LOG_DEBUG, "GPU %x: Set RGB led to %06x", (int) phys[devNum], RGB); + return SetZotacRGBLogo(devNum, (uint32_t) RGB); + } else { + NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM* illu; + NV_INIT_STRUCT_ALLOC(NV_GPU_QUERY_ILLUMINATION_SUPPORT_PARM, illu); + illu->hPhysicalGpu = phys[devNum]; + illu->Attribute = NV_GPU_IA_LOGO_BRIGHTNESS; + ret = NvAPI_GPU_QueryIlluminationSupport(illu); + if (!ret && illu->bSupported) { + NV_GPU_GET_ILLUMINATION_PARM *led; + NV_INIT_STRUCT_ALLOC(NV_GPU_GET_ILLUMINATION_PARM, led); + led->hPhysicalGpu = phys[devNum]; + led->Attribute = NV_GPU_IA_LOGO_BRIGHTNESS; + NvAPI_GPU_GetIllumination(led); + if (opt_debug) + applog(LOG_DEBUG, "GPU %x: Led level was %d, set to %d", (int) phys[devNum], led->Value, RGB); + led->Value = (uint32_t) RGB; + ret = NvAPI_GPU_SetIllumination((NV_GPU_SET_ILLUMINATION_PARM*) led); + free(led); + } + free(illu); + return ret; + } +} + +int nvapi_pstateinfo(unsigned int devNum) +{ + uint32_t n; + NvAPI_Status ret; + uint32_t* mem = (uint32_t*) calloc(1, 0x4000); + if (!mem) + return -ENOMEM; + + unsigned int current = 0xFF; + // useless on init but... 
+	nvapi_getpstate(devNum, &current);
+
+#if 0
+	// try :p
+	uint32_t* buf = (uint32_t*) calloc(1, 0x8000);
+	for (int i=8; i < 0x8000 && buf; i+=4) {
+		buf[0] = 0x10000 + i;
+		NV_GPU_PERF_PSTATE_ID pst = NVAPI_GPU_PERF_PSTATE_P0;
+		ret = NvAPI_DLL_GetPstateClientLimits(phys[devNum], pst, buf);
+		if (ret != NVAPI_INCOMPATIBLE_STRUCT_VERSION) {
+			NvAPI_ShortString string;
+			NvAPI_GetErrorMessage(ret, string);
+			applog(LOG_BLUE, "struct size is %06x : %s", buf[0], string);
+			for (int n=0; n < i/32; n++)
+				applog_hex(&buf[n*(32/4)], 32);
+			break;
+		}
+	}
+	free(buf);
+#endif
+
+#if 0
+	// Unsure of the meaning of these values
+	NVAPI_GPU_POWER_TOPO topo = { 0 };
+	topo.version = NVAPI_GPU_POWER_TOPO_VER;
+	if ((ret = NvAPI_DLL_ClientPowerTopologyGetStatus(phys[devNum], &topo)) == NVAPI_OK) {
+		if (topo.count)
+			applog(LOG_RAW, " GPU TDP is %.1f~%.1f W ?",
+				(double) topo.entries[0].power/1000, (double) topo.entries[1].power/1000);

+	// Ok on 970, not pascal
+	NV_GPU_PERF_PSTATES20_INFO_V2 pset2 = { 0 };
+	pset2.version = NV_GPU_PERF_PSTATES20_INFO_VER2;
+	pset2.ov.numVoltages = 1;
+	pset2.ov.voltages[0].voltDelta_uV.value = 3000; // gpu + 3000 uv;
+	ret = NvAPI_DLL_SetPstates20v2(phys[devNum], &pset2);
+#endif
+
+	NV_GPU_PERF_PSTATES20_INFO* info;
+	NV_INIT_STRUCT_ON(NV_GPU_PERF_PSTATES20_INFO, info, mem);
+	if ((ret = NvAPI_GPU_GetPstates20(phys[devNum], info)) != NVAPI_OK) {
+		NvAPI_ShortString string;
+		NvAPI_GetErrorMessage(ret, string);
+		if (opt_debug)
+			applog(LOG_RAW, "NVAPI GetPstates20: %s", string);
+		return -1;
+	}
+
+	for (n=0; n < info->numPstates; n++) {
+		NV_GPU_PSTATE20_CLOCK_ENTRY_V1* clocks = info->pstates[n].clocks;
+		applog(LOG_RAW, "%sP%d: MEM %4u MHz%s GPU %6.1f MHz%s %4u mV%s \x7F %d/%d",
+			info->pstates[n].pstateId == current ? ">":" ", (int) info->pstates[n].pstateId,
+			clocks[1].data.single.freq_kHz/1000, clocks[1].bIsEditable ? "*":" ",
+			(double) clocks[0].data.single.freq_kHz/1000, clocks[0].bIsEditable ? "*":" ",
+			info->pstates[n].baseVoltages[0].volt_uV/1000, info->pstates[n].baseVoltages[0].bIsEditable ? "*": " ",
+			info->pstates[n].baseVoltages[0].voltDelta_uV.valueRange.min/1000, // range if editable
+			info->pstates[n].baseVoltages[0].voltDelta_uV.valueRange.max/1000);
+		if (clocks[1].freqDelta_kHz.value || clocks[0].freqDelta_kHz.value) {
+			applog(LOG_RAW, "      OC %+4d MHz %+6.1f MHz",
+				clocks[1].freqDelta_kHz.value/1000, (double) clocks[0].freqDelta_kHz.value/1000);
+		}
+	}
+	// boost over volting (GTX 9xx only ?)
+	for (n=0; n < info->ov.numVoltages; n++) {
+		applog(LOG_RAW, " OV: %u%+d mV%s \x7F %d/%d",
+			info->ov.voltages[n].volt_uV/1000, info->ov.voltages[n].voltDelta_uV.value/1000, info->ov.voltages[n].bIsEditable ? 
"*":" ", + info->ov.voltages[n].voltDelta_uV.valueRange.min/1000, info->ov.voltages[n].voltDelta_uV.valueRange.max/1000); + } + + NV_GPU_CLOCK_FREQUENCIES *freqs; + NV_INIT_STRUCT_ON(NV_GPU_CLOCK_FREQUENCIES, freqs, mem); + freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); + applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Base Clocks", + (double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, + (double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); + + freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_BOOST_CLOCK; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); + applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz Boost Clocks", + (double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, + (double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); + + freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); + applog(LOG_RAW, " MEM %4.0f MHz GPU %6.1f MHz >Current", + (double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency / 1000, + (double) freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000); + + // Other clock values ?? + NVAPI_GPU_PERF_CLOCKS *pcl; + NV_INIT_STRUCT_ALLOC(NVAPI_GPU_PERF_CLOCKS, pcl); + int numClock=0; ret = NVAPI_OK; + while (ret == NVAPI_OK) { + if ((ret = NvAPI_DLL_GetPerfClocks(phys[devNum], numClock, pcl)) == NVAPI_OK) { + applog(LOG_RAW, " C%d: MEM %4.0f MHz GPU %6.1f MHz [%5.1f/%6.1f]", numClock, + (double) pcl->memFreq1/1000, (double) pcl->gpuFreq1/1000, (double) pcl->gpuFreqMin/1000, (double) pcl->gpuFreqMax/1000); + // ret = NvAPI_DLL_SetPerfClocks(phys[devNum], numClock, pcl); // error + } + numClock++; + } + + // Pascal only + NVAPI_VOLTBOOST_PERCENT *pvb; + NV_INIT_STRUCT_ON(NVAPI_VOLTBOOST_PERCENT, pvb, mem); + if ((ret = NvAPI_DLL_GetCoreVoltageBoostPercent(phys[devNum], pvb)) == NVAPI_OK) { + NVAPI_VOLTAGE_STATUS *pvdom; + NV_INIT_STRUCT_ALLOC(NVAPI_VOLTAGE_STATUS, pvdom); + NvAPI_DLL_GetCurrentVoltage(phys[devNum], pvdom); + if (pvdom && pvdom->value_uV) + applog(LOG_RAW, " GPU Voltage is %u mV %+d%% boost", pvdom->value_uV/1000, pvb->percent); + else if (pvdom) + applog(LOG_RAW, " GPU Voltage is %u mV", pvdom->value_uV/1000); + free(pvdom); + } else { + // Maxwell 9xx + NVAPI_VOLT_STATUS *mvdom, *mvstep; + NV_INIT_STRUCT_ALLOC(NVAPI_VOLT_STATUS, mvdom); + if (mvdom && (ret = NvAPI_DLL_GetVoltageDomainsStatus(phys[devNum], mvdom)) == NVAPI_OK) { + NV_INIT_STRUCT_ALLOC(NVAPI_VOLT_STATUS, mvstep); + NvAPI_DLL_GetVoltageStep(phys[devNum], mvstep); + if (mvdom->value_uV) applog(LOG_RAW, " GPU Voltage is %.1f mV with %.3f mV resolution", + (double) mvdom->value_uV/1000, (double) mvstep->value_uV/1000); + free(mvstep); + } + free(mvdom); + } + + uint32_t plim = nvapi_get_plimit(devNum); + double min_pw = 0, max_pw = 0; // percent + + NVAPI_GPU_POWER_INFO nfo = { 0 }; + nfo.version = NVAPI_GPU_POWER_INFO_VER; + ret = NvAPI_DLL_ClientPowerPoliciesGetInfo(phys[devNum], &nfo); + if (ret == NVAPI_OK && nfo.valid) { + min_pw = (double)nfo.entries[0].min_power / 1000; + max_pw = (double)nfo.entries[0].max_power / 1000; + } + applog(LOG_RAW, " Power limit is set to %u%%, range [%.0f-%.0f%%]", plim, min_pw, max_pw); + +#if 0 + NVAPI_COOLER_SETTINGS *cooler; + NV_INIT_STRUCT_ON(NVAPI_COOLER_SETTINGS, cooler, mem); + ret = NvAPI_DLL_GetCoolerSettings(phys[devNum], 7, cooler); + if (ret == NVAPI_OK) { + applog(LOG_RAW, " Fan level is set to %u%%", 
cooler->level); // wrong val, seems 1 (auto ?) + NVAPI_COOLER_LEVEL *fan; + NV_INIT_STRUCT_ALLOC(NVAPI_COOLER_LEVEL, fan); + fan->level = 100; + fan->count = 1; + ret = NvAPI_DLL_SetCoolerLevels(phys[devNum], 7, fan); + free(fan); + sleep(10); + ret = NvAPI_DLL_RestoreCoolerSettings(phys[devNum], cooler, 7); + } +#endif + + NV_GPU_THERMAL_SETTINGS *tset; + NV_INIT_STRUCT_ON(NV_GPU_THERMAL_SETTINGS, tset, mem); + + NVAPI_GPU_THERMAL_INFO *tnfo; + NV_INIT_STRUCT_ALLOC(NVAPI_GPU_THERMAL_INFO, tnfo); + NVAPI_GPU_THERMAL_LIMIT *tlim; + NV_INIT_STRUCT_ALLOC(NVAPI_GPU_THERMAL_LIMIT, tlim); + NvAPI_GPU_GetThermalSettings(phys[devNum], 0, tset); + NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], tnfo); + if ((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], tlim)) == NVAPI_OK) { + applog(LOG_RAW, " Thermal limit is set to %u, current Tc %d, range [%u-%u]", + tlim->entries[0].value >> 8, tset->sensor[0].currentTemp, + tnfo->entries[0].min_temp >> 8, tnfo->entries[0].max_temp >> 8); + } + free(tnfo); + free(tlim); + +#if 1 + // Read pascal Clocks Table, Empty on 9xx + //NVAPI_CLOCKS_RANGE* ranges; + //NV_INIT_STRUCT_ON(NVAPI_CLOCKS_RANGE, ranges, mem); + //ret = NvAPI_DLL_GetClockBoostRanges(phys[devNum], ranges); + + NVAPI_CLOCK_MASKS* boost; + NV_INIT_STRUCT_ON(NVAPI_CLOCK_MASKS, boost, mem); + ret = NvAPI_DLL_GetClockBoostMask(phys[devNum], boost); + int gpuClocks = 0, memClocks = 0; + for (n=0; n < 80+23; n++) { + if (boost->clocks[n].memDelta) memClocks++; + if (boost->clocks[n].gpuDelta) gpuClocks++; + } + + // PASCAL GTX ONLY + if (gpuClocks || memClocks) { + NVAPI_CLOCK_TABLE *table; + NV_INIT_STRUCT_ALLOC(NVAPI_CLOCK_TABLE, table); + memcpy(table->mask, boost->mask, 12); + ret = NvAPI_DLL_GetClockBoostTable(phys[devNum], table); + gpuClocks = 0, memClocks = 0; + for (n=0; n < 12; n++) { + if (table->buf0[n] != 0) applog(LOG_RAW, "boost table 0[%u] not empty (%u)", n, table->buf0[n]); + } + for (n=0; n < 80; n++) { + if (table->gpuDeltas[n].freqDelta) { + // note: gpu delta value seems to be x2, not the memory + //applog(LOG_RAW, " Boost gpu clock delta %u set to %d MHz", n, table->gpuDeltas[n].freqDelta/2000); + gpuClocks++; + } + } + for (n=0; n < 23; n++) { + if (table->memFilled[n]) { + //applog(LOG_RAW, " Boost mem clock delta %u set to %d MHz", n, table->memDeltas[n]/1000); + memClocks++; + } + } + for (n=0; n < 1529; n++) { + if (table->buf1[n] != 0) applog(LOG_RAW, "boost table 1[%u] not empty (%u)", n, table->buf1[n]); + } + applog(LOG_RAW, " Boost table contains %d gpu and %d mem levels.", gpuClocks, memClocks); + free(table); + + NVAPI_VFP_CURVE *curve; + NV_INIT_STRUCT_ALLOC(NVAPI_VFP_CURVE, curve); + memcpy(curve->mask, boost->mask, 12); + ret = NvAPI_DLL_GetVFPCurve(phys[devNum], curve); + gpuClocks = 0, memClocks = 0; + for (n=0; n < 80; n++) { + if (curve->gpuEntries[n].freq_kHz || curve->gpuEntries[n].volt_uV) { + // applog(LOG_RAW, "gpu volt table %2u %4u MHz - %6u mV", n, curve->gpuEntries[n].freq_kHz/1000, curve->gpuEntries[n].volt_uV/1000); + gpuClocks++; + } + } + for (n=0; n < 23; n++) { + if (curve->memEntries[n].freq_kHz || curve->memEntries[n].volt_uV) { + // applog(LOG_RAW, "mem volt table %2u %4u MHz - %6u mV", n, curve->memEntries[n].freq_kHz/1000, curve->memEntries[n].volt_uV/1000); + memClocks++; + } + } + for (n=0; n < 1064; n++) { + if (curve->buf1[n] != 0) applog(LOG_RAW, "volt table buf1[%u] not empty (%u)", n, curve->buf1[n]); + } + applog(LOG_RAW, " Volts table contains %d gpu and %d mem levels.", gpuClocks, memClocks); + free(curve); + } + 
+ // Maxwell + else { + NVAPI_VOLTAGES_TABLE* volts; + NV_INIT_STRUCT_ALLOC(NVAPI_VOLTAGES_TABLE, volts); + int entries = 0; + ret = NvAPI_DLL_GetVoltages(phys[devNum], volts); + for (n=0; n < 128; n++) { + if (volts->entries[n].volt_uV) + entries++; + } + applog(LOG_RAW, " Volts table contains %d gpu levels.", entries); + free(volts); + } + + NV_DISPLAY_DRIVER_MEMORY_INFO* meminfo; + NV_INIT_STRUCT_ON(NV_DISPLAY_DRIVER_MEMORY_INFO, meminfo, mem); + meminfo->version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER; + if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], meminfo)) == NVAPI_OK) { + applog(LOG_RAW, " Memory: %u MB, %.1f used", meminfo->dedicatedVideoMemory/1024, + (double) (meminfo->availableDedicatedVideoMemory - meminfo->curAvailableDedicatedVideoMemory)/1024); + } +#if 0 /* some undetermined stats */ + NVAPI_GPU_PERF_INFO pi = { 0 }; + pi.version = NVAPI_GPU_PERF_INFO_VER; + ret = NvAPI_DLL_PerfPoliciesGetInfo(phys[devNum], &pi); + + NVAPI_GPU_PERF_STATUS ps = { 0 }; + ps.version = NVAPI_GPU_PERF_STATUS_VER; + ret = NvAPI_DLL_PerfPoliciesGetStatus(phys[devNum], &ps); + applog(LOG_BLUE, "%llx %lld. %lld. %llx %llx %llx", ps.timeRef, ps.val1, ps.val2, ps.values[0], ps.values[1], ps.values[2]); +#endif + +#endif + free(mem); + return 0; +} + +// workaround for buggy driver 378.49 +unsigned int nvapi_get_gpu_clock(unsigned int devNum) +{ + NvAPI_Status ret = NVAPI_OK; + unsigned int freq = 0; + NV_GPU_CLOCK_FREQUENCIES *freqs; + NV_INIT_STRUCT_ALLOC(NV_GPU_CLOCK_FREQUENCIES, freqs); + freqs->ClockType = NV_GPU_CLOCK_FREQUENCIES_CURRENT_FREQ; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], freqs); + if (ret == NVAPI_OK) { + freq = freqs->domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency / 1000; + } + free(freqs); + return freq; // in MHz +} + uint8_t nvapi_get_plimit(unsigned int devNum) { NvAPI_Status ret = NVAPI_OK; - NVAPI_GPU_POWER_STATUS pol = {0}; + NVAPI_GPU_POWER_STATUS pol = { 0 }; pol.version = NVAPI_GPU_POWER_STATUS_VER; - if((ret = NvAPI_DLL_ClientPowerPoliciesGetStatus(phys[devNum], &pol)) != NVAPI_OK) - { + if ((ret = NvAPI_DLL_ClientPowerPoliciesGetStatus(phys[devNum], &pol)) != NVAPI_OK) { NvAPI_ShortString string; NvAPI_GetErrorMessage(ret, string); - if(opt_debug) + if (opt_debug) applog(LOG_DEBUG, "NVAPI PowerPoliciesGetStatus: %s", string); return 0; } - return (uint8_t)(pol.entries[0].power / 1000); // in percent + return (uint8_t) (pol.entries[0].power / 1000); // in percent } int nvapi_set_plimit(unsigned int devNum, uint16_t percent) @@ -960,39 +1578,208 @@ int nvapi_set_plimit(unsigned int devNum, uint16_t percent) NvAPI_Status ret = NVAPI_OK; uint32_t val = percent * 1000; - NVAPI_GPU_POWER_INFO nfo = {0}; + NVAPI_GPU_POWER_INFO nfo = { 0 }; nfo.version = NVAPI_GPU_POWER_INFO_VER; ret = NvAPI_DLL_ClientPowerPoliciesGetInfo(phys[devNum], &nfo); - if(ret == NVAPI_OK) - { - if(val == 0) + if (ret == NVAPI_OK) { + if (val == 0) val = nfo.entries[0].def_power; - else if(val < nfo.entries[0].min_power) + else if (val < nfo.entries[0].min_power) val = nfo.entries[0].min_power; - else if(val > nfo.entries[0].max_power) + else if (val > nfo.entries[0].max_power) val = nfo.entries[0].max_power; } - NVAPI_GPU_POWER_STATUS pol = {0}; + NVAPI_GPU_POWER_STATUS pol = { 0 }; pol.version = NVAPI_GPU_POWER_STATUS_VER; pol.flags = 1; pol.entries[0].power = val; - if((ret = NvAPI_DLL_ClientPowerPoliciesSetStatus(phys[devNum], &pol)) != NVAPI_OK) - { + if ((ret = NvAPI_DLL_ClientPowerPoliciesSetStatus(phys[devNum], &pol)) != NVAPI_OK) { NvAPI_ShortString string; 
NvAPI_GetErrorMessage(ret, string); - if(opt_debug) + if (opt_debug) applog(LOG_DEBUG, "NVAPI PowerPoliciesSetStatus: %s", string); return -1; } return ret; } +int nvapi_set_tlimit(unsigned int devNum, uint8_t limit) +{ + NvAPI_Status ret; + uint32_t val = limit; + + if (devNum >= nvapi_dev_cnt) + return -ENODEV; + + NV_GPU_THERMAL_SETTINGS tset = { 0 }; + NVAPI_GPU_THERMAL_INFO tnfo = { 0 }; + NVAPI_GPU_THERMAL_LIMIT tlim = { 0 }; + tset.version = NV_GPU_THERMAL_SETTINGS_VER; + NvAPI_GPU_GetThermalSettings(phys[devNum], 0, &tset); + tnfo.version = NVAPI_GPU_THERMAL_INFO_VER; + NvAPI_DLL_ClientThermalPoliciesGetInfo(phys[devNum], &tnfo); + tlim.version = NVAPI_GPU_THERMAL_LIMIT_VER; + if ((ret = NvAPI_DLL_ClientThermalPoliciesGetLimit(phys[devNum], &tlim)) == NVAPI_OK) { + tlim.entries[0].value = val << 8; + tlim.flags = 1; + ret = NvAPI_DLL_ClientThermalPoliciesSetLimit(phys[devNum], &tlim); + if (ret == NVAPI_OK) { + applog(LOG_INFO, "GPU #%u: thermal limit set to %u, current Tc %d, range [%u-%u]", + devNum, val, tset.sensor[0].currentTemp, + tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8); + } else { + NvAPI_ShortString string; + NvAPI_GetErrorMessage(ret, string); + applog(LOG_WARNING, "GPU #%u: thermal limit: %s, valid range is [%u-%u]", devNum, string, + tnfo.entries[0].min_temp >> 8, tnfo.entries[0].max_temp >> 8); + } + } + return (int) ret; +} + +int nvapi_set_gpuclock(unsigned int devNum, uint32_t clock) +{ + NvAPI_Status ret; + NvS32 delta = 0; + + if (devNum >= nvapi_dev_cnt) + return -ENODEV; +#if 0 + // wrong api to get default base clock when modified, cuda props seems fine + NV_GPU_CLOCK_FREQUENCIES freqs = { 0 }; + freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER; + freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); + if (ret == NVAPI_OK) { + delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS].frequency; + } + + NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 }; + deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER; + ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0); // we want default clock grr! 
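+	// (this GetPstatesInfoEx attempt is also inside the #if 0 block: the
+	// live code below matches the CUDA device by PCI bus id and takes the
+	// default base clock from the cuda clockRate property instead, which,
+	// per the comments above, seems the more dependable source here)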
+ if (ret == NVAPI_OK) { + if (deffreqs.pstates[0].clocks[1].domainId == NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS) + delta = (clock * 1000) - deffreqs.pstates[0].clocks[1].freq*2; + } +#endif + + cudaDeviceProp props = { 0 }; + NvU32 busId = 0xFFFF; + ret = NvAPI_GPU_GetBusId(phys[devNum], &busId); + for (int d=0; d < (int) nvapi_dev_cnt; d++) { + // unsure about devNum, so be safe + cudaGetDeviceProperties(&props, d); + if (props.pciBusID == busId) { + delta = (clock * 1000) - props.clockRate; + break; + } + } + + if (delta == (clock * 1000)) + return ret; + + NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 }; + pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; + pset1.numPstates = 1; + pset1.numClocks = 1; + // Ok on both 1080 and 970 + pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_GRAPHICS; + pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta; + ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); + if (ret == NVAPI_OK) { + applog(LOG_INFO, "GPU #%u: boost gpu clock set to %u (delta %d)", devNum, clock, delta/1000); + } + return ret; +} + +int nvapi_set_memclock(unsigned int devNum, uint32_t clock) +{ + NvAPI_Status ret; + NvS32 delta = 0; + + if (devNum >= nvapi_dev_cnt) + return -ENODEV; + + // wrong to get default base clock (when modified) on maxwell (same as cuda props one) + NV_GPU_CLOCK_FREQUENCIES freqs = { 0 }; + freqs.version = NV_GPU_CLOCK_FREQUENCIES_VER; + freqs.ClockType = NV_GPU_CLOCK_FREQUENCIES_BASE_CLOCK; + ret = NvAPI_GPU_GetAllClockFrequencies(phys[devNum], &freqs); // wrong base clocks, useless + if (ret == NVAPI_OK) { + delta = (clock * 1000) - freqs.domain[NVAPI_GPU_PUBLIC_CLOCK_MEMORY].frequency; + } + + // seems ok on maxwell and pascal for the mem clocks + NV_GPU_PERF_PSTATES_INFO deffreqs = { 0 }; + deffreqs.version = NV_GPU_PERF_PSTATES_INFO_VER; + ret = NvAPI_GPU_GetPstatesInfoEx(phys[devNum], &deffreqs, 0x1); // deprecated but req for def clocks + if (ret == NVAPI_OK) { + if (deffreqs.pstates[0].clocks[0].domainId == NVAPI_GPU_PUBLIC_CLOCK_MEMORY) + delta = (clock * 1000) - deffreqs.pstates[0].clocks[0].freq; + } + + if (delta == (clock * 1000)) + return ret; + + // todo: bounds check with GetPstates20 + + NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 }; + pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; + pset1.numPstates = 1; + pset1.numClocks = 1; + pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY; + pset1.pstates[0].clocks[0].freqDelta_kHz.value = delta; + ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); + if (ret == NVAPI_OK) { + applog(LOG_INFO, "GPU #%u: Boost mem clock set to %u (delta %d)", devNum, clock, delta/1000); + } + return ret; +} + +static int nvapi_set_memoffset(unsigned int devNum, int32_t delta, bool log=true) +{ + NvAPI_Status ret; + NvS32 deltaKHz = delta * 1000; + + if (devNum >= nvapi_dev_cnt) + return -ENODEV; + + // todo: bounds check with GetPstates20 + + NV_GPU_PERF_PSTATES20_INFO_V1 pset1 = { 0 }; + pset1.version = NV_GPU_PERF_PSTATES20_INFO_VER1; + pset1.numPstates = 1; + pset1.numClocks = 1; + pset1.pstates[0].clocks[0].domainId = NVAPI_GPU_PUBLIC_CLOCK_MEMORY; + pset1.pstates[0].clocks[0].freqDelta_kHz.value = deltaKHz; + ret = NvAPI_DLL_SetPstates20v1(phys[devNum], &pset1); + if (ret == NVAPI_OK) { + if (log) applog(LOG_INFO, "GPU #%u: Memory clock offset set to %+d MHz", devNum, deltaKHz / 1000); + need_memclockrst = true; + } + return ret; +} + +// Replacement for WIN32 CUDA 6.5 on pascal +int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total) +{ + NvAPI_Status ret = NVAPI_OK; + 
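+	// note: NV_DISPLAY_DRIVER_MEMORY_INFO appears to report sizes in KB,
+	// not the bytes returned by the cudaMemGetInfo() this helper stands in
+	// for (nvapi_pstateinfo() above divides these fields by 1024 to get MB)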
+
+// Replacement for WIN32 CUDA 6.5 on pascal
+int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total)
+{
+	NvAPI_Status ret = NVAPI_OK;
+	NV_DISPLAY_DRIVER_MEMORY_INFO mem = { 0 };
+	mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER;
+	unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS];
+	if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) {
+		*total = (uint64_t) mem.dedicatedVideoMemory;// mem.availableDedicatedVideoMemory;
+		*free = (uint64_t) mem.curAvailableDedicatedVideoMemory;
+	}
+	return (int) ret;
+}
+
 int nvapi_init()
 {
 	int num_gpus = cuda_num_devices();
 	NvAPI_Status ret = NvAPI_Initialize();
-	if (!ret == NVAPI_OK){
+	if (ret != NVAPI_OK) {
 		NvAPI_ShortString string;
 		NvAPI_GetErrorMessage(ret, string);
 		if (opt_debug)
@@ -1038,11 +1825,12 @@ int nvapi_init()
 			applog(LOG_DEBUG, "NVAPI NvAPI_GPU_GetFullName: %s", string);
 		}
 	}
-
 #if 0
-	NvAPI_ShortString ver;
-	NvAPI_GetInterfaceVersionString(ver);
-	applog(LOG_DEBUG, "NVAPI Version: %s", ver);
+	if (opt_debug) {
+		NvAPI_ShortString ver;
+		NvAPI_GetInterfaceVersionString(ver);
+		applog(LOG_DEBUG, "%s", ver);
+	}
 #endif
 
 	NvU32 udv;
@@ -1054,7 +1842,92 @@ int nvapi_init()
 	return 0;
 }
-#endif
+
+int nvapi_init_settings()
+{
+	// nvapi.dll
+	int ret = nvapi_dll_init();
+	if (ret != NVAPI_OK)
+		return ret;
+
+	if (!opt_n_threads) {
+		opt_n_threads = active_gpus;
+	}
+
+	for (int n=0; n < opt_n_threads; n++) {
+		int dev_id = device_map[n % MAX_GPUS];
+		if (device_plimit[dev_id] && !nvml_plimit_set) {
+			if (nvapi_set_plimit(nvapi_dev_map[dev_id], device_plimit[dev_id]) == NVAPI_OK) {
+				uint32_t res = nvapi_get_plimit(nvapi_dev_map[dev_id]);
+				gpulog(LOG_INFO, n, "Power limit is set to %u%%", res);
+			}
+		}
+		if (device_tlimit[dev_id]) {
+			nvapi_set_tlimit(nvapi_dev_map[dev_id], device_tlimit[dev_id]);
+		}
+		if (device_gpu_clocks[dev_id]) {
+			ret = nvapi_set_gpuclock(nvapi_dev_map[dev_id], device_gpu_clocks[dev_id]);
+			if (ret) {
+				NvAPI_ShortString string;
+				NvAPI_GetErrorMessage((NvAPI_Status) ret, string);
+				gpulog(LOG_WARNING, n, "nvapi_set_gpuclock %s", string);
+			}
+		}
+		if (device_mem_offsets[dev_id]) {
+			ret = nvapi_set_memoffset(nvapi_dev_map[dev_id], device_mem_offsets[dev_id]);
+			if (ret) {
+				NvAPI_ShortString string;
+				NvAPI_GetErrorMessage((NvAPI_Status)ret, string);
+				gpulog(LOG_WARNING, n, "nvapi_set_memoffset %s", string);
+			}
+		}
+		else if (device_mem_clocks[dev_id]) {
+			ret = nvapi_set_memclock(nvapi_dev_map[dev_id], device_mem_clocks[dev_id]);
+			if (ret) {
+				NvAPI_ShortString string;
+				NvAPI_GetErrorMessage((NvAPI_Status) ret, string);
+				gpulog(LOG_WARNING, n, "nvapi_set_memclock %s", string);
+			}
+		}
+		if (device_pstate[dev_id]) {
+			// dunno how via nvapi or/and pascal
+		}
+		if (device_led[dev_id] != -1) {
+			int err = nvapi_set_led(nvapi_dev_map[dev_id], device_led[dev_id], device_name[dev_id]);
+			if (err != 0) {
+				gpulog(LOG_WARNING, n, "Unable to set led value (err %d)", err);
+			}
+			device_led_state[dev_id] = device_led[dev_id];
+		}
+	}
+
+	return ret;
+}
+
+void nvapi_toggle_clocks(int thr_id, bool enable)
+{
+	int dev_id = device_map[thr_id % MAX_GPUS];
+	if (device_mem_offsets[dev_id]) {
+		nvapi_set_memoffset(nvapi_dev_map[dev_id], enable ? device_mem_offsets[dev_id] : 0, false);
+	}
+}
+
+unsigned int nvapi_devnum(int dev_id)
+{
+	return nvapi_dev_map[dev_id];
+}
+
+int nvapi_devid(unsigned int devNum)
+{
+	for (int i=0; i < opt_n_threads; i++) {
+		int dev_id = device_map[i % MAX_GPUS];
+		if (nvapi_dev_map[dev_id] == devNum)
+			return dev_id;
+	}
+	return 0;
+}
+
+#endif /* WIN32 : Windows specific (nvapi) */
 
 /* api functions -------------------------------------- */
 
@@ -1144,13 +2017,11 @@ unsigned int gpu_power(struct cgpu_info *gpu)
 {
 	unsigned int mw = 0;
 	int support = -1;
-	if(hnvml)
-	{
+	if (hnvml) {
 		support = nvml_get_power_usage(hnvml, gpu->gpu_id, &mw);
 	}
 #ifdef WIN32
-	if(support == -1)
-	{
+	if (support == -1) {
 		unsigned int pct = 0;
 		nvapi_getusage(nvapi_dev_map[gpu->gpu_id], &pct);
 		pct *= nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]);
@@ -1158,94 +2029,24 @@ unsigned int gpu_power(struct cgpu_info *gpu)
 		mw = pct; // to fix
 	}
 #endif
-	if(gpu->gpu_power > 0)
-	{
+	if (gpu->gpu_power > 0) {
 		// average
 		mw = (gpu->gpu_power + mw) / 2;
 	}
 	return mw;
 }
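The Windows fallback in gpu_power multiplies two percentages (GPU usage and power limit), so without NVML the figure is only a relative estimate, not real milliwatts; hence the author's "to fix" note. Illustrative arithmetic only:

	#include <cstdio>

	int main()
	{
		unsigned int pct = 80; // nvapi_getusage result (percent), example value
		pct *= 75;             // nvapi_get_plimit result (percent), example value
		pct /= 100;
		printf("approx. power figure: %u\n", pct); // 60, a percentage stored as "mw"
		return 0;
	}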
 
-int nvml_set_plimit(nvml_handle *nvmlh, int dev_id)
-{
-	nvmlReturn_t rc = NVML_ERROR_UNKNOWN;
-	uint32_t gpu_clk = 0, mem_clk = 0;
-	int n = nvmlh->cuda_nvml_device_id[dev_id];
-	if(n < 0 || n >= nvmlh->nvml_gpucount)
-		return -ENODEV;
-
-	if(!device_plimit[dev_id])
-		return 0; // nothing to do
-
-	if(!nvmlh->nvmlDeviceSetPowerManagementLimit)
-		return -ENOSYS;
-
-	uint32_t plimit = device_plimit[dev_id] * 1000;
-	uint32_t pmin = 1000, pmax = 0, prev_limit = 0;
-	if(nvmlh->nvmlDeviceGetPowerManagementLimitConstraints)
-		rc = nvmlh->nvmlDeviceGetPowerManagementLimitConstraints(nvmlh->devs[n], &pmin, &pmax);
-
-	if(rc != NVML_SUCCESS)
-	{
-		if(!nvmlh->nvmlDeviceGetPowerManagementLimit)
-			return -ENOSYS;
-	}
-	nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &prev_limit);
-	if(!pmax) pmax = prev_limit;
-
-	plimit = min(plimit, pmax);
-	plimit = max(plimit, pmin);
-	rc = nvmlh->nvmlDeviceSetPowerManagementLimit(nvmlh->devs[n], plimit);
-	if(rc != NVML_SUCCESS)
-	{
-#ifndef WIN32
-		applog(LOG_WARNING, "GPU #%d: plimit %s", dev_id, nvmlh->nvmlErrorString(rc));
-#endif
-		return -1;
-	}
-	else
-	{
-		device_plimit[dev_id] = plimit / 1000;
-		nvml_plimit_set = true;
-	}
-
-	if(!opt_quiet)
-	{
-		applog(LOG_INFO, "GPU #%d: power limit set to %uW (allowed range is %u-%u)",
-			dev_id, plimit / 1000U, pmin / 1000U, pmax / 1000U);
-	}
-
-	limit_prev[dev_id] = prev_limit;
-	return 1;
-}
-
-uint32_t nvml_get_plimit(nvml_handle *nvmlh, int dev_id)
-{
-	uint32_t plimit = 0;
-	int n = nvmlh ? nvmlh->cuda_nvml_device_id[dev_id] : -1;
-	if(n < 0 || n >= nvmlh->nvml_gpucount)
-		return 0;
-
-	if(nvmlh->nvmlDeviceGetPowerManagementLimit)
-	{
-		nvmlh->nvmlDeviceGetPowerManagementLimit(nvmlh->devs[n], &plimit);
-	}
-	return plimit;
-}
-
 unsigned int gpu_plimit(struct cgpu_info *gpu)
 {
 	unsigned int mw = 0;
 	int support = -1;
-	if(hnvml)
-	{
+	if (hnvml) {
 		mw = nvml_get_plimit(hnvml, gpu->gpu_id);
 		support = (mw > 0);
 	}
 #ifdef WIN32
 	// NVAPI value is in % (< 100 so)
-	if(support == -1)
-	{
+	if (support == -1) {
 		mw = nvapi_get_plimit(nvapi_dev_map[gpu->gpu_id]);
 	}
 #endif
@@ -1259,18 +2060,21 @@ static int translate_vendor_id(uint16_t vid, char *vendorname)
 		const char *name;
 	} vendors[] = {
 		{ 0x1043, "ASUS" },
+		{ 0x1048, "Elsa" },
 		{ 0x107D, "Leadtek" },
 		{ 0x10B0, "Gainward" },
 //		{ 0x10DE, "NVIDIA" },
 		{ 0x1458, "Gigabyte" },
 		{ 0x1462, "MSI" },
-		{ 0x154B, "PNY" },
+		{ 0x154B, "PNY" }, // maybe storage devices
+		{ 0x1569, "Palit" },
 		{ 0x1682, "XFX" },
 		{ 0x196D, "Club3D" },
+		{ 0x196E, "PNY" },
 		{ 0x19DA, "Zotac" },
 		{ 0x19F1, "BFG" },
 		{ 0x1ACC, "PoV" },
-		{ 0x1B4C, "KFA2" },
+		{ 0x1B4C, "Galax" }, // KFA2 in EU, to check on Pascal cards
 		{ 0x3842, "EVGA" },
 		{ 0x7377, "Colorful" },
 		{ 0, "" }
@@ -1290,52 +2094,8 @@ static int translate_vendor_id(uint16_t vid, char *vendorname)
 	return 0;
 }
 
-#ifdef HAVE_PCIDEV
-extern "C" {
-#include <pci/pci.h>
-}
-static int linux_gpu_vendor(uint8_t pci_bus_id, char* vendorname, uint16_t &pid)
-{
-	uint16_t subvendor = 0;
-	struct pci_access *pci;
-	struct pci_dev *dev;
-	uint16_t subdevice;
-
-	if (!vendorname)
-		return -EINVAL;
-
-	pci = pci_alloc();
-	if (!pci)
-		return -ENODEV;
-
-	pci_init(pci);
-	pci_scan_bus(pci);
-
-	for(dev = pci->devices; dev; dev = dev->next)
-	{
-		if (dev->bus == pci_bus_id && dev->vendor_id == 0x10DE)
-		{
-			if (!(dev->known_fields & PCI_FILL_CLASS))
-				pci_fill_info(dev, PCI_FILL_CLASS);
-			if (dev->device_class != PCI_CLASS_DISPLAY_VGA)
-				continue;
-			subvendor = pci_read_word(dev, PCI_SUBSYSTEM_VENDOR_ID);
-			subdevice = pci_read_word(dev, PCI_SUBSYSTEM_ID); // model
-
-			translate_vendor_id(subvendor, vendorname);
-		}
-	}
-	pci_cleanup(pci);
-	return (int) subvendor;
-}
-#endif
-
 int gpu_vendor(uint8_t pci_bus_id, char *vendorname)
 {
-#ifdef HAVE_PCIDEV
-	uint16_t pid = 0;
-	return linux_gpu_vendor(pci_bus_id, vendorname, pid);
-#else
 	uint16_t vid = 0, pid = 0;
 	if (hnvml) { // may not be initialized on start...
 		for (int id=0; id < hnvml->nvml_gpucount; id++) {
@@ -1355,7 +2115,6 @@ int gpu_vendor(uint8_t pci_bus_id, char *vendorname)
 #endif
 	}
 	return translate_vendor_id(vid, vendorname);
-#endif
 }
 
 int gpu_info(struct cgpu_info *gpu)
@@ -1372,13 +2131,7 @@ int gpu_info(struct cgpu_info *gpu)
 	if (hnvml) {
 		gpu->nvml_id = (int8_t) hnvml->cuda_nvml_device_id[id];
-#ifdef HAVE_PCIDEV
-		gpu->gpu_vid = linux_gpu_vendor(hnvml->nvml_pci_bus_id[id], vendorname, gpu->gpu_pid);
-		if (!gpu->gpu_vid || !gpu->gpu_pid)
-			nvml_get_info(hnvml, id, gpu->gpu_vid, gpu->gpu_pid);
-#else
 		nvml_get_info(hnvml, id, gpu->gpu_vid, gpu->gpu_pid);
-#endif
 		nvml_get_serial(hnvml, id, gpu->gpu_sn, sizeof(gpu->gpu_sn));
 		nvml_get_bios(hnvml, id, gpu->gpu_desc, sizeof(gpu->gpu_desc));
 	}
@@ -1392,3 +2145,132 @@
 }
 
 #endif /* USE_WRAPNVML */
+
+static int rgb_percent(int RGB, int percent)
+{
+	uint8_t* comp = (uint8_t*) &RGB;
+	int res = ((percent*comp[2]) / 100) << 16;
+	res += ((percent*comp[1]) / 100) << 8;
+	return res + ((percent*comp[0]) / 100);
+}
+
+void gpu_led_on(int dev_id)
+{
+#if defined(WIN32) && defined(USE_WRAPNVML)
+	int value = device_led[dev_id];
+	if (device_led_state[dev_id] != value) {
+		if (nvapi_set_led(nvapi_dev_map[dev_id], value, device_name[dev_id]) == 0)
+			device_led_state[dev_id] = value;
+	}
+#endif
+}
+
+void gpu_led_percent(int dev_id, int percent)
+{
+#if defined(WIN32) && defined(USE_WRAPNVML)
+	int value = rgb_percent(device_led[dev_id], percent);
+	if (device_led_state[dev_id] != value) {
+		if (nvapi_set_led(nvapi_dev_map[dev_id], value, device_name[dev_id]) == 0)
+			device_led_state[dev_id] = value;
+	}
+#endif
}

void gpu_led_off(int dev_id)
{
#if defined(WIN32) && defined(USE_WRAPNVML)
	if (device_led_state[dev_id]) {
		if (nvapi_set_led(nvapi_dev_map[dev_id], 0, device_name[dev_id]) == 0)
			device_led_state[dev_id] = 0;
	}
#endif
}
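rgb_percent treats its int argument as a packed 0xRRGGBB value (the byte access assumes a little-endian host) and scales each channel, which is how gpu_led_percent dims the configured LED color with activity. An equivalent standalone sketch using shifts instead of byte aliasing:

	#include <cstdio>

	static int scale_rgb(int rgb, int percent)
	{
		int r = (rgb >> 16) & 0xFF, g = (rgb >> 8) & 0xFF, b = rgb & 0xFF;
		return ((percent * r / 100) << 16) | ((percent * g / 100) << 8) | (percent * b / 100);
	}

	int main()
	{
		printf("%06X\n", scale_rgb(0x00FF80, 50)); // prints 007F40
		return 0;
	}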
+
+#ifdef USE_WRAPNVML
+extern double thr_hashrates[MAX_GPUS];
+extern bool opt_debug_threads;
+extern bool opt_hwmonitor;
+extern int num_cpus;
+
+void *monitor_thread(void *userdata)
+{
+	int thr_id = -1;
+
+	while (!abort_flag && !opt_quiet)
+	{
+		// This thread monitors card's power lazily during scans, one at a time...
+		thr_id = (thr_id + 1) % opt_n_threads;
+		struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
+		int dev_id = cgpu->gpu_id; cudaSetDevice(dev_id);
+
+		if (hnvml != NULL && cgpu)
+		{
+			char khw[32] = { 0 };
+			uint64_t clock = 0, mem_clock = 0;
+			uint32_t fanpercent = 0, power = 0;
+			double tempC = 0, khs_per_watt = 0;
+			uint32_t counter = 0;
+			int max_loops = 1000;
+
+			pthread_cond_wait(&cgpu->monitor.sampling_signal, &cgpu->monitor.lock);
+
+			do {
+				unsigned int tmp_clock=0, tmp_memclock=0;
+				nvml_get_current_clocks(dev_id, &tmp_clock, &tmp_memclock);
+#ifdef WIN32
+				if (tmp_clock < 200) {
+					// workaround for buggy drivers 378.x (real clock)
+					tmp_clock = nvapi_get_gpu_clock(nvapi_dev_map[dev_id]);
+				}
+#endif
+				if (tmp_clock < 200) {
+					// some older cards only report a base clock with cuda props.
+					if (cuda_gpu_info(cgpu) == 0) {
+						tmp_clock = cgpu->gpu_clock/1000;
+						tmp_memclock = cgpu->gpu_memclock/1000;
+					}
+				}
+				clock += tmp_clock;
+				mem_clock += tmp_memclock;
+				tempC += gpu_temp(cgpu);
+				fanpercent += gpu_fanpercent(cgpu);
+				power += gpu_power(cgpu);
+				counter++;
+
+				usleep(50000);
+				if (abort_flag) goto abort;
+
+			} while (cgpu->monitor.sampling_flag && (--max_loops));
+
+			cgpu->monitor.gpu_temp = (uint32_t) (tempC/counter);
+			cgpu->monitor.gpu_fan = fanpercent/counter;
+			cgpu->monitor.gpu_power = power/counter;
+			cgpu->monitor.gpu_clock = (uint32_t) (clock/counter);
+			cgpu->monitor.gpu_memclock = (uint32_t) (mem_clock/counter);
+
+			if (power) {
+				khs_per_watt = stats_get_speed(thr_id, thr_hashrates[thr_id]);
+				khs_per_watt = khs_per_watt / ((double)power / counter);
+				format_hashrate(khs_per_watt * 1000, khw);
+				if (strlen(khw))
+					sprintf(&khw[strlen(khw)-1], "W %uW ", cgpu->monitor.gpu_power / 1000);
+			}
+
+			if (opt_hwmonitor && (time(NULL) - cgpu->monitor.tm_displayed) > 60) {
+				gpulog(LOG_INFO, thr_id, "%u MHz %s%uC FAN %u%%",
+					cgpu->monitor.gpu_clock/*, cgpu->monitor.gpu_memclock*/,
+					khw, cgpu->monitor.gpu_temp, cgpu->monitor.gpu_fan
+				);
+				cgpu->monitor.tm_displayed = (uint32_t)time(NULL);
+			}
+
+			pthread_mutex_unlock(&cgpu->monitor.lock);
+		}
+		usleep(500); // safety
+	}
+abort:
+	if (opt_debug_threads)
+		applog(LOG_DEBUG, "%s() died", __func__);
+	return NULL;
+}
+#endif
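monitor_thread averages its samples over one scan, then derives the efficiency figure it logs: with thr_hashrates[] in hashes per second and the gpu_power() samples in milliwatts (as used elsewhere in this patch), hashrate divided by average milliwatts, times 1000, gives hashes per second per watt. A worked sketch of that arithmetic with example numbers:

	#include <cstdio>

	int main()
	{
		double hs = 20e6;          // 20 MH/s measured speed (example)
		double avg_mw = 120000.0;  // 120 W averaged over the sampling loop (example)
		double h_per_w = hs / avg_mw * 1000.0;
		printf("%.2f kH/W\n", h_per_w / 1000.0); // 166.67 kH/W
		return 0;
	}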
diff --git a/nvml.h b/nvml.h
index 71ff20b0..96547f18 100644
--- a/nvml.h
+++ b/nvml.h
@@ -1,27 +1,32 @@
 /*
- * A trivial little dlopen()-based wrapper library for the
- * NVIDIA NVML library, to allow runtime discovery of NVML on an
- * arbitrary system. This is all very hackish and simple-minded, but
- * it serves my immediate needs in the short term until NVIDIA provides
- * a static NVML wrapper library themselves, hopefully in
- * CUDA 6.5 or maybe sometime shortly after.
- *
- * This trivial code is made available under the "new" 3-clause BSD license,
- * and/or any of the GPL licenses you prefer.
- * Feel free to use the code and modify as you see fit.
- *
- * John E. Stone - john.stone@gmail.com
- *
- */
+* A trivial little dlopen()-based wrapper library for the
+* NVIDIA NVML library, to allow runtime discovery of NVML on an
+* arbitrary system. This is all very hackish and simple-minded, but
+* it serves my immediate needs in the short term until NVIDIA provides
+* a static NVML wrapper library themselves, hopefully in
+* CUDA 6.5 or maybe sometime shortly after.
+*
+* This trivial code is made available under the "new" 3-clause BSD license,
+* and/or any of the GPL licenses you prefer.
+* Feel free to use the code and modify as you see fit.
+*
+* John E. Stone - john.stone@gmail.com
+*
+*/
 
 #ifdef USE_WRAPNVML
 
 #include "miner.h"
 
+void *monitor_thread(void *userdata);
+
 typedef void * nvmlDevice_t;
 
+#define NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE 16
+
 /* our own version of the PCI info struct */
-typedef struct {
-	char bus_id_str[16]; /* string form of bus info */
+typedef struct
+{
+	char bus_id_str[NVML_DEVICE_PCI_BUS_ID_BUFFER_SIZE];
 	unsigned int domain;
 	unsigned int bus;
 	unsigned int device;
@@ -33,19 +38,22 @@ typedef struct {
 	unsigned int res3;
 } nvmlPciInfo_t;
 
-enum nvmlEnableState_t {
+enum nvmlEnableState_t
+{
 	NVML_FEATURE_DISABLED = 0,
 	NVML_FEATURE_ENABLED = 1,
 	NVML_FEATURE_UNKNOWN = 2
 };
 
-enum nvmlRestrictedAPI_t {
+enum nvmlRestrictedAPI_t
+{
 	NVML_RESTRICTED_API_SET_APPLICATION_CLOCKS = 0,
-	NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS = 1,
+	NVML_RESTRICTED_API_SET_AUTO_BOOSTED_CLOCKS = 1, // not for GTX cards
 	NVML_RESTRICTED_API_COUNT = 2
 };
 
-enum nvmlReturn_t {
+enum nvmlReturn_t
+{
 	NVML_SUCCESS = 0,
 	NVML_ERROR_UNINITIALIZED = 1,
 	NVML_ERROR_INVALID_ARGUMENT = 2,
@@ -57,22 +65,45 @@ enum nvmlReturn_t {
 	NVML_ERROR_INSUFFICIENT_POWER = 8,
 	NVML_ERROR_DRIVER_NOT_LOADED = 9,
 	NVML_ERROR_TIMEOUT = 10,
+	NVML_ERROR_IRQ_ISSUE = 11,
+	NVML_ERROR_LIBRARY_NOT_FOUND = 12,
+	NVML_ERROR_FUNCTION_NOT_FOUND = 13,
+	NVML_ERROR_CORRUPTED_INFOROM = 14,
+	NVML_ERROR_GPU_IS_LOST = 15,
+	NVML_ERROR_RESET_REQUIRED = 16,
+	NVML_ERROR_OPERATING_SYSTEM = 17,
+	NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18,
+	NVML_ERROR_IN_USE = 19,
 	NVML_ERROR_UNKNOWN = 999
 };
 
-enum nvmlClockType_t {
+enum nvmlClockType_t
+{
 	NVML_CLOCK_GRAPHICS = 0,
 	NVML_CLOCK_SM = 1,
-	NVML_CLOCK_MEM = 2
+	NVML_CLOCK_MEM = 2,
+	NVML_CLOCK_VIDEO = 3,
+	NVML_CLOCK_COUNT
+};
+
+enum nvmlClockId_t
+{
+	NVML_CLOCK_ID_CURRENT = 0,
+	NVML_CLOCK_ID_APP_CLOCK_TARGET = 1,
+	NVML_CLOCK_ID_APP_CLOCK_DEFAULT = 2,
+	NVML_CLOCK_ID_CUSTOMER_BOOST_MAX = 3,
+	NVML_CLOCK_ID_COUNT
 };
 
-enum nvmlPcieUtilCounter_t {
+enum nvmlPcieUtilCounter_t
+{
 	NVML_PCIE_UTIL_TX_BYTES = 0,
 	NVML_PCIE_UTIL_RX_BYTES = 1,
 	NVML_PCIE_UTIL_COUNT
 };
 
-enum nvmlValueType_t {
+enum nvmlValueType_t
+{
 	NVML_VALUE_TYPE_DOUBLE = 0,
 	NVML_VALUE_TYPE_UNSIGNED_INT = 1,
 	NVML_VALUE_TYPE_UNSIGNED_LONG = 2,
@@ -80,77 +111,104 @@ enum nvmlValueType_t {
 	NVML_VALUE_TYPE_COUNT
 };
 
+typedef int nvmlGpuTopologyLevel_t;
+typedef int nvmlNvLinkCapability_t;
+typedef int nvmlNvLinkErrorCounter_t;
+typedef int nvmlNvLinkUtilizationControl_t;
+
 #define NVML_DEVICE_SERIAL_BUFFER_SIZE 30
 #define NVML_DEVICE_UUID_BUFFER_SIZE 80
 #define NVML_DEVICE_VBIOS_VERSION_BUFFER_SIZE 32
 
 /*
- * Handle to hold the function pointers for the entry points we need,
- * and the shared library itself.
- */
-typedef struct {
+* Handle to hold the function pointers for the entry points we need,
+* and the shared library itself.
+*/
+typedef struct
+{
 	void *nvml_dll;
 	int nvml_gpucount;
 	int cuda_gpucount;
 	unsigned int *nvml_pci_domain_id;
 	unsigned int *nvml_pci_bus_id;
 	unsigned int *nvml_pci_device_id;
+	unsigned int *nvml_pci_vendor_id;
 	unsigned int *nvml_pci_subsys_id;
 	int *nvml_cuda_device_id; /* map NVML dev to CUDA dev */
 	int *cuda_nvml_device_id; /* map CUDA dev to NVML dev */
 	nvmlDevice_t *devs;
 	nvmlEnableState_t *app_clocks;
-	nvmlReturn_t (*nvmlInit)(void);
-	nvmlReturn_t (*nvmlDeviceGetCount)(int *);
-	nvmlReturn_t (*nvmlDeviceGetHandleByIndex)(int, nvmlDevice_t *);
-	nvmlReturn_t (*nvmlDeviceGetAPIRestriction)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t *);
-	nvmlReturn_t (*nvmlDeviceSetAPIRestriction)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t);
-	nvmlReturn_t (*nvmlDeviceGetDefaultApplicationsClock)(nvmlDevice_t, nvmlClockType_t, unsigned int *);
-	nvmlReturn_t (*nvmlDeviceGetApplicationsClock)(nvmlDevice_t, nvmlClockType_t, unsigned int *);
-	nvmlReturn_t (*nvmlDeviceSetApplicationsClocks)(nvmlDevice_t, unsigned int, unsigned int);
-	nvmlReturn_t (*nvmlDeviceResetApplicationsClocks)(nvmlDevice_t);
-	nvmlReturn_t (*nvmlDeviceGetSupportedGraphicsClocks)(nvmlDevice_t, uint32_t mem, uint32_t *num, uint32_t *arr);
-	nvmlReturn_t (*nvmlDeviceGetSupportedMemoryClocks)(nvmlDevice_t, unsigned int *count, unsigned int *clocksMHz);
-	nvmlReturn_t (*nvmlDeviceGetClockInfo)(nvmlDevice_t, nvmlClockType_t, unsigned int *);
-	nvmlReturn_t (*nvmlDeviceGetMaxClockInfo)(nvmlDevice_t, nvmlClockType_t, unsigned int *);
-	nvmlReturn_t (*nvmlDeviceGetPowerManagementDefaultLimit)(nvmlDevice_t, unsigned int *limit);
-	nvmlReturn_t (*nvmlDeviceGetPowerManagementLimit)(nvmlDevice_t, unsigned int *limit);
-	nvmlReturn_t (*nvmlDeviceGetPowerManagementLimitConstraints)(nvmlDevice_t, unsigned int *min, unsigned int *max);
-	nvmlReturn_t (*nvmlDeviceSetPowerManagementLimit)(nvmlDevice_t device, unsigned int limit);
-	nvmlReturn_t (*nvmlDeviceGetPciInfo)(nvmlDevice_t, nvmlPciInfo_t *);
-	nvmlReturn_t (*nvmlDeviceGetCurrPcieLinkGeneration)(nvmlDevice_t device, unsigned int *gen);
-	nvmlReturn_t (*nvmlDeviceGetCurrPcieLinkWidth)(nvmlDevice_t device, unsigned int *width);
-	nvmlReturn_t (*nvmlDeviceGetMaxPcieLinkGeneration)(nvmlDevice_t device, unsigned int *gen);
-	nvmlReturn_t (*nvmlDeviceGetMaxPcieLinkWidth)(nvmlDevice_t device, unsigned int *width);
-	nvmlReturn_t (*nvmlDeviceGetName)(nvmlDevice_t, char *, int);
-	nvmlReturn_t (*nvmlDeviceGetTemperature)(nvmlDevice_t, int, unsigned int *);
-	nvmlReturn_t (*nvmlDeviceGetFanSpeed)(nvmlDevice_t, unsigned int *);
-	nvmlReturn_t (*nvmlDeviceGetPerformanceState)(nvmlDevice_t, int *); /* enum */
-	nvmlReturn_t (*nvmlDeviceGetPowerUsage)(nvmlDevice_t, unsigned int *);
-	nvmlReturn_t (*nvmlDeviceGetSerial)(nvmlDevice_t, char *serial, unsigned int len);
-	nvmlReturn_t (*nvmlDeviceGetUUID)(nvmlDevice_t, char *uuid, unsigned int len);
-	nvmlReturn_t (*nvmlDeviceGetVbiosVersion)(nvmlDevice_t, char *version, unsigned int len);
-	nvmlReturn_t (*nvmlSystemGetDriverVersion)(char *version, unsigned int len);
+	nvmlReturn_t(*nvmlInit)(void);
+	nvmlReturn_t(*nvmlDeviceGetCount)(int *);
+	nvmlReturn_t(*nvmlDeviceGetHandleByIndex)(int, nvmlDevice_t *);
+	nvmlReturn_t(*nvmlDeviceGetAPIRestriction)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t *);
+	nvmlReturn_t(*nvmlDeviceSetAPIRestriction)(nvmlDevice_t, nvmlRestrictedAPI_t, nvmlEnableState_t);
+	nvmlReturn_t(*nvmlDeviceGetDefaultApplicationsClock)(nvmlDevice_t, nvmlClockType_t, unsigned int *);
+	nvmlReturn_t(*nvmlDeviceGetApplicationsClock)(nvmlDevice_t, nvmlClockType_t, unsigned int *);
+	nvmlReturn_t(*nvmlDeviceSetApplicationsClocks)(nvmlDevice_t, unsigned int, unsigned int);
+	nvmlReturn_t(*nvmlDeviceResetApplicationsClocks)(nvmlDevice_t);
+	nvmlReturn_t(*nvmlDeviceGetSupportedGraphicsClocks)(nvmlDevice_t, uint32_t mem, uint32_t *num, uint32_t *arr);
+	nvmlReturn_t(*nvmlDeviceGetSupportedMemoryClocks)(nvmlDevice_t, unsigned int *count, unsigned int *clocksMHz);
+	nvmlReturn_t(*nvmlDeviceGetClockInfo)(nvmlDevice_t, nvmlClockType_t, unsigned int *);
+	nvmlReturn_t(*nvmlDeviceGetMaxClockInfo)(nvmlDevice_t, nvmlClockType_t, unsigned int *);
+	nvmlReturn_t(*nvmlDeviceGetPowerManagementDefaultLimit)(nvmlDevice_t, unsigned int *limit);
+	nvmlReturn_t(*nvmlDeviceGetPowerManagementLimit)(nvmlDevice_t, unsigned int *limit);
+	nvmlReturn_t(*nvmlDeviceGetPowerManagementLimitConstraints)(nvmlDevice_t, unsigned int *min, unsigned int *max);
+	nvmlReturn_t(*nvmlDeviceSetPowerManagementLimit)(nvmlDevice_t device, unsigned int limit);
+	nvmlReturn_t(*nvmlDeviceGetPciInfo)(nvmlDevice_t, nvmlPciInfo_t *);
+	nvmlReturn_t(*nvmlDeviceGetCurrPcieLinkGeneration)(nvmlDevice_t device, unsigned int *gen);
+	nvmlReturn_t(*nvmlDeviceGetCurrPcieLinkWidth)(nvmlDevice_t device, unsigned int *width);
+	nvmlReturn_t(*nvmlDeviceGetMaxPcieLinkGeneration)(nvmlDevice_t device, unsigned int *gen);
+	nvmlReturn_t(*nvmlDeviceGetMaxPcieLinkWidth)(nvmlDevice_t device, unsigned int *width);
+	nvmlReturn_t(*nvmlDeviceGetName)(nvmlDevice_t, char *, int);
+	nvmlReturn_t(*nvmlDeviceGetTemperature)(nvmlDevice_t, int, unsigned int *);
+	nvmlReturn_t(*nvmlDeviceGetFanSpeed)(nvmlDevice_t, unsigned int *);
+	nvmlReturn_t(*nvmlDeviceGetPerformanceState)(nvmlDevice_t, int *); /* enum */
+	nvmlReturn_t(*nvmlDeviceGetPowerUsage)(nvmlDevice_t, unsigned int *);
+	nvmlReturn_t(*nvmlDeviceGetSerial)(nvmlDevice_t, char *serial, unsigned int len);
+	nvmlReturn_t(*nvmlDeviceGetUUID)(nvmlDevice_t, char *uuid, unsigned int len);
+	nvmlReturn_t(*nvmlDeviceGetVbiosVersion)(nvmlDevice_t, char *version, unsigned int len);
+	nvmlReturn_t(*nvmlSystemGetDriverVersion)(char *version, unsigned int len);
 	char* (*nvmlErrorString)(nvmlReturn_t);
-	nvmlReturn_t (*nvmlShutdown)(void);
+	nvmlReturn_t(*nvmlShutdown)(void);
 	// v331
-	nvmlReturn_t (*nvmlDeviceGetEnforcedPowerLimit)(nvmlDevice_t, unsigned int *limit);
+	nvmlReturn_t(*nvmlDeviceGetEnforcedPowerLimit)(nvmlDevice_t, unsigned int *limit);
 	// v340
-	//nvmlReturn_t (*nvmlDeviceGetCpuAffinity)(nvmlDevice_t, unsigned int cpuSetSize, unsigned long* cpuSet);
-	//nvmlReturn_t (*nvmlDeviceSetCpuAffinity)(nvmlDevice_t);
-	//nvmlReturn_t (*nvmlDeviceGetAutoBoostedClocksEnabled)(nvmlDevice_t, nvmlEnableState_t *isEnabled, nvmlEnableState_t *defaultIsEnabled);
-	//nvmlReturn_t (*nvmlDeviceSetAutoBoostedClocksEnabled)(nvmlDevice_t, nvmlEnableState_t enabled);
+#ifdef __linux__
+	nvmlReturn_t(*nvmlDeviceClearCpuAffinity)(nvmlDevice_t);
+	nvmlReturn_t(*nvmlDeviceGetCpuAffinity)(nvmlDevice_t, unsigned int cpuSetSize, unsigned long* cpuSet);
+	nvmlReturn_t(*nvmlDeviceSetCpuAffinity)(nvmlDevice_t);
+#endif
 	// v346
-	nvmlReturn_t (*nvmlDeviceGetPcieThroughput)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value);
-} nvml_handle;
+	nvmlReturn_t(*nvmlDeviceGetPcieThroughput)(nvmlDevice_t, nvmlPcieUtilCounter_t, unsigned int *value);
+	// v36x (API 8)
+	nvmlReturn_t(*nvmlDeviceGetClock)(nvmlDevice_t, nvmlClockType_t clockType, nvmlClockId_t clockId, unsigned int *clockMHz);
+#ifdef __linux__
+	nvmlReturn_t(*nvmlSystemGetTopologyGpuSet)(unsigned int cpuNumber, unsigned int *count, nvmlDevice_t *deviceArray);
+	nvmlReturn_t(*nvmlDeviceGetTopologyNearestGpus)(nvmlDevice_t, nvmlGpuTopologyLevel_t level, unsigned int *count, nvmlDevice_t *deviceArray);
+	nvmlReturn_t(*nvmlDeviceGetTopologyCommonAncestor)(nvmlDevice_t device1, nvmlDevice_t device2, nvmlGpuTopologyLevel_t *pathInfo);
+#endif
+	nvmlReturn_t(*nvmlDeviceGetNvLinkState)(nvmlDevice_t, unsigned int link, nvmlEnableState_t *isActive);
+	nvmlReturn_t(*nvmlDeviceGetNvLinkVersion)(nvmlDevice_t, unsigned int link, unsigned int *version);
+	nvmlReturn_t(*nvmlDeviceGetNvLinkCapability)(nvmlDevice_t, unsigned int link, nvmlNvLinkCapability_t capability, unsigned int *capResult);
+	nvmlReturn_t(*nvmlDeviceGetNvLinkRemotePciInfo)(nvmlDevice_t, unsigned int link, nvmlPciInfo_t *pci);
+	nvmlReturn_t(*nvmlDeviceGetNvLinkErrorCounter)(nvmlDevice_t, unsigned int link, nvmlNvLinkErrorCounter_t counter, unsigned long long *counterValue);
+	nvmlReturn_t(*nvmlDeviceResetNvLinkErrorCounters)(nvmlDevice_t, unsigned int link);
+	nvmlReturn_t(*nvmlDeviceSetNvLinkUtilizationControl)(nvmlDevice_t, unsigned int link, unsigned int counter, nvmlNvLinkUtilizationControl_t *control, unsigned int reset);
+	nvmlReturn_t(*nvmlDeviceGetNvLinkUtilizationControl)(nvmlDevice_t, unsigned int link, unsigned int counter, nvmlNvLinkUtilizationControl_t *control);
+	nvmlReturn_t(*nvmlDeviceGetNvLinkUtilizationCounter)(nvmlDevice_t, unsigned int link, unsigned int counter, unsigned long long *rxcounter, unsigned long long *txcounter);
+	nvmlReturn_t(*nvmlDeviceFreezeNvLinkUtilizationCounter)(nvmlDevice_t, unsigned int link, unsigned int counter, nvmlEnableState_t freeze);
+	nvmlReturn_t(*nvmlDeviceResetNvLinkUtilizationCounter)(nvmlDevice_t, unsigned int link, unsigned int counter);
+} nvml_handle;
 
 nvml_handle * nvml_create();
 int nvml_destroy(nvml_handle *nvmlh);
 
-/*
- * Query the number of GPUs seen by NVML
- */
+// Debug informations
+void nvml_print_device_info(int dev_id);
+
+// Query the number of GPUs seen by NVML
 int nvml_get_gpucount(nvml_handle *nvmlh, int *gpucount);
 
 int nvml_set_plimit(nvml_handle *nvmlh, int dev_id);
@@ -165,20 +223,52 @@ unsigned int gpu_fanpercent(struct cgpu_info *gpu);
 unsigned int gpu_fanrpm(struct cgpu_info *gpu);
 float gpu_temp(struct cgpu_info *gpu);
 unsigned int gpu_power(struct cgpu_info *gpu);
+unsigned int gpu_plimit(struct cgpu_info *gpu);
 int gpu_pstate(struct cgpu_info *gpu);
 int gpu_busid(struct cgpu_info *gpu);
 
-unsigned int gpu_power(struct cgpu_info *gpu);
-unsigned int gpu_plimit(struct cgpu_info *gpu);
-
-/* pid/vid, sn and bios rev */
+// pid/vid, sn and bios rev
 int gpu_info(struct cgpu_info *gpu);
-int gpu_vendor(uint8_t pci_bus_id, char *vendorname);
+int gpu_vendor(uint8_t pci_bus_id, char *vendorname);
 
 /* nvapi functions */
 #ifdef WIN32
 int nvapi_init();
+int nvapi_init_settings();
+
+// to debug nvapi..
+int nvapi_pstateinfo(unsigned int devNum);
+uint8_t nvapi_get_plimit(unsigned int devNum);
+
+// nvapi devNum from dev_id (cuda GPU #N)
+unsigned int nvapi_devnum(int dev_id);
+int nvapi_devid(unsigned int devNum);
+
+void nvapi_toggle_clocks(int thr_id, bool enable);
+
+// cuda Replacement for 6.5 compat
 int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total);
 #endif
 
 #endif /* USE_WRAPNVML */
+
+void gpu_led_on(int dev_id);
+void gpu_led_percent(int dev_id, int percent);
+void gpu_led_off(int dev_id);
+
+#define LED_MODE_OFF 0
+#define LED_MODE_SHARES 1
+#define LED_MODE_MINING 2
+
+/* ------ nvidia-settings stuff for linux -------------------- */
+
+int nvs_init();
+int nvs_set_clocks(int dev_id);
+void nvs_reset_clocks(int dev_id);
+
+// nvidia-settings (X) devNum from dev_id (cuda GPU #N)
+int8_t nvs_devnum(int dev_id);
+int nvs_devid(int8_t devNum);
+
+extern bool need_nvsettings;
\ No newline at end of file
diff --git a/nvsettings.cpp b/nvsettings.cpp
new file mode 100644
index 00000000..5ea32338
--- /dev/null
+++ b/nvsettings.cpp
@@ -0,0 +1,251 @@
+/**
+ * nvidia-settings command line interface for linux - tpruvot 2017
+ *
+ * Notes: needs X set up and running, with an opened X session.
+ * init speed could be improved by running multiple threads
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h> // pid_t
+
+#include "miner.h"
+#include "nvml.h"
+#include "cuda_runtime.h"
+
+#ifdef __linux__
+
+#define NVS_PATH "/usr/bin/nvidia-settings"
+
+static int8_t nvs_dev_map[MAX_GPUS] = { 0 };
+static uint8_t nvs_bus_ids[MAX_GPUS] = { 0 };
+static int32_t nvs_clocks_set[MAX_GPUS] = { 0 };
+
+extern int32_t device_mem_offsets[MAX_GPUS];
+
+#if 0 /* complicated exec way and not better in fine */
+int nvs_query_fork_int(int nvs_id, const char* field)
+{
+	pid_t pid;
+	int pipes[2] = { 0 };
+	if (pipe(pipes) < 0)
+		return -1;
+
+	if ((pid = fork()) == -1) {
+		close(pipes[0]);
+		close(pipes[1]);
+		return -1;
+	} else if (pid == 0) {
+		char gpu_field[128] = { 0 };
+		sprintf(gpu_field, "[gpu:%d]/%s", nvs_id, field);
+
+		dup2(pipes[1], STDOUT_FILENO);
+		close(pipes[0]);
+		//close(pipes[1]);
+
+		if (-1 == execl(NVS_PATH, "nvidia-settings", "-q", gpu_field, "-t", NULL)) {
+			exit(-1);
+		}
+	} else {
+		int intval = -1;
+		FILE *p = fdopen(pipes[0], "r");
+		close(pipes[1]);
+		if (!p) {
+			applog(LOG_WARNING, "%s: fdopen(%d) failed", __func__, pipes[0]);
+			return -1;
+		}
+		int rc = fscanf(p, "%d", &intval); // BUS 0000:2a:00.0 is read 42
+		if (rc > 0) {
+			//applog(LOG_BLUE, "%s res=%d", field, intval);
+		}
+		fclose(p);
+		close(pipes[0]);
+		return intval;
+	}
+	return -1;
+}
+#endif
+
+int nvs_query_int(int nvs_id, const char* field, int showerr)
+{
+	FILE *fp;
+	char command[256] = { 0 };
+	sprintf(command, "%s -t -q '[gpu:%d]/%s' 2>&1", NVS_PATH, nvs_id, field);
+	fp = popen(command, "r");
+	if (fp) {
+		int intval = -1;
+		if (!showerr) {
+			int b = fscanf(fp, "%d", &intval);
+			if (!b) {
+				pclose(fp);
+				return -1;
+			}
+		} else {
+			char msg[512] = { 0 };
+			char buf[64] = { 0 };
+			ssize_t bytes, len=0, maxlen=sizeof(msg)-1;
+			while ((bytes=fscanf(fp, "%s", buf)) > 0) {
+				len += snprintf(&msg[len], maxlen-len, "%s ", buf);
+				if (len >= maxlen) break;
+			}
+			if (strstr(msg, "ERROR")) {
+				char *xtra = strstr(msg, "; please run");
+				if (xtra) *xtra = '\0'; // strip noise
+				applog(LOG_INFO, "%s", msg);
+				intval = -1;
+			} else {
+				sscanf(msg, "%d", &intval);
+			}
+		}
+		pclose(fp);
+		return intval;
+	}
+	return -1;
+}
+
+int nvs_query_str(int nvs_id, const char* field, char* output, size_t maxlen)
+{
+	FILE *fp;
+	char command[256] = { 0 };
+	*output = '\0';
+	sprintf(command, "%s -t -q '[gpu:%d]/%s' 2>&1", NVS_PATH, nvs_id, field);
+	fp = popen(command, "r");
+	if (fp) {
+		char buf[256] = { 0 };
+		ssize_t len=0;
+		ssize_t bytes=0;
+		while ((bytes=fscanf(fp, "%s", buf)) > 0) {
+			//applog(LOG_BLUE, "%d %s %d", nvs_id, buf, (int) bytes);
+			len += snprintf(&output[len], maxlen-len, "%s ", buf);
+			if (len >= maxlen) break;
+		}
+		pclose(fp);
+		if (strstr(output, "ERROR")) {
+			char *xtra = strstr(output, "; please run");
+			if (xtra) *xtra = '\0'; // strip noise
+			applog(LOG_INFO, "%s", output);
+			*output='\0';
+		}
+		return (int) len;
+	}
+	return -1;
+}
+
+int nvs_set_int(int nvs_id, const char* field, int value)
+{
+	FILE *fp;
+	char command[256] = { 0 };
+	int res = -1;
+	snprintf(command, 256, "%s -a '[gpu:%d]/%s=%d' 2>&1", NVS_PATH, nvs_id, field, value);
+	fp = popen(command, "r");
+	if (fp) {
+		char msg[512] = { 0 };
+		char buf[64] = { 0 };
+		ssize_t bytes, len=0, maxlen=sizeof(msg)-1;
+		while ((bytes=fscanf(fp, "%s", buf)) > 0) {
+			len += snprintf(&msg[len], maxlen-len, "%s ", buf);
+			if (len >= maxlen) break;
+		}
+		if (strstr(msg, "ERROR")) {
+			char *xtra = strstr(msg, "; please run");
+			if (xtra) *xtra = '\0'; // strip noise
+			applog(LOG_INFO, "%s", msg);
+		} else
+			res = 0;
+		pclose(fp);
+	}
+	return res;
+}
+
+int8_t nvs_devnum(int dev_id)
+{
+	return nvs_dev_map[dev_id];
+}
+
+int nvs_devid(int8_t nvs_id)
+{
+	for (int i=0; i < opt_n_threads; i++) {
+		int dev_id = device_map[i % MAX_GPUS];
+		if (nvs_dev_map[dev_id] == nvs_id)
+			return dev_id;
+	}
+	return 0;
+}
+
+int nvs_init()
+{
+	struct stat info;
+	struct timeval tv_start, tv_end, diff;
+	int x_devices = 0;
+	int n_threads = opt_n_threads;
+	if (stat(NVS_PATH, &info))
+		return -ENOENT;
+
+	gettimeofday(&tv_start, NULL);
+
+	for (int d = 0; d < MAX_GPUS; d++) {
+		// this part can be "slow" (100-200ms per device)
+		int res = nvs_query_int(d, "PCIBus", 1);
+		if (res < 0) break;
+		nvs_bus_ids[d] = 0xFFu & res;
+		x_devices++;
+	}
+
+	if (opt_debug) {
+		gettimeofday(&tv_end, NULL);
+		timeval_subtract(&diff, &tv_end, &tv_start);
+		applog(LOG_DEBUG, "nvidia-settings pci bus queries took %.2f ms",
+			(1000.0 * diff.tv_sec) + (0.001 * diff.tv_usec));
+	}
+
+	if (!x_devices)
+		return -ENODEV;
+	if (!n_threads) n_threads = cuda_num_devices();
+	for (int i = 0; i < n_threads; i++) {
+		int dev_id = device_map[i % MAX_GPUS];
+		cudaDeviceProp props;
+		if (cudaGetDeviceProperties(&props, dev_id) == cudaSuccess) {
+			for (int8_t d = 0; d < x_devices; d++) {
+				if (nvs_bus_ids[d] == (uint8_t) props.pciBusID) {
+					gpulog(LOG_DEBUG, i, "matches X gpu:%d by busId %u",
+						(int) d, (uint) nvs_bus_ids[d]);
+					nvs_dev_map[dev_id] = d;
+					/* char buf[1024] = { 0 };
+					nvs_query_str(d, "GPUCurrentClockFreqsString", buf, sizeof(buf)-1);
+					gpulog(LOG_DEBUG, d, "%s", buf); */
+					break;
+				}
+			}
+		}
+	}
+	return 0;
+}
+
+int nvs_set_clocks(int dev_id)
+{
+	int res;
+	int8_t d = nvs_devnum(dev_id);
+	if (d < 0) return -ENODEV;
+	if (!device_mem_offsets[dev_id] || nvs_clocks_set[d]) return 0;
+	// the X attribute is a transfer rate offset, twice the effective clock offset
+	res = nvs_set_int(d, "GPUMemoryTransferRateOffsetAllPerformanceLevels", device_mem_offsets[dev_id]*2);
+	if (res == 0) nvs_clocks_set[d] = device_mem_offsets[dev_id]*2;
+	return res;
+}
+
+void nvs_reset_clocks(int dev_id)
+{
+	int8_t d = nvs_devnum(dev_id);
+	if (d < 0 || !nvs_clocks_set[d]) return;
+	nvs_set_int(d, "GPUMemoryTransferRateOffsetAllPerformanceLevels", 0);
+	nvs_clocks_set[d] = 0;
+}
+
+#else
+int nvs_init() { return -ENOSYS; }
+int nvs_set_clocks(int dev_id) { return -ENOSYS; }
+void nvs_reset_clocks(int dev_id) { }
+#endif
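For reference, the helpers above simply shell out to the nvidia-settings CLI; the command lines produced by their format strings, for X gpu 0 and example values, look like:

	/usr/bin/nvidia-settings -t -q '[gpu:0]/PCIBus'
	/usr/bin/nvidia-settings -a '[gpu:0]/GPUMemoryTransferRateOffsetAllPerformanceLevels=1000'

Note the doubling in nvs_set_clocks: the X attribute is a memory transfer rate offset, which on double-data-rate memory is twice the clock offset, so a --mem-clock=+500 request is applied as +1000.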
diff --git a/util.cpp b/util.cpp
index 37caf690..5b0d1a1a 100644
--- a/util.cpp
+++ b/util.cpp
@@ -173,6 +173,40 @@ void applog(int prio, const char *fmt, ...)
 	va_end(ap);
 }
 
+extern int gpu_threads;
+// Use a different prefix if multiple cpu threads per gpu
+// Also, auto hide LOG_DEBUG if --debug (-D) is not used
+void gpulog(int prio, int thr_id, const char *fmt, ...)
+{
+	char _ALIGN(128) pfmt[128];
+	char _ALIGN(128) line[256];
+	int len, dev_id = device_map[thr_id % MAX_GPUS];
+	va_list ap;
+
+	if(prio == LOG_DEBUG && !opt_debug)
+		return;
+
+	if(gpu_threads > 1)
+		len = snprintf(pfmt, 128, "GPU T%d: %s", thr_id, fmt);
+	else
+		len = snprintf(pfmt, 128, "GPU #%d: %s", dev_id, fmt);
+	pfmt[sizeof(pfmt) - 1] = '\0';
+
+	va_start(ap, fmt);
+
+	if(len && vsnprintf(line, sizeof(line), pfmt, ap))
+	{
+		line[sizeof(line) - 1] = '\0';
+		applog(prio, "%s", line);
+	}
+	else
+	{
+		fprintf(stderr, "%s OOM!\n", __func__);
+	}
+
+	va_end(ap);
+}
+
 void format_hashrate(double hashrate, char *output)
 {
 	char prefix = '\0';
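Usage note for the gpulog helper added above: it behaves like applog but prefixes the message with the calling thread's GPU, so callers do not have to repeat the device id themselves. For example, the call made in nvapi_init_settings:

	gpulog(LOG_INFO, n, "Power limit is set to %u%%", res);
	// logs "GPU #2: Power limit is set to 75%"
	// (or "GPU T2: ..." when gpu_threads > 1)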